53 lines
1.6 KiB
Python
53 lines
1.6 KiB
Python
#!/usr/bin/python
|
|
|
|
import os
|
|
import sys
|
|
import pickle
|
|
|
|
############################
# extract measurements
############################
#
# Usage: <script> FOLDER
#
# Merges every "*_parsed.pickle" chunk found in FOLDER into FOLDER/all.pickle
# (or reuses that file when it already exists), then writes three subsets of
# the merged dict, selected by key:
#   FOLDER/all_with_groundtruth.pickle  - keys containing "with_groundtruth"
#   FOLDER/all_no_groundtruth.pickle    - keys containing "without_groundtruth"
#   FOLDER/all_unseen.pickle            - keys containing "unseen"

# Suffix that marks a per-chunk input file inside the data folder.
_PARSED_SUFFIX = "_parsed.pickle"


def _load_pickle(path):
    """Unpickle and return the object stored at *path*, closing the file.

    NOTE(review): pickle executes arbitrary code while loading; only point
    this script at folders whose contents you trust.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


def _dump_pickle(obj, path):
    """Pickle *obj* to *path*; the file is flushed and closed on return."""
    with open(path, "wb") as handle:
        pickle.dump(obj, handle)


def _chunk_index(filename):
    """Return the integer that directly precedes ``_parsed.pickle``.

    For example ``"run_10_parsed.pickle"`` yields ``10``.  The suffix is
    removed by slicing rather than ``str.rstrip``, because ``rstrip`` strips
    a *set of characters* and only worked by accident when a digit happened
    to precede the suffix.

    Raises ValueError when the last ``_``-separated field is not an integer.
    """
    stem = filename[:-len(_PARSED_SUFFIX)]
    return int(stem.split("_")[-1])


def _combine_parsed_chunks(folderpath):
    """Merge all ``*_parsed.pickle`` files in *folderpath* into one dict.

    Chunks are processed in ascending numeric order of their index.  The
    first chunk that contains a key provides the whole entry; for every later
    chunk element ``[1]`` of its entry is appended with ``+=`` onto element
    ``[1]`` of the stored entry (presumably both are lists - confirm against
    the producer of these files).
    """
    # Match on the real suffix so that stray files such as
    # "x_parsed.pickle.bak" are ignored instead of breaking the sort key.
    chunk_names = sorted(
        (name for name in os.listdir(folderpath)
         if name.endswith(_PARSED_SUFFIX)),
        key=_chunk_index
    )

    combined = {}
    for name in chunk_names:
        print("loading {}".format(name))
        chunk = _load_pickle("{}/{}".format(folderpath, name))
        print("combining..")
        for key in chunk:
            if key not in combined:
                combined[key] = chunk[key]
            else:
                combined[key][1] += chunk[key][1]
    return combined


def _split_by_category(data):
    """Partition *data* into (with_gt, no_gt, unseen) dicts by key content.

    The checks run in this order and the first match wins; keys matching
    none of the three markers are left out of every subset.  Values are
    shared with *data*, not copied.
    """
    with_gt = {}
    no_gt = {}
    unseen = {}
    for key in data:
        if "with_groundtruth" in key:
            with_gt[key] = data[key]
        elif "without_groundtruth" in key:
            no_gt[key] = data[key]
        elif "unseen" in key:
            unseen[key] = data[key]
    return with_gt, no_gt, unseen


def main(argv=None):
    """Run the merge-and-split job.

    argv: list of command-line arguments without the program name; defaults
          to ``sys.argv[1:]``.  ``argv[0]`` is the data folder.

    Exits with a usage message (non-zero status) when no folder is given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit("usage: {} FOLDER".format(sys.argv[0]))

    print("load ~45GB data into memory! do not try this if not on server machine")

    folderpath = args[0]
    all_pickle_fp = "{}/all.pickle".format(folderpath)
    pickle_fp_with_gt = "{}/all_with_groundtruth.pickle".format(folderpath)
    pickle_fp_no_gt = "{}/all_no_groundtruth.pickle".format(folderpath)
    pickle_fp_unseen = "{}/all_unseen.pickle".format(folderpath)

    if os.path.isfile(all_pickle_fp):
        # load data only
        data = _load_pickle(all_pickle_fp)
    else:
        # load and combine data, then cache the merged result for later runs
        data = _combine_parsed_chunks(folderpath)
        _dump_pickle(data, all_pickle_fp)

    data_with_gt, data_no_gt, data_unseen = _split_by_category(data)

    _dump_pickle(data_with_gt, pickle_fp_with_gt)
    _dump_pickle(data_no_gt, pickle_fp_no_gt)
    _dump_pickle(data_unseen, pickle_fp_unseen)


if __name__ == "__main__":
    main()
|