#!/usr/bin/python
"""Combine per-chunk ``*_parsed.pickle`` files and split the result by key.

Usage: ``python <this script> FOLDERPATH``

If ``FOLDERPATH/all.pickle`` exists it is loaded as-is.  Otherwise every file
in FOLDERPATH whose name ends with ``_parsed.pickle`` is loaded in ascending
order of its trailing numeric index, merged into one dict, and written to
``all.pickle``.  The combined dict is then split into three pickles according
to substrings found in each key:

* ``all_with_groundtruth.pickle``  -- keys containing ``"with_groundtruth"``
* ``all_no_groundtruth.pickle``    -- keys containing ``"without_groundtruth"``
* ``all_unseen.pickle``            -- keys containing ``"unseen"``

NOTE: pickle is not safe against untrusted input; only run this on files you
produced yourself.
"""
import os
import sys
import pickle

# Suffix that identifies a per-chunk input file.
PARSED_SUFFIX = '_parsed.pickle'


def load_pickle(path):
    """Return the object stored in the pickle file at *path*.

    The file is closed deterministically, even if unpickling fails.
    """
    # SECURITY: pickle.load can execute arbitrary code; trusted files only.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def dump_pickle(obj, path):
    """Write *obj* as a pickle to *path*, closing (and flushing) the file."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)


def chunk_index(filename):
    """Return the integer index encoded just before ``_parsed.pickle``.

    For ``"foo_12_parsed.pickle"`` this returns ``12``.  The suffix is removed
    explicitly; ``str.rstrip`` would strip a *set of characters* rather than
    the suffix and only works by accident when the index is purely numeric.

    Raises:
        ValueError: if the last ``_``-separated token is not an integer.
    """
    if filename.endswith(PARSED_SUFFIX):
        stem = filename[:-len(PARSED_SUFFIX)]
    else:
        stem = filename
    return int(stem.split('_')[-1])


def merge_chunk(data, chunk):
    """Merge *chunk* into *data* in place and return *data*.

    Keys not yet present are copied over by reference.  For keys already
    present, element ``[1]`` of the chunk's value is appended (via ``+=``)
    to element ``[1]`` of the existing value; element ``[0]`` of the first
    occurrence is kept.
    """
    for key in chunk:
        if key not in data:
            data[key] = chunk[key]
        else:
            data[key][1] += chunk[key][1]
    return data


def split_by_groundtruth(data):
    """Partition *data* into ``(with_gt, no_gt, unseen)`` dicts.

    The first matching substring wins, in the order ``"with_groundtruth"``,
    ``"without_groundtruth"``, ``"unseen"``.  Keys matching none of them are
    dropped.
    """
    data_with_gt = {}
    data_no_gt = {}
    data_unseen = {}
    for key in data:
        if "with_groundtruth" in key:
            data_with_gt[key] = data[key]
        elif "without_groundtruth" in key:
            data_no_gt[key] = data[key]
        elif "unseen" in key:
            data_unseen[key] = data[key]
    return data_with_gt, data_no_gt, data_unseen


def load_or_combine(folderpath):
    """Return the combined dict for *folderpath*.

    Loads ``all.pickle`` when it already exists; otherwise merges all
    ``*_parsed.pickle`` chunks in index order and caches the result in
    ``all.pickle``.
    """
    all_pickle_fp = "{}/all.pickle".format(folderpath)
    if os.path.isfile(all_pickle_fp):
        # load data only
        return load_pickle(all_pickle_fp)

    # load and combine data
    # endswith (not a substring test) so stray files such as
    # "x_1_parsed.pickle.bak" are ignored instead of breaking the sort key.
    names = [name for name in os.listdir(folderpath)
             if name.endswith(PARSED_SUFFIX)]
    data = {}
    for name in sorted(names, key=chunk_index):
        print("loading {}".format(name))
        datatmp = load_pickle("{}/{}".format(folderpath, name))
        print("combining..")
        merge_chunk(data, datatmp)
    dump_pickle(data, all_pickle_fp)
    return data


def main(argv=None):
    """Script entry point; *argv* defaults to ``sys.argv``."""
    if argv is None:
        argv = sys.argv
    print("load ~45GB data into memory! do not try this if not on server machine")
    if len(argv) < 2:
        sys.exit("usage: {} FOLDERPATH".format(argv[0] if argv else "script"))

    ############################
    # extract measurements
    ############################
    folderpath = argv[1]
    pickle_fp_with_gt = "{}/all_with_groundtruth.pickle".format(folderpath)
    pickle_fp_no_gt = "{}/all_no_groundtruth.pickle".format(folderpath)
    pickle_fp_unseen = "{}/all_unseen.pickle".format(folderpath)

    data = load_or_combine(folderpath)
    data_with_gt, data_no_gt, data_unseen = split_by_groundtruth(data)

    dump_pickle(data_with_gt, pickle_fp_with_gt)
    dump_pickle(data_no_gt, pickle_fp_no_gt)
    dump_pickle(data_unseen, pickle_fp_unseen)


if __name__ == "__main__":
    main()