#!/usr/bin/python
"""Remove consecutive duplicate measurements from the ground-truth pickle.

Usage: <script> FOLDER

Reads ``FOLDER/all_with_groundtruth.pickle``, drops every measurement whose
first three comma-separated fields (lon, lat, sig) equal those of the
measurement immediately before it, removes keys that end up with no
measurements, and writes:

* ``FOLDER/all_with_groundtruth_cleaned.pickle`` -- the cleaned data
* ``FOLDER/all_with_groundtruth_cleaned.stat`` -- one line per distinct
  measurement count, formatted as ``num,count,cumulative_count``
"""
import os
import sys
import pickle


############################
# extract measurements
############################
def _dedupe_consecutive(records):
    """Drop, in place, records repeating the previous record's first 3 fields.

    Each record is a comma-separated string. Within a run of consecutive
    records sharing the same first three fields only the first record is
    kept. A record with fewer than three fields raises ``ValueError`` before
    anything is modified.
    """
    kept = []
    previous = None
    for record in records:
        lon, lat, sig = record.split(',')[0:3]
        current = (lon, lat, sig)
        if current != previous:
            kept.append(record)
        previous = current
    # Slice-assign so the list object itself is updated; the container that
    # holds it (data[key]) may be an immutable tuple.
    records[:] = kept


def clean_measurements(data):
    """Clean ``data`` in place and return the measurement-count distribution.

    ``data`` maps each key to a sequence whose element ``[1]`` is a list of
    comma-separated measurement strings. Consecutive duplicates are removed
    from every list, and keys whose list is empty afterwards are deleted.

    Returns a dict mapping "number of remaining measurements" to "number of
    keys that have exactly that many".
    """
    cleaned_dist = {}
    # Iterate over a snapshot of the keys: entries are deleted inside the
    # loop, which raises RuntimeError on a live dict view in Python 3.
    for key in list(data):
        records = data[key][1]
        _dedupe_consecutive(records)
        remaining = len(records)
        if remaining == 0:
            del data[key]
        else:
            cleaned_dist[remaining] = cleaned_dist.get(remaining, 0) + 1
    return cleaned_dist


def _write_stat(path, cleaned_dist):
    """Write ``num,count,cumulative_count`` lines sorted by ``num``."""
    with open(path, 'w') as f:
        total_count = 0
        for num, count in sorted(cleaned_dist.items(), key=lambda x: x[0]):
            total_count += count
            f.write("{},{},{}\n".format(num, count, total_count))


def main(argv=None):
    """Run the cleaning step on the folder named by ``argv[1]``.

    ``argv`` defaults to ``sys.argv``. Exits with status 2 when the folder
    argument is missing or the input pickle does not exist.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write("usage: {} FOLDER\n".format(argv[0] if argv else "script"))
        sys.exit(2)

    folderpath = argv[1]
    pickle_fp_with_gt = "{}/all_with_groundtruth.pickle".format(folderpath)
    cleaned_pickle_fp_with_gt = "{}/all_with_groundtruth_cleaned.pickle".format(folderpath)
    cleaned_stat_fp_with_gt = "{}/all_with_groundtruth_cleaned.stat".format(folderpath)

    if not os.path.isfile(pickle_fp_with_gt):
        print("run 002 script first")
        sys.exit(2)

    # NOTE(security): pickle.load executes arbitrary code embedded in the
    # file; only run this on pickles produced by the preceding pipeline step.
    with open(pickle_fp_with_gt, 'rb') as f:
        data = pickle.load(f)

    # clean data
    cleaned_dist = clean_measurements(data)

    with open(cleaned_pickle_fp_with_gt, 'wb') as f:
        pickle.dump(data, f)

    # summarize number of measurements
    _write_stat(cleaned_stat_fp_with_gt, cleaned_dist)


if __name__ == "__main__":
    main()