got the stats

This commit is contained in:
HappyZ 2019-06-10 12:53:15 -05:00
parent f1399395a9
commit bac10eb855
5 changed files with 59 additions and 7 deletions

View File

@ -48,5 +48,5 @@ for key in data:
data_unseen[key] = data[key]
pickle.dump(data_with_gt, open(pickle_fp_with_gt, 'wb'))
pickle.dump(data_no_gt, open(pickle_no_gt, 'wb'))
pickle.dump(data_no_gt, open(pickle_fp_no_gt, 'wb'))
pickle.dump(data_unseen, open(pickle_fp_unseen, 'wb'))

View File

@ -0,0 +1,51 @@
#!/usr/bin/python
import os
import sys
import pickle
############################
# extract measurements
############################
def dedupe_consecutive(records):
    """Remove consecutive duplicate records from *records*, in place.

    Two records count as duplicates when their first three comma-separated
    fields are equal; any fields after the third are ignored.  The first
    record of every run of duplicates is kept.

    Args:
        records: mutable list of comma-separated strings, each with at
            least three fields.

    Returns:
        The same list object, after removal.

    Raises:
        ValueError: if a record has fewer than three comma-separated fields.
    """
    # Parse everything up front so a malformed record aborts before the
    # list has been modified.
    fields = []
    for record in records:
        lon, lat, sig = record.split(',')[0:3]
        fields.append((lon, lat, sig))
    # Slice assignment keeps the caller's list object (it lives inside
    # data[key]) instead of rebinding a new one.
    records[:] = [
        record for i, record in enumerate(records)
        if i == 0 or fields[i] != fields[i - 1]
    ]
    return records


def clean_measurements(data):
    """De-duplicate the measurement list of every entry of *data*, in place.

    ``data[key][1]`` is treated as the list of measurement strings for
    ``key``.  Entries left with no measurements are deleted from *data*.

    Args:
        data: dict whose values are indexable, with a list of
            comma-separated measurement strings at index 1.

    Returns:
        dict mapping "number of measurements remaining" to the number of
        entries of *data* that ended up with exactly that many.
    """
    cleaned_dist = {}
    # Iterate over a snapshot of the keys: deleting from a dict while
    # iterating its live key view raises RuntimeError on Python 3.
    for key in list(data):
        length_after = len(dedupe_consecutive(data[key][1]))
        if length_after == 0:
            del data[key]
        else:
            cleaned_dist[length_after] = cleaned_dist.get(length_after, 0) + 1
    return cleaned_dist


def write_stats(cleaned_dist, stat_fp):
    """Write the measurement-count distribution to *stat_fp*.

    One line per distinct measurement count, in ascending order:
    ``<count of measurements>,<number of entries>,<cumulative entries>``.
    """
    with open(stat_fp, 'w') as f:
        total_count = 0
        for num, count in sorted(cleaned_dist.items(), key=lambda x: x[0]):
            total_count += count
            f.write("{},{},{}\n".format(num, count, total_count))


def main():
    """Clean the ground-truth pickle found in the folder given as argv[1]."""
    folderpath = sys.argv[1]
    pickle_fp_with_gt = "{}/all_with_groundtruth.pickle".format(folderpath)
    cleaned_pickle_fp_with_gt = "{}/all_with_groundtruth_cleaned.pickle".format(folderpath)
    cleaned_stat_fp_with_gt = "{}/all_with_groundtruth_cleaned.stat".format(folderpath)

    if not os.path.isfile(pickle_fp_with_gt):
        print("run 002 script first")
        sys.exit(2)

    # NOTE: pickle.load executes arbitrary code from the file; only run this
    # on pickles produced by a trusted earlier step.
    with open(pickle_fp_with_gt, 'rb') as f:
        data = pickle.load(f)

    # clean data
    cleaned_dist = clean_measurements(data)
    with open(cleaned_pickle_fp_with_gt, 'wb') as f:
        pickle.dump(data, f)

    # summarize number of measurements
    write_stats(cleaned_dist, cleaned_stat_fp_with_gt)


if __name__ == "__main__":
    main()

View File

@ -8,16 +8,17 @@ import pickle
# extract measurements
############################
folderpath = sys.argv[1]
pickle_fp_with_gt = "{}/all_with_groundtruth.pickle".format(folderpath)
pickle_fp_with_gt = "{}/all_with_groundtruth_cleaned.pickle".format(folderpath)
if not os.path.isfile(pickle_fp_with_gt):
print("run 002 script first")
print("run 003a script first")
sys.exit(2)
data = pickle.load(open(pickle_fp_with_gt, 'rb'))
outputfolder = folderpath.rstrip('/') + '_extracted'
threshold = int(input("please enter minimal number of measurements:"))
outputfolder = '{}_extracted_thresh_{}'.format(folderpath.rstrip('/'), threshold)
if not os.path.isdir(outputfolder):
os.makedirs(outputfolder)
@ -36,8 +37,8 @@ for filepath in data:
counter += 1
if counter % 1000 == 0:
print("progress: {} out of {} files extracted ({:.2f}%)".format(counter, len(data), 100.0 * counter / len(data)))
if len(data[filepath][1]) < 10:
print("{}: entry less than 10, skipping".format(filepath))
if len(data[filepath][1]) < threshold:
print("{}: entry less than {}, skipping".format(filepath, threshold))
continue
counter_valid += 1
if not os.path.isfile(filepath):

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB