got the stats
This commit is contained in:
parent
f1399395a9
commit
bac10eb855
|
|
@ -48,5 +48,5 @@ for key in data:
|
||||||
data_unseen[key] = data[key]
|
data_unseen[key] = data[key]
|
||||||
|
|
||||||
pickle.dump(data_with_gt, open(pickle_fp_with_gt, 'wb'))
|
pickle.dump(data_with_gt, open(pickle_fp_with_gt, 'wb'))
|
||||||
pickle.dump(data_no_gt, open(pickle_no_gt, 'wb'))
|
pickle.dump(data_no_gt, open(pickle_fp_no_gt, 'wb'))
|
||||||
pickle.dump(data_unseen, open(pickle_fp_unseen, 'wb'))
|
pickle.dump(data_unseen, open(pickle_fp_unseen, 'wb'))
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,51 @@
|
||||||
|
#!/usr/bin/python
"""Clean the ground-truth measurement pickle and summarise what is left.

Usage: <script> <folderpath>

Reads ``<folderpath>/all_with_groundtruth.pickle``, removes consecutive
duplicate measurements from every record, drops records that end up with no
measurements, and writes:

* ``<folderpath>/all_with_groundtruth_cleaned.pickle`` -- the cleaned data
* ``<folderpath>/all_with_groundtruth_cleaned.stat``   -- one CSV line per
  distinct measurement count: ``num,count,cumulative_count``
"""

import os
import sys
import pickle
from collections import Counter


def drop_consecutive_duplicates(measurements):
    """Remove, in place, every row that repeats the row right before it.

    Two rows count as duplicates when their first three comma-separated
    fields are equal; any further fields are ignored. The first row of each
    run of duplicates is the one that is kept. Rows with fewer than three
    fields are compared on the fields they do have instead of crashing.

    measurements -- mutable list of comma-separated strings.
    """
    kept = []
    previous = None  # a tuple never equals None, so the first row is kept
    for row in measurements:
        fingerprint = tuple(row.split(',')[:3])
        if fingerprint != previous:
            kept.append(row)
        previous = fingerprint
    # Slice assignment keeps the same list object, so whatever container
    # holds it (data[key][1]) sees the cleaned content.
    measurements[:] = kept


def clean_data(data):
    """De-duplicate every record of *data* in place.

    data -- dict whose values hold a list of measurement strings at index 1.

    Records left with zero measurements are deleted from *data*.
    Returns a dict mapping "number of measurements" to "number of records
    with that many measurements" for the records that remain.
    """
    cleaned_dist = Counter()
    # Iterate over a snapshot of the keys: deleting from a dict while
    # iterating it directly raises RuntimeError on Python 3.
    for key in list(data):
        measurements = data[key][1]
        drop_consecutive_duplicates(measurements)
        length_after = len(measurements)
        if length_after == 0:
            del data[key]
        else:
            cleaned_dist[length_after] += 1
    return dict(cleaned_dist)


def write_stats(cleaned_dist, stat_fp):
    """Write ``num,count,cumulative_count`` lines to *stat_fp*, sorted by num."""
    with open(stat_fp, 'w') as f:
        total_count = 0
        for num, count in sorted(cleaned_dist.items()):
            total_count += count
            f.write("{},{},{}\n".format(num, count, total_count))


def main(argv=None):
    """Run the cleaning step; return the process exit code.

    argv -- argument vector (defaults to ``sys.argv``); ``argv[1]`` is the
            folder that contains ``all_with_groundtruth.pickle``.

    Returns 0 on success, 1 when the folder argument is missing and 2 when
    the input pickle does not exist.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print("usage: {} <folderpath>".format(argv[0] if argv else "script"))
        return 1

    ############################
    # extract measurements
    ############################
    folderpath = argv[1]
    pickle_fp_with_gt = "{}/all_with_groundtruth.pickle".format(folderpath)
    cleaned_pickle_fp_with_gt = "{}/all_with_groundtruth_cleaned.pickle".format(folderpath)
    cleaned_stat_fp_with_gt = "{}/all_with_groundtruth_cleaned.stat".format(folderpath)

    if not os.path.isfile(pickle_fp_with_gt):
        print("run 002 script first")
        return 2

    # NOTE: pickle.load executes arbitrary code from the file; only run this
    # on pickles produced by the previous pipeline step, never on untrusted data.
    with open(pickle_fp_with_gt, 'rb') as f:
        data = pickle.load(f)

    # clean data
    cleaned_dist = clean_data(data)

    # The with-block guarantees the pickle is flushed and closed.
    with open(cleaned_pickle_fp_with_gt, 'wb') as f:
        pickle.dump(data, f)

    # summarize number of measurements
    write_stats(cleaned_dist, cleaned_stat_fp_with_gt)
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||||
|
|
@ -8,16 +8,17 @@ import pickle
|
||||||
# extract measurements
|
# extract measurements
|
||||||
############################
|
############################
|
||||||
folderpath = sys.argv[1]
|
folderpath = sys.argv[1]
|
||||||
pickle_fp_with_gt = "{}/all_with_groundtruth.pickle".format(folderpath)
|
pickle_fp_with_gt = "{}/all_with_groundtruth_cleaned.pickle".format(folderpath)
|
||||||
|
|
||||||
if not os.path.isfile(pickle_fp_with_gt):
|
if not os.path.isfile(pickle_fp_with_gt):
|
||||||
print("run 002 script first")
|
print("run 003a script first")
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
|
|
||||||
data = pickle.load(open(pickle_fp_with_gt, 'rb'))
|
data = pickle.load(open(pickle_fp_with_gt, 'rb'))
|
||||||
|
|
||||||
outputfolder = folderpath.rstrip('/') + '_extracted'
|
threshold = int(input("please enter minimal number of measurements:"))
|
||||||
|
|
||||||
|
outputfolder = '{}_extracted_thresh_{}'.format(folderpath.rstrip('/'), threshold)
|
||||||
if not os.path.isdir(outputfolder):
|
if not os.path.isdir(outputfolder):
|
||||||
os.makedirs(outputfolder)
|
os.makedirs(outputfolder)
|
||||||
|
|
||||||
|
|
@ -36,8 +37,8 @@ for filepath in data:
|
||||||
counter += 1
|
counter += 1
|
||||||
if counter % 1000 == 0:
|
if counter % 1000 == 0:
|
||||||
print("progress: {} out of {} files extracted ({:.2f}%)".format(counter, len(data), 100.0 * counter / len(data)))
|
print("progress: {} out of {} files extracted ({:.2f}%)".format(counter, len(data), 100.0 * counter / len(data)))
|
||||||
if len(data[filepath][1]) < 10:
|
if len(data[filepath][1]) < threshold:
|
||||||
print("{}: entry less than 10, skipping".format(filepath))
|
print("{}: entry less than {}, skipping".format(filepath, threshold))
|
||||||
continue
|
continue
|
||||||
counter_valid += 1
|
counter_valid += 1
|
||||||
if not os.path.isfile(filepath):
|
if not os.path.isfile(filepath):
|
||||||
Binary file not shown.
|
After Width: | Height: | Size: 19 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 28 KiB |
Loading…
Reference in New Issue