got the stats
This commit is contained in:
parent
f1399395a9
commit
bac10eb855
|
|
@ -48,5 +48,5 @@ for key in data:
|
|||
data_unseen[key] = data[key]
|
||||
|
||||
pickle.dump(data_with_gt, open(pickle_fp_with_gt, 'wb'))
|
||||
pickle.dump(data_no_gt, open(pickle_no_gt, 'wb'))
|
||||
pickle.dump(data_no_gt, open(pickle_fp_no_gt, 'wb'))
|
||||
pickle.dump(data_unseen, open(pickle_fp_unseen, 'wb'))
|
||||
|
|
|
|||
|
|
@ -0,0 +1,51 @@
|
|||
#!/usr/bin/python
"""Clean the ground-truth measurement pickle and summarize what remains.

Usage: <script> <folderpath>

Reads ``<folderpath>/all_with_groundtruth.pickle``, collapses consecutive
measurements that share the same (lon, lat, sig) fields, drops entries that
have no measurements left, and writes:

* ``<folderpath>/all_with_groundtruth_cleaned.pickle`` -- the cleaned data
* ``<folderpath>/all_with_groundtruth_cleaned.stat`` -- one CSV line per
  measurement count: ``num_measurements,num_entries,cumulative_entries``
"""

import os
import sys
import pickle


def _dedupe_consecutive(records):
    """Collapse runs of consecutive duplicate records in place.

    Two records are duplicates when their first three comma-separated fields
    (lon, lat, sig) are equal; the first record of each run is kept.

    The list is mutated through slice assignment instead of being rebound,
    because the caller reaches it as ``data[key][1]`` and the container
    holding it may be immutable (e.g. a tuple).

    Returns the number of records remaining.
    Raises ValueError if a record has fewer than three fields.
    """
    kept = []
    previous = None
    for record in records:
        lon, lat, sig = record.split(',')[0:3]
        current = (lon, lat, sig)
        if not kept or current != previous:
            kept.append(record)
        previous = current
    records[:] = kept
    return len(records)


def clean_measurements(data):
    """Deduplicate the measurements of every entry of ``data`` in place.

    ``data[key][1]`` is expected to be a list of comma-separated measurement
    strings. Entries left with no measurements are removed from ``data``.

    Returns a dict mapping "number of measurements" to the number of
    remaining entries that have exactly that many.
    """
    cleaned_dist = {}
    # Iterate over a snapshot of the keys: deleting from a dict while
    # iterating over its live key view raises RuntimeError on Python 3.
    for key in list(data):
        length_after = _dedupe_consecutive(data[key][1])
        if length_after == 0:
            del data[key]
        else:
            cleaned_dist[length_after] = cleaned_dist.get(length_after, 0) + 1
    return cleaned_dist


def stat_lines(cleaned_dist):
    """Return the lines of the .stat file for ``cleaned_dist``.

    Lines are ordered by measurement count; each one is
    ``num,count,cumulative_count`` followed by a newline.
    """
    lines = []
    total_count = 0
    for num, count in sorted(cleaned_dist.items()):
        total_count += count
        lines.append("{},{},{}\n".format(num, count, total_count))
    return lines


def main(argv):
    """Run the cleaning step for the folder named in ``argv[1]``.

    Returns the process exit code: 0 on success, 2 when the folder argument
    or the input pickle is missing.
    """
    if len(argv) < 2:
        print("usage: {} <folderpath>".format(argv[0] if argv else "script"))
        return 2

    ############################
    # extract measurements
    ############################
    folderpath = argv[1]
    pickle_fp_with_gt = "{}/all_with_groundtruth.pickle".format(folderpath)
    cleaned_pickle_fp_with_gt = "{}/all_with_groundtruth_cleaned.pickle".format(folderpath)
    cleaned_stat_fp_with_gt = "{}/all_with_groundtruth_cleaned.stat".format(folderpath)

    if not os.path.isfile(pickle_fp_with_gt):
        print("run 002 script first")
        return 2

    # NOTE: pickle can execute arbitrary code while loading; only feed this
    # script pickles produced by the preceding pipeline step.
    with open(pickle_fp_with_gt, 'rb') as f:
        data = pickle.load(f)

    # clean data
    cleaned_dist = clean_measurements(data)

    with open(cleaned_pickle_fp_with_gt, 'wb') as f:
        pickle.dump(data, f)

    # summarize number of measurements
    with open(cleaned_stat_fp_with_gt, 'w') as f:
        f.writelines(stat_lines(cleaned_dist))

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||
|
|
@ -8,16 +8,17 @@ import pickle
|
|||
# extract measurements
|
||||
############################
|
||||
folderpath = sys.argv[1]
|
||||
pickle_fp_with_gt = "{}/all_with_groundtruth.pickle".format(folderpath)
|
||||
pickle_fp_with_gt = "{}/all_with_groundtruth_cleaned.pickle".format(folderpath)
|
||||
|
||||
if not os.path.isfile(pickle_fp_with_gt):
|
||||
print("run 002 script first")
|
||||
print("run 003a script first")
|
||||
sys.exit(2)
|
||||
|
||||
|
||||
data = pickle.load(open(pickle_fp_with_gt, 'rb'))
|
||||
|
||||
outputfolder = folderpath.rstrip('/') + '_extracted'
|
||||
threshold = int(input("please enter minimal number of measurements:"))
|
||||
|
||||
outputfolder = '{}_extracted_thresh_{}'.format(folderpath.rstrip('/'), threshold)
|
||||
if not os.path.isdir(outputfolder):
|
||||
os.makedirs(outputfolder)
|
||||
|
||||
|
|
@ -36,8 +37,8 @@ for filepath in data:
|
|||
counter += 1
|
||||
if counter % 1000 == 0:
|
||||
print("progress: {} out of {} files extracted ({:.2f}%)".format(counter, len(data), 100.0 * counter / len(data)))
|
||||
if len(data[filepath][1]) < 10:
|
||||
print("{}: entry less than 10, skipping".format(filepath))
|
||||
if len(data[filepath][1]) < threshold:
|
||||
print("{}: entry less than {}, skipping".format(filepath, threshold))
|
||||
continue
|
||||
counter_valid += 1
|
||||
if not os.path.isfile(filepath):
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 19 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 28 KiB |
Loading…
Reference in New Issue