# propagation_gan/opencellid_parsing/003b.extract_only_enough_da...
#
# 58 lines
# 2.0 KiB
# Python

#!/usr/bin/python
import os
import sys
import pickle
############################
# extract measurements
############################
# Column header shared by every output file; `headlines` extends it with the
# extra columns of each known entry type.
headline = '#lon,lat,sig,measure_at,upload_at,rating,speed,direction'
headlines = {
    'UMTS': headline + ',rnc,cidshort,psc',
    'GSM': headline + ',timingadv',
    'LTE': headline + ',timingadv,tac,pci',
    'CDMA': headline + ',sid,nid,bid',
}


def _percent(part, total):
    """Return `part` as a percentage of `total`, or 0.0 when `total` is 0."""
    return 100.0 * part / total if total else 0.0


def extract_measurements(data, threshold):
    """Write every entry of `data` holding at least `threshold` lines to disk.

    Args:
        data: mapping from output file path to an indexable entry.  Element 0
            of the entry is looked up in `headlines` (falling back to the
            plain `headline`); element 1 is a sized iterable of strings that
            are written one per line.
        threshold: minimal number of lines an entry needs to be written.

    Returns:
        The number of files that were actually written.

    A file that does not exist yet is created (together with its parent
    folder) and starts with the header line.  A file that already exists is
    only appended to, without a second header.
    """
    total = len(data)
    written = 0
    for position, filepath in enumerate(data, 1):
        if position % 1000 == 0:
            print("progress: {} out of {} files extracted ({:.2f}%)".format(
                position, total, _percent(position, total)))

        entry = data[filepath]
        datalines = entry[1]
        if len(datalines) < threshold:
            print("{}: entry less than {}, skipping".format(filepath, threshold))
            continue

        is_new_file = not os.path.isfile(filepath)
        if is_new_file:
            outputsubfolder = os.path.dirname(filepath)
            # An empty dirname means "current directory": nothing to create.
            if outputsubfolder and not os.path.isdir(outputsubfolder):
                try:
                    os.makedirs(outputsubfolder)
                except OSError:
                    # Only filesystem errors are expected here; Ctrl-C and
                    # SystemExit must still propagate.
                    print("err: cannot create folder {}!!!!".format(outputsubfolder))
                    continue

        # Append mode creates the file when it is missing, so a single open
        # covers both the header and the data lines.
        with open(filepath, 'a') as of:
            if is_new_file:
                of.write(headlines.get(entry[0], headline) + '\n')
            of.writelines(dataline + "\n" for dataline in datalines)

        # Count only after the write succeeded, so skipped or failed entries
        # are not reported as extracted.
        written += 1
    return written


def main(argv=None):
    """Extract the sufficiently large entries of the cleaned groundtruth pickle.

    Args:
        argv: argument vector; defaults to `sys.argv`.  `argv[1]` is the
            folder containing `all_with_groundtruth_cleaned.pickle`.  An
            optional `argv[2]` gives the threshold; when absent the user is
            prompted for it.

    Returns:
        The process exit code: 0 on success, 2 when the folder argument or
        the pickle file is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print("usage: {} <folderpath> [threshold]".format(argv[0] if argv else "003b"))
        return 2

    folderpath = argv[1]
    pickle_fp_with_gt = "{}/all_with_groundtruth_cleaned.pickle".format(folderpath)
    if not os.path.isfile(pickle_fp_with_gt):
        print("run 003a script first")
        return 2

    # NOTE(security): pickle.load can execute arbitrary code; only run this on
    # pickle files produced by a trusted earlier step.
    with open(pickle_fp_with_gt, 'rb') as pickle_file:
        data = pickle.load(pickle_file)

    if len(argv) > 2:
        threshold = int(argv[2])
    else:
        threshold = int(input("please enter minimal number of measurements:"))
    outputfolder = '{}_extracted_thresh_{}'.format(folderpath.rstrip('/'), threshold)

    print("extracting each file to disk (only groundtruth we cared)..")
    counter_valid = extract_measurements(data, threshold)
    print("done: {} out of {} files extracted ({:.2f}%)".format(
        counter_valid, len(data), _percent(counter_valid, len(data))))

    # NOTE(review): presumably the paths stored in the pickle live under
    # "<folderpath>_extracted" -- confirm against the script that builds it.
    # os.rename raises if that folder does not exist.
    os.rename('{}_extracted'.format(folderpath.rstrip('/')), outputfolder)
    return 0


if __name__ == "__main__":
    sys.exit(main())