diff --git a/opencellid_parsing/001.data_extraction.py b/opencellid_parsing/001.parse_measurements.py
similarity index 89%
rename from opencellid_parsing/001.data_extraction.py
rename to opencellid_parsing/001.parse_measurements.py
index 7d4b79a..618e047 100644
--- a/opencellid_parsing/001.data_extraction.py
+++ b/opencellid_parsing/001.parse_measurements.py
@@ -92,28 +92,12 @@ else:
     files = [file for file in os.listdir(folderpath) if '.csv' in file]
 
-for file in sorted(files, key=lambda x: x.rstrip('.csv').split('_')[-1]):
+for file in sorted(files, key=lambda x: int(x.rstrip('.csv').split('_')[-1])):
     print("looking at file: {}".format(file))
 
     pickle_file = file.replace('.csv', '_parsed.pickle')
-    if os.path.isfile("{}/{}".format(folderpath, pickle_file)):
-        print("found the pickle, loading directly..")
-        mybufferdict = pickle.load(open("{}/{}".format(folderpath, pickle_file), 'rb'))
-
-        print("extracting each file to disk..")
-        counter = 0
-        for filepath in mybufferdict:
-            if not os.path.isfile(filepath):
-                with open(filepath, 'w') as of:
-                    of.write(headlines.get(mybufferdict[filepath][0], headline) + '\n')
-            with open(filepath, 'a') as of:
-                for dataline in mybufferdict[filepath][1]:
-                    of.write(dataline + "\n")
-            counter += 1
-            if counter % 1000 == 0:
-                print("progress: {} out of {} extracted ({:.2f}%)".format(counter, len(mybufferdict), 100.0 * counter / len(mybufferdict)))
-    else:
+    if not os.path.isfile("{}/{}".format(folderpath, pickle_file)):
         print("parsing and loading in memory..")
         with open("{}/{}".format(folderpath, file), 'r') as f:
             lines = f.readlines()
 
 
@@ -198,11 +182,6 @@ for file in sorted(files, key=lambda x: x.rstrip('.csv').split('_')[-1]):
         else:
             mydict = {}
         outputsubfolder = "{}/unseen/{}/{}/{}/{}".format(outputfolder, radio, mcc, net, area)
-        try:
-            if not os.path.isdir(outputsubfolder):
-                os.makedirs(outputsubfolder)
-        except BaseException:
-            raise
         gt_loc = mydict.get(key, {}).get('location', (float('nan'), float('nan')))
         filepath = "{}/{}_{:.6f}_{:.6f}.csv".format(outputsubfolder, cell, gt_loc[0], gt_loc[1])
         if filepath not in mybufferdict:
diff --git a/opencellid_parsing/002.extract_measurements.py b/opencellid_parsing/002.extract_measurements.py
new file mode 100644
index 0000000..0715439
--- /dev/null
+++ b/opencellid_parsing/002.extract_measurements.py
@@ -0,0 +1,68 @@
+#!/usr/bin/python
+
+import os
+import sys
+import pickle
+
+print("load ~55GB data into memory! do not try this if not on server machine")
+
+
+############################
+# extract measurements
+############################
+folderpath = sys.argv[1]
+all_pickle_fp = "{}/all.pickle".format(folderpath)
+
+outputfolder = folderpath.rstrip('/') + '_extracted'
+if not os.path.isdir(outputfolder):
+    os.makedirs(outputfolder)
+
+headline = '#lon,lat,sig,measure_at,upload_at,rating,speed,direction'
+headlines = {
+    'UMTS': headline + ',rnc,cidshort,psc',
+    'GSM': headline + ',timingadv',
+    'LTE': headline + ',timingadv,tac,pci',
+    'CDMA': headline + ',sid,nid,bid',
+}
+
+if os.path.isfile(all_pickle_fp):
+    # load data only
+    data = pickle.load(open(all_pickle_fp, 'rb'))
+else:
+    # load and combine data
+    files = [file for file in os.listdir(folderpath) if '_parsed.pickle' in file]
+    data = {}
+    for file in sorted(files, key=lambda x: int(x.rstrip('_parsed.pickle').split('_')[-1])):
+        print("loading {}".format(file))
+        datatmp = pickle.load(open("{}/{}".format(folderpath, file), 'rb'))
+        print("combining..")
+        for key in datatmp:
+            if key not in data:
+                data[key] = datatmp[key]
+            else:
+                data[key][1] += datatmp[key][1]
+
+    pickle.dump(data, open(all_pickle_fp, 'wb'))
+
+
+print("extracting each file to disk (only groundtruth we cared)..")
+counter = 0
+for filepath in data:
+    if "with_groundtruth" not in filepath:
+        continue
+    if not os.path.isfile(filepath):
+        outputsubfolder = os.path.dirname(filepath)
+        if not os.path.isdir(outputsubfolder):
+            try:
+                os.makedirs(outputsubfolder)
+            except BaseException:
+                print("err: cannot create folder {}!!!!".format(outputsubfolder))
+                continue
+        with open(filepath, 'w') as of:
+            of.write(headlines.get(data[filepath][0], headline) + '\n')
+    with open(filepath, 'a') as of:
+        for dataline in data[filepath][1]:
+            of.write(dataline + "\n")
+    counter += 1
+    if counter % 1000 == 0:
+        print("progress: {} out of {} files extracted ({:.2f}%)".format(counter, len(data), 100.0 * counter / len(data)))