From 829fc6672ba1d66b32de492c764d71e7aba0cfb3 Mon Sep 17 00:00:00 2001
From: Luis Salamanca <luis.salamanca@sdsc.ethz.ch>
Date: Fri, 30 Nov 2018 13:12:33 +0000
Subject: [PATCH] Corrections to scripts: gzip output tar after extract/correct

---
 src/python/run_correctxml.py      |  6 +++++-
 src/python/run_extract_origxml.py |  6 +++++-
 src/python/utils_proc.py          | 15 +++++++++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/src/python/run_correctxml.py b/src/python/run_correctxml.py
index 87e74b7b..45543847 100644
--- a/src/python/run_correctxml.py
+++ b/src/python/run_correctxml.py
@@ -50,5 +50,9 @@ for infile in files_proc:
             #print('Corrected %s' % infile)
         except:
             print("File to correct %s prompted an error" % infile)
-    
+
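+# Re-pack the output tar as a gzip-compressed archive (see utils_proc.compress_tar),
+# e.g. data/AB/${year}/02_extractedxml.tar.gz (NOTE(review): same example path as run_extract_origxml.py; confirm)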
+utils_proc.compress_tar(output_file)            
+            
 print('Total time for correction of year %d: %f' % (int(year_tocomp) ,(time.time() - t1)))
\ No newline at end of file
diff --git a/src/python/run_extract_origxml.py b/src/python/run_extract_origxml.py
index 8af12f7e..e283788c 100644
--- a/src/python/run_extract_origxml.py
+++ b/src/python/run_extract_origxml.py
@@ -49,5 +49,9 @@ for infile in files_proc:
             print('Extracted %s' % infile)
         except:
             print("File %s prompted an error" % infile)
-    
+
+# Re-pack the output tar as a gzip-compressed archive (see utils_proc.compress_tar),
+# e.g. data/AB/${year}/02_extractedxml.tar.gz
+utils_proc.compress_tar(output_file)                
+            
 print('Total time for year %d: %f' % (int(year_tocomp) ,(time.time() - t1)))
\ No newline at end of file
diff --git a/src/python/utils_proc.py b/src/python/utils_proc.py
index 5cf163fa..7348be81 100644
--- a/src/python/utils_proc.py
+++ b/src/python/utils_proc.py
@@ -144,6 +144,21 @@ def addto_tar(input_file, folder_database, name_file):
     tf.close()
     return name_tar
 
+def compress_tar(infile, outname = ''):
+    """Re-pack the tar `infile` as a gzipped tar `outname` (default: `infile` itself).
+    Passes three command strings to call_with_out: extract into the CWD, tar+gzip
+    ./<year>/ (<year> = parent folder of `infile`), then remove ./<year>/."""
+    if not outname:
+        outname = infile
+    # data/AB/<year>/02_extractedxml.tar.gz -> <year>; refuse anything unsafe for rm -rf
+    parts = infile.split('/')
+    if len(parts) < 2 or parts[-2] in ('', '.', '..'):
+        raise ValueError('Cannot derive year folder from path: %s' % infile)
+    year = parts[-2]
+    call_with_out('tar -xf ' + infile)
+    call_with_out('tar -czvf ' + outname + ' ./' + year + '/')
+    call_with_out('rm -rf ' + year)
+    
 
 def correct_metadata(year, id_doc, flag_end):
     
-- 
GitLab