def compress_tar(infile, outname = ''):
    """Rewrite the tar archive ``infile`` as a gzip-compressed tar archive.

    Every member of ``infile`` (name, metadata and file contents) is copied
    unchanged into a new ``tar.gz`` archive.  Nothing is unpacked to disk, so
    the current working directory is never touched and no directory has to be
    removed afterwards.

    The new archive is first written to a temporary file located next to
    ``outname`` and only moved into place once it is complete.  This makes it
    safe to compress an archive in place (``outname`` equal to ``infile``):
    if anything goes wrong, the original file is left as it was.

    Args:
        infile: Path to an existing tar archive.  Any compression that
            ``tarfile`` can detect is accepted, so calling this on an archive
            that is already gzip-compressed is harmless.
        outname: Path of the compressed archive to write.  When empty, the
            archive is compressed in place and ``infile`` is overwritten.

    Returns:
        The path of the compressed archive that was written.

    Raises:
        tarfile.TarError: If ``infile`` cannot be read as a tar archive.
        OSError: If ``infile`` cannot be opened or the output cannot be written.
    """
    # Function-scope imports keep this helper independent of the module's
    # top-level import block.
    import os
    import tarfile

    if not outname:
        outname = infile

    # Sibling temporary file: same directory (hence same filesystem) as the
    # destination, so the final os.replace() is a plain rename.
    tmp_path = '%s.tmp%d' % (outname, os.getpid())
    try:
        # 'r:*' lets tarfile detect the input compression by itself.
        with tarfile.open(infile, 'r:*') as src:
            with tarfile.open(tmp_path, 'w:gz') as dst:
                for member in src:
                    # Only regular files carry a data stream; directories and
                    # links are fully described by their header.
                    payload = src.extractfile(member) if member.isreg() else None
                    dst.addfile(member, payload)
        os.replace(tmp_path, outname)
    finally:
        # Still present only when something failed before the rename.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    return outname