From 2ba372616477258b2c655ca4ab43cff0f2973731 Mon Sep 17 00:00:00 2001 From: Lilian Gasser <gasserli@ethz.ch> Date: Wed, 30 Jan 2019 10:23:27 +0100 Subject: [PATCH] update gitignore, minor change to runextractdiscussions --- .gitignore | 2 +- src/python/run_extract_discussions.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 29d3d1c9..40a6fbfd 100644 --- a/.gitignore +++ b/.gitignore @@ -285,4 +285,4 @@ data/train_NER/1[0-9][0-9][0-9]_20[0-9][0-9][0-9][0-9][0-9][0-9]_french.txt # notunique files -data/lists/notunique*.txt +data/lists/notunique_*.txt diff --git a/src/python/run_extract_discussions.py b/src/python/run_extract_discussions.py index e8a827d9..15826cc8 100644 --- a/src/python/run_extract_discussions.py +++ b/src/python/run_extract_discussions.py @@ -26,7 +26,7 @@ from utils_proc import call_with_out # specify input and output files # needed for running in atom, can be ignored -year = '1971' +year = '1951' input_lastnames = "data/politicians/lastnames/" + year + "_MPs.pickle" input_correctedxml = "data/AB/" + year + "/04_correctedxml.tar.gz" input_correctedmeta = "data/AB/" + year + "/03_correctedmeta.tar.gz" @@ -121,7 +121,7 @@ with open(input_notnames) as f: list_notnames = [term.rstrip() for term in list_notnames] # to test for one file -file_tarpath = './1971/20000726f_datacorr.xml' +file_tarpath = './1951/20035006_datacorr.xml' id_doc = file_tarpath.split('/')[-1][:8] @@ -158,3 +158,10 @@ datetime.datetime.strptime(str_date, '%Y-%m-%d %H:%M') listilist = ['a', 'b', 'c', 'd'] listilist[0,2] +# OPTIMIZE + + + + +if 'ab' in 'abc': + print('yay') -- GitLab