diff --git a/.gitignore b/.gitignore index 29d3d1c9bf11eded6327850ede1581914c9eca09..40a6fbfd83703412e219d7f862f006d2db1c5852 100644 --- a/.gitignore +++ b/.gitignore @@ -285,4 +285,4 @@ data/train_NER/1[0-9][0-9][0-9]_20[0-9][0-9][0-9][0-9][0-9][0-9]_french.txt # notunique files -data/lists/notunique*.txt +data/lists/notunique_*.txt diff --git a/src/python/run_extract_discussions.py b/src/python/run_extract_discussions.py index e8a827d9f3ca5022cc9dc613054c0902698f9755..15826cc8a065ce58a7f8613c616b9eeb0c9585d8 100644 --- a/src/python/run_extract_discussions.py +++ b/src/python/run_extract_discussions.py @@ -26,7 +26,7 @@ from utils_proc import call_with_out # specify input and output files # needed for running in atom, can be ignored -year = '1971' +year = '1951' input_lastnames = "data/politicians/lastnames/" + year + "_MPs.pickle" input_correctedxml = "data/AB/" + year + "/04_correctedxml.tar.gz" input_correctedmeta = "data/AB/" + year + "/03_correctedmeta.tar.gz" @@ -121,7 +121,7 @@ with open(input_notnames) as f: list_notnames = [term.rstrip() for term in list_notnames] # to test for one file -file_tarpath = './1971/20000726f_datacorr.xml' +file_tarpath = './1951/20035006_datacorr.xml' id_doc = file_tarpath.split('/')[-1][:8] @@ -158,3 +158,10 @@ datetime.datetime.strptime(str_date, '%Y-%m-%d %H:%M') listilist = ['a', 'b', 'c', 'd'] listilist[0,2] +# OPTIMIZE + + + + +if 'ab' in 'abc': + print('yay')