diff --git a/src/python/def_classes.py b/src/python/def_classes.py
index 03355186e36e6c17bca2cb0cfa94da83253d56e3..6ee0909de8db4fe8a185c45145d80cb8a76838c4 100644
--- a/src/python/def_classes.py
+++ b/src/python/def_classes.py
@@ -762,7 +762,7 @@ class Document:
                     #name_outcorrxml = self.name_outcorrxml)
 
         with open('data/lists/notunique.txt', 'a') as f:
-            f.write(' '.join((str(self.year), self.id_doc, '\n')))
+            f.write(' '.join(('\n\n-------', str(self.year), self.id_doc, '\n')))
 
         print('we have a main corr XML file')
         #utils_proc.tar_extractfile(self.name_xml_corr[1], self.folder_database, name_file = self.name_outcorrxml)
diff --git a/src/python/run_extract_discussions.py b/src/python/run_extract_discussions.py
index 0bb90c94bdaa6425eb85826f40cccdf34d336a9f..e8a827d9f3ca5022cc9dc613054c0902698f9755 100644
--- a/src/python/run_extract_discussions.py
+++ b/src/python/run_extract_discussions.py
@@ -26,7 +26,7 @@ from utils_proc import call_with_out
 # specify input and output files
 
 # needed for running in atom, can be ignored
-year = '1925'
+year = '1971'
 input_lastnames = "data/politicians/lastnames/" + year + "_MPs.pickle"
 input_correctedxml = "data/AB/" + year + "/04_correctedxml.tar.gz"
 input_correctedmeta = "data/AB/" + year + "/03_correctedmeta.tar.gz"
@@ -115,8 +115,13 @@ utils_proc.compress_tar(output_annotatedxml)
 
 
 #%%
+with open(input_notnames) as f:
+    list_notnames = f.readlines()
+
+list_notnames = [term.rstrip() for term in list_notnames]
+
 # to test for one file
-file_tarpath = './1925/20029981_datacorr.xml'
+file_tarpath = './1971/20000726f_datacorr.xml'
 
 id_doc = file_tarpath.split('/')[-1][:8]
 
@@ -148,3 +153,8 @@ file_doc.check_discussion()
 str_date = '1925-12-09 08:00'
 import datetime
 datetime.datetime.strptime(str_date, '%Y-%m-%d %H:%M')
+
+
+
+listilist = ['a', 'b', 'c', 'd']
+listilist[0:2]
diff --git a/src/python/utils_annot.py b/src/python/utils_annot.py
index 525eeb8288f2d24b8f9a2e0c6a4c4f9114d13faf..ceedfc9b8d8c44d20972e5a6d29267f66efa0bb6 100644
--- a/src/python/utils_annot.py
+++ b/src/python/utils_annot.py
@@ -249,6 +249,7 @@ def label_speechstart(XML_new, ind_p, ind_t, text, ind_tl_colon, df_names, list_
 
     # initialize flag
     this_is_speech = False
+    flag_print = False
 
     # font text end
     fontend = '[/font]'
@@ -264,10 +265,11 @@ def label_speechstart(XML_new, ind_p, ind_t, text, ind_tl_colon, df_names, list_
         text_start = re.sub(r'[\(\)]','',text[:colon_index_text])
         list_oi = tokenizer.tokenize(text_start)
 
-        if len(list_oi) > 5:
+        if len(list_oi) > 4:
             with open('data/lists/notunique.txt', 'a') as f:
-                f.write(' '.join((str(list_oi), '\n')))
-                
+                f.write(' '.join((str(list_oi), str(len(list_oi)), '\n')))
+            flag_print = True
+
         if bln_print:
             print('possible speech start: ', list_oi)
 
@@ -286,17 +288,22 @@ def label_speechstart(XML_new, ind_p, ind_t, text, ind_tl_colon, df_names, list_
 
         # remove single characters
         # TODO: might need to be changed for fractions (some fractions are abbreviated as single letters)
+        # TODO: needs to be changed to include 'I' for Minderheit I 1891/20000093
         # TODO: maybe exclude I and A to account for Appenzell
         list_oi = [term for term in list_oi if len(term)>1]
 
+        if len(list_oi) > 4 or flag_print:
+            with open('data/lists/notunique.txt', 'a') as f:
+                f.write(' '.join((str(list_oi), str(len(list_oi)), '\n')))
+
         # if possible, find a name from the list
         str_name, str_role, list_uniqueID, str_canton = find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str_council, str_date, bln_print=False)
         if bln_print:
             print('name', str_name, 'role', str_role)
 
-        if len(list_uniqueID) > 1:
+        if len(list_uniqueID) > 1 or flag_print:
             with open('data/lists/notunique.txt', 'a') as f:
-                f.write(' '.join((str_name, str_role, str(list_uniqueID), str_date, str(ind_p), '\n')))
+                f.write(' '.join((str_name, str_role, str(list_uniqueID), str_date, str(ind_p), str(list_oi), '\n')))
 
         # get rid of 'Präsident stimmt nicht Président ne vote pas'
         if set(str_role.split()).intersection(set(['Präsident', 'Präsidentin', 'Président', 'Présidente'])) and not str_name:
@@ -613,25 +620,31 @@ def find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str
                     # get list of cities
                     list_cities = [entry for entry in df_temp[canton_type] if str_canton in get_cities([entry])]
                     str_citizenship = ''
-                    try:
-                        if len(list_cities) == 1:
-                            str_citizenship = list_cities[0]
-                    except:
-                        print('found no or more than one person with citizenship', str_canton, str_name)
-                        pass
+                    if len(list_cities) == 1:
+                        str_citizenship = list_cities[0]
+                        list_temp = list(df_temp.loc[(df_temp['nameType']==name_type) & (df_temp['shortName']==str_name) & (df_temp[canton_type]==str_citizenship)].iloc[:, df_temp.columns.get_loc('uniqueIndex')])
+                        str_completeName = df_temp['completeName'].loc[(df_temp['nameType']==name_type) & (df_temp['shortName']==str_name) & (df_temp[canton_type]==str_citizenship)].iloc[0]
+                    elif len(list_cities) > 1:
+                        print('found more than one person with citizenship', str_canton, str_name, list_cities)
+                        # TODO what happens with these:?
+                        list_temp = list(df_names.loc[(df_names['nameType']==name_type) & (df_names['shortName']==str_name)].iloc[:, df_names.columns.get_loc('uniqueIndex')])
+                        str_completeName = str_name + ' (CANTON MISSING)'
+                    else:
+                        print('found no person with citizenship', str_canton, str_name, list_cities)
 
-                    list_temp = list(df_temp.loc[(df_temp['nameType']==name_type) & (df_temp['shortName']==str_name) & (df_temp[canton_type]==str_citizenship)].iloc[:, df_temp.columns.get_loc('uniqueIndex')])
-                    str_completeName = df_temp['completeName'].loc[(df_temp['nameType']==name_type) & (df_temp['shortName']==str_name) & (df_temp[canton_type]==str_citizenship)].iloc[0]
 
                 else:
                     list_temp = list(df_temp.loc[(df_temp['nameType']==name_type) & (df_temp['shortName']==str_name) & (df_temp[canton_type]==str_canton)].iloc[:, df_temp.columns.get_loc('uniqueIndex')])
                     str_completeName = df_temp['completeName'].loc[(df_temp['nameType']==name_type) & (df_temp['shortName']==str_name) & (df_temp[canton_type]==str_canton)].iloc[0]
 
-                print(list_temp, list_uniqueID)
+                print(list_temp, list_uniqueID, str_completeName)
 
                 if len(list_temp) > 0:
                     list_uniqueID = update_list_uniqueID(list_uniqueID, list_temp, name_type)
-                    if str_completeName.split(' ')[0] == str_name:
+                    print(str_completeName)
+                    if 'CANTON MISSING' in str_completeName:
+                        str_name = add_to_string('', str_completeName)
+                    elif str_completeName.split(' ')[0] == str_name:
                         str_name = add_to_string('', str_completeName)
                     else:
                         str_name = add_to_string(str_name, str_completeName)