diff --git a/data/lists/not_names.txt b/data/lists/not_names.txt
index cd2fce5572016679ab58dc253b5314bdcbca7265..886d368ba157da380457bf44d024fe02d19450f9 100644
--- a/data/lists/not_names.txt
+++ b/data/lists/not_names.txt
@@ -12,6 +12,7 @@ Herr
 Kasse
 nicht
 Rath
+Schrit
 Seite
 selber
 Steuer
diff --git a/data/lists/wrongly_identified_speakers.txt b/data/lists/wrongly_identified_speakers.txt
index 2c33e4078424c9470845645b17210d3830e96cd9..11b584bb751ae251dcf2cc4b6c15ecf4440757bd 100644
--- a/data/lists/wrongly_identified_speakers.txt
+++ b/data/lists/wrongly_identified_speakers.txt
@@ -2,6 +2,7 @@ also check for council:
 -----------------------
 1925/20029870 and several more: Baumann, Berichterstatter --> not uniquely identified but there is only one SR Baumann
+1925/20029937: Schneider, Berichterstatter --> NR, not SR
 
 one MP not active in whole year, leads to other not uniquely identified
 
@@ -14,7 +15,13 @@ one MP not active in whole year, leads to other not uniquely identified
 
 identified as speech start but is in text:
 ------------------------------------------
+do I really need to look at the first two lines? maybe one is sufficient?
+look for typical terms such as gestellt, gesagt, etc.
+
+1891/20026455: Ein weiterer Antrag ist derjenige des Hrn. Dufour. Herr Dufour hat den Antrag gestellt:
+1891/20026465: Wir haben nun aber, dem Gedankengang des Hrn. Bühler folgend, die Erklärung gewählt:
 1891/20026489: Um jeden Zweifel zu heben, beantrage ich Ihnen folgende Redaktion des Art. 2 :
+1894/20026607: Müller gegenüber drei anderen durchgedrungen, welche lautete:
 1925/20029903: Nun hat Herr Nationalrat Huber schon am 5. De-zember 1924 folgende Kleine Anfrage gestellt:
 1925/20029863: ganz gut gehen. Es ist ja wahr, was Herr Brügger gesagt hat: --> finds Lanz and Brügger
 1925/20029917: Mögen Sie nun aber denken wie Herr Oberst Brügger oder mögen Sie denken wie ich: --> identified as speech start but is in text
@@ -23,6 +30,18 @@ identified as speech start but is in text:
 1925/20029981: Brügger möchte ich sagen: --> identified as speech start but is in text
 
 
+wrongly spelled city
+--------------------
+1925/20029963: Jenny Ennend (instead of Ennenda)
+1925/20029995,96: Keller Zurich (instead of Zürich)
+
+
+
+Appenzeller
+-----------
+1894/20026597: Sonderegger
+1894/20026618: Sonderegger
+
 some other persons wrongly identified as MP
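
A minimal sketch of the heuristic hinted at in the note above (look only at the first line of a candidate speech start and treat it as quoted text when it is a reporting sentence). The function name and the term list are illustrative assumptions, not part of the existing pipeline:

# hypothetical helper, not part of the pipeline
REPORTING_TERMS = ('gestellt', 'gesagt', 'gewählt', 'lautete', 'beantrage')

def looks_like_quoted_text(first_line):
    # the examples above end in a colon and contain a reporting verb,
    # e.g. "... folgende Kleine Anfrage gestellt:"
    line = first_line.strip()
    return line.endswith(':') and any(t in line.lower() for t in REPORTING_TERMS)

# 1925/20029903 from the list above
print(looks_like_quoted_text('Nun hat Herr Nationalrat Huber schon am 5. De-zember 1924 folgende Kleine Anfrage gestellt:'))  # True

Whether checking a single line is enough (the question in the note) then comes down to how often the reporting verb only appears on the second line.
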
diff --git a/src/python/run_extract_discussions.py b/src/python/run_extract_discussions.py
index d7fd239333411cce94dc709d9155f1c96863c42e..2864c981f394c9a348da2f2803c66b39a3e4dac0 100644
--- a/src/python/run_extract_discussions.py
+++ b/src/python/run_extract_discussions.py
@@ -4,8 +4,8 @@
 # Code to extract discussions from corrected XML files
 #%%
 # to work with atom
-#%load_ext autoreload
-#%autoreload 2
+%load_ext autoreload
+%autoreload 2
 
 import pickle
 import time
@@ -26,7 +26,7 @@ from utils_proc import call_with_out
 # specify input and output files
 
 # needed for running in atom, can be ignored
-year = '1925'
+year = '1894'
 input_lastnames = "data/politicians/lastnames/" + year + "_lastnames.pickle"
 input_correctedxml = "data/AB/" + year + "/04_correctedxml.tar.gz"
 input_correctedmeta = "data/AB/" + year + "/03_correctedmeta.tar.gz"
@@ -111,20 +111,21 @@ utils_proc.compress_tar(output_annotatedxml)
 
 
 #%%
-## to test for one file
-#file_tarpath = './1893/20026526_datacorr.xml'
-#
-#id_doc = file_tarpath.split('/')[-1][:8]
-#
-## instantiate document object (always from original pdf)
-#infile_aux = year + '/' + id_doc + '.pdf'
-#file_doc = defc.Document(infile_aux, folder_database)
-#
-#if (file_doc.check_discussion()) and (id_doc not in ['20032463', '20032952', '20014332']):
-    #print(id_doc + '\n')
-#
-    #file_doc.df_lastnames = df_lastnames
-    #file_doc.annotate_xml()
+# to test for one file
+file_tarpath = './1925/20029981_datacorr.xml'
+
+id_doc = file_tarpath.split('/')[-1][:8]
+
+# instantiate document object (always from original pdf)
+infile_aux = year + '/' + id_doc + '.pdf'
+file_doc = defc.Document(infile_aux, folder_database)
+
+if (file_doc.check_discussion()) and (id_doc not in ['20032463', '20032952', '20014332']):
+    print(id_doc + '\n')
+
+    file_doc.df_lastnames = df_lastnames
+    file_doc.list_notnames = list_notnames
+    file_doc.annotate_xml()
 
 #%%
@@ -134,3 +135,16 @@ utils_proc.compress_tar(output_annotatedxml)
 #id_doc
 #len(files_to_process)
 
+list_bla = [1, 2,3]
+list_bla.extend([4, 5])
+list_bla
+
+if 3 in [1,3, 4]:
+    print('yes')
+
+
+#%%
+
+str_name = 'Blumer' # (CANTON MISSING)'
+
+print(str_name.split(' '))
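
The %load_ext autoreload / %autoreload 2 lines uncommented above are IPython magics: they work in Atom/Hydrogen cells but raise a SyntaxError when the file is run as a plain Python script. A small sketch of a guard that keeps the magics in interactive use and degrades gracefully otherwise (assuming IPython is installed in the interactive environment):

# enable autoreload only when an IPython kernel is active
try:
    from IPython import get_ipython
    _ipython = get_ipython()
    if _ipython is not None:
        _ipython.run_line_magic('load_ext', 'autoreload')
        _ipython.run_line_magic('autoreload', '2')
except ImportError:
    pass  # plain Python: skip autoreload

This keeps the "to work with atom" convenience without breaking batch runs of the script.
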
diff --git a/src/python/utils_annot.py b/src/python/utils_annot.py
index f9148b8da008cf3b26d3ce19570949c51ff4bfcb..8be49c7f76fa69ea0469ec99b70efa42f54b72d7 100644
--- a/src/python/utils_annot.py
+++ b/src/python/utils_annot.py
@@ -522,7 +522,7 @@ def find_names(list_oi, str_name, str_role, list_uniqueID, df_names, str_canton,
                   'Bundesrat', 'Bundesrath', 'Bundesrätin', 'conseiller fédéral', 'Vizepräsident']
 
-    list_cantonname, list_cantonabbr, list_citizenship, list_firstname = get_list_cantons(df_names)
+    list_roles_ext = ['Mehrheit', 'Minderheit', 'majorité', 'minorité', 'deutscher', 'deutsche', 'français', 'française', 'Kommission', 'commission']
 
     # extract list and array of last names
     list_all_names = list(df_names['name_short'])
@@ -537,7 +537,21 @@ def find_names(list_oi, str_name, str_role, list_uniqueID, df_names, str_canton,
         if bln_print:
             print('now is about: ------', term)
 
-        if name_type == 'canton':
+        if term in list_roles:
+            # get correct name and uniqueID, or role, for that term
+            str_name, str_role, list_uniqueID, name_type = get_string(term, str_name, str_role, list_uniqueID, str_canton)
+
+            if bln_print:
+                print('found a role', term)
+
+            # TODO: also look for similar terms (misspellings)
+
+        elif term in list_roles_ext:
+            pass
+            # TODO: extract whether it is minority or majority and save that information
+
+        elif name_type == 'canton':
+            list_cantonname, list_cantonabbr, list_citizenship, list_firstname = get_list_cantons(df_names, str_name.split(' ')[0])
             canton_type = ''
             if term in list_cantonname:
                 str_canton = term
@@ -559,20 +573,34 @@ def find_names(list_oi, str_name, str_role, list_uniqueID, df_names, str_canton,
                 else:
                     print('might be a canton', term, list_oi, str_name, str_role)
 
+            # if a canton or similar was found
             if canton_type:
                 # get rid of CANTON MISSING
                 str_name = str_name.split(' ')[0]
 
                 # extract uniqueID
-#                list_temp = []
-                list_temp = list(df_names.loc[(df_names['type']==name_type) & (df_names['name_short']==str_name) & (df_names[canton_type]==str_canton)].iloc[:, df_names.columns.get_loc('uniqueIndex')])
+                # if Citizenship, do proper comparison on tokenized entries
+                if canton_type == 'Citizenship':
+                    df_temp = df_names.loc[(df_names['type']==name_type) & (df_names['name_short']==str_name)]
+                    list_citizenship = [citizenship for citizenship in df_temp[canton_type] if str_canton in tokenizer_canton.tokenize(citizenship)]
+                    if len(list_citizenship) == 1:
+                        str_citizenship = list_citizenship[0]
+                    else:
+                        # zero or several matches: report it and fall back to an empty string so the lookup below finds no row
+                        print('found no or more than one person with citizenship', str_canton, str_name)
+                        str_citizenship = ''
+
+                    list_temp = list(df_names.loc[(df_names['type']==name_type) & (df_names['name_short']==str_name) & (df_names[canton_type]==str_citizenship)].iloc[:, df_names.columns.get_loc('uniqueIndex')])
+
+                else:
+                    list_temp = list(df_names.loc[(df_names['type']==name_type) & (df_names['name_short']==str_name) & (df_names[canton_type]==str_canton)].iloc[:, df_names.columns.get_loc('uniqueIndex')])
 
                 print(list_temp, list_uniqueID)
                 list_uniqueID = list_temp
 
         # if term is not easily mistaken as a name (avoid false positives)
-        if term not in list_notnames:
+        elif term not in list_notnames:
 
-            # if term is in the list of all names and roles
-            if term in (list_all_names + list_roles):
+            # if term is in the list of all names
+            if term in list_all_names:
                 # get correct name and uniqueID, or role, for that term
                 str_name, str_role, list_uniqueID, name_type = get_string(term, str_name, str_role, list_uniqueID, str_canton)
@@ -841,9 +869,18 @@ def dict_only_text(dictionary):
 
 
 # function to get list of places
-def get_list_cantons(df_names):
-    df_temp = df_names.loc[df_names['type']=='canton']
+def get_list_cantons(df_names, str_name = ''):
+    if str_name:
+        df_temp = df_names.loc[(df_names['type']=='canton') & (df_names['name_short']==str_name)]
+    else:
+        df_temp = df_names.loc[df_names['type']=='canton']
+    print(df_temp)
     list_cantonname = list(df_temp['CantonName'])
+    for canton in ['Basel-Stadt', 'Basel-Landschaft']:
+        if canton in list_cantonname:
+            list_cantonname.extend(['Basel'])
+    if 'Graubünden' in list_cantonname:
+        list_cantonname.extend(['Bünden'])
     list_cantonabbr = list(df_temp['CantonAbbreviation'])
     list_citizenship = list(df_temp['Citizenship'])
     list_citizenship = [city[:-5] for item in list_citizenship for city in item.split(',')]
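
The Citizenship branch added above compares tokens via tokenizer_canton, whose definition is not part of this diff. A standalone sketch of that comparison with a stand-in tokenizer; the entry format 'Ort (XX)' is inferred from the city[:-5] stripping in get_list_cantons, and both the tokenizer pattern and the sample data are assumptions:

import re

# stand-in for tokenizer_canton, which is defined elsewhere in utils_annot.py
def tokenize_canton(text):
    # split a citizenship entry like 'Ennenda (GL)' into ['Ennenda', 'GL']
    return re.findall(r'[^\s(),]+', text)

citizenship_entries = ['Ennenda (GL)', 'Zürich (ZH)']
str_canton = 'Ennenda'

# comparing whole tokens avoids the substring false positives that a plain
# 'str_canton in entry' check would produce for short place names
matches = [entry for entry in citizenship_entries if str_canton in tokenize_canton(entry)]
print(matches)  # ['Ennenda (GL)']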