diff --git a/data/lists/wrongly_identified_speakers.txt b/data/lists/wrongly_identified_speakers.txt
index fb0fb51b4accb9b421ae4ed7dcb4fe31e1d64688..b2bd1b7706cfbab542b97012fb2727aaa9b3f2b8 100644
--- a/data/lists/wrongly_identified_speakers.txt
+++ b/data/lists/wrongly_identified_speakers.txt
@@ -1,21 +1,24 @@
+speaker not identifiable:
+-------------------------
+1891/20026455: Dufour (CANTON MISSING)  [1420, 1421] 1891-06-22 15:00 7 twice in same document
+1891/20026465: Zweifel not identified in one instance --> is this a different Zweifel (not the Landammann), or had he already been mentioned before?
+1925/20029836,37,87: Seiler (CANTON MISSING) Berichterstatter [4810, 4815] 1925-03-28 00:00 9
+1925/20029943: Welti (CANTON MISSING)  [5655, 5656] 1925-09-29 00:00 6
 
-also check for council:
------------------------
-1925/20029870 and several more: Baumann, Berichterstatter --> not uniquely identified but there is only one SR Baumann --> solved!
-1925/20029937: Schneider, Berichterstatter --> NR, not SR --> solved!
 
+speaker not uniquely identified when he spoke the second time:
+--------------------------------------------------------------
+1925/20029924: Keller-Aargau Berichterstatter (first time), Keller Berichterstatter (after)
+   1925/20029928,29: Keller Berichterstatter (also first time), maybe check title of document...
 
-one MP not active in whole year, leads to other not uniquely identified
------------------------------------------------------------------------
-1925/20029860ff: Keller-Aargau (in March, there is only one Keller-Aargau, in June, another one joins --> finds two!) --> solved!
-1925/20029967: Seiler (in December, the second Seiler already left) --> finds two!) --> solved!
-1925/20029967: Huber (in December, the second Huber already left) --> finds two!) --> solved because only NR!
-1925/20029882: Naine (in June, there is only one Naine, in December, another one joins --> finds two!) also in ...96, ...97, etc. --> solved!
+identifier is split into two words
+----------------------------------
+1925/20029945: found a name: Schmid-Oberentf elden ['Schmid', 'Oberentf', 'elden'] 0 Schmid (CANTON MISSING)  [4639, 4660]
 
 
 identified as speech start but is in text:
 ------------------------------------------
-do I really need to look on the first two lines? maybe one is sufficient?
+do I really need to look at the first two lines? maybe one is sufficient? --> no, it needs two lines
 look for typical terms such as gestellt, gesagt, etc.
 
 1891/20026455: Ein weiterer Antrag ist derjenige des Hrn. Dufour. Herr Dufour hat den Antrag gestellt:
@@ -24,9 +27,10 @@ look for typical terms such as gestellt, gesagt, etc.
 1894/20026607: Müller gegenüber drei anderen durchgedrungen, welche lautete:
 1925/20029903: Nun hat Herr Nationalrat Huber schon am 5. De-zember 1924 folgende Kleine Anfrage gestellt:
 1925/20029863: ganz gut gehen. Es ist ja wahr, was Herr Brügger gesagt hat: --> finds Lanz and Brügger
+1925/20029891: J'en  viens  enfin  à  M. Belmont.  M. Belmont  a posé  cette  question --> finds Belmont twice
 1925/20029917: Mögen Sie nun aber denken wie Herr Oberst Brügger oder mögen Sie denken wie ich: --> identified as speech start but is in text
 1925/20029917: Herr Hauser sagt:
-1925/20029978: Das ist, was Herr Charles Naine gesagt hat. Herr Hugglersagt:
+1925/20029978: Das ist, was Herr Charles Naine gesagt hat. Herr Hugglersagt: --> occurs a second time in the same document with Naine
 1925/20029981: Brügger möchte ich sagen:
 1971/20000663: de MM. Knüsel et Leu (there must be more speech starts, this is from a list of cantons and people inside a speech, !!! Layout)
 1971/20000007: La  seconde  réaction  qu'a  suscité chez  moi  l'intervention  de  M. Weber  est  le  doute:
@@ -37,18 +41,6 @@ look for typical terms such as gestellt, gesagt, etc.
 1971/20000024: Herr  Kollege  Heimann  stellt  sich  schliesslich  gegen einen  Finanzausgleich  mit  dem  Hinweis
 
 
-wrongly spelled city
---------------------
-1925/20029963: Jenny Ennend (instead of Ennenda)
-1925/20029995,96: Keller Zurich (instead of Zürich)
-1971/? : Berne instead of Bern --> solved with using get_approximate_term for cantons
-
-
-doubled double names:
----------------------
-1971/20000010: Meyer-Boller --> solved!
-
-
 term very similar to one name is actually another name
 ------------------------------------------------------
 1925/20029863: ganz --> finds Lanz, there is a Ganz
@@ -57,6 +49,7 @@ term very similar to one name is actually another name
 
 term is a name
 --------------
+1891/20026489: Um  jeden Zweifel  zu  heben,  beantrage  ich Ihnen folgende  Redaktion  des  Art.  2 --> finds Zweifel
 1971/20000010: Ganz wenige Einzelfragen --> finds Ganz
 1971/20000024: Politisch  gesehen  ist  es  doch  ganz  einfach  so --> finds Ganz
 
@@ -72,7 +65,33 @@ Appenzeller
 1894/20026618: Sonderegger
 
 
-
 some other persons wrongly identified as MP
 -------------------------------------------
 1925/20029833: Sauser-Hall (not a MP)--> Hauser
+
+
+
+solved: also check for council:
+-----------------------
+1925/20029870 and several more: Baumann, Berichterstatter --> not uniquely identified but there is only one SR Baumann --> solved!
+1925/20029937: Schneider, Berichterstatter --> NR, not SR --> solved!
+
+
+solved: one MP not active in whole year, leads to other not uniquely identified
+-----------------------------------------------------------------------
+1925/20029860ff: Keller-Aargau (in March, there is only one Keller-Aargau, in June, another one joins --> finds two!) --> solved!
+1925/20029967: Seiler (in December, the second Seiler already left) --> finds two!) --> solved!
+1925/20029967: Huber (in December, the second Huber already left) --> finds two!) --> solved because only NR!
+1925/20029882: Naine (in June, there is only one Naine, in December, another one joins --> finds two!) also in ...96, ...97, etc. --> solved!
+
+
+solved: doubled double names:
+---------------------
+1971/20000010: Meyer-Boller --> solved!
+
+
+solved: wrongly spelled city
+--------------------
+1925/20029963: Jenny Ennend (instead of Ennenda) --> solved!
+1925/20029995,96: Keller Zurich (instead of Zürich) --> solved!
+1971/? : Berne instead of Bern --> solved with using get_approximate_term for cantons
diff --git a/src/python/def_classes.py b/src/python/def_classes.py
index b572503a90d9df3b4d93ca89809386f790f951c1..03355186e36e6c17bca2cb0cfa94da83253d56e3 100644
--- a/src/python/def_classes.py
+++ b/src/python/def_classes.py
@@ -761,6 +761,8 @@ class Document:
                     #pages = 'all', suffix_xml = '_data', name_outxml = self.name_outxml,
                     #name_outcorrxml = self.name_outcorrxml)
 
+        with open('data/lists/notunique.txt', 'a') as f:
+            f.write(' '.join((str(self.year), self.id_doc, '\n')))
 
         print('we have a main corr XML file')
         #utils_proc.tar_extractfile(self.name_xml_corr[1], self.folder_database, name_file = self.name_outcorrxml)
diff --git a/src/python/run_extract_discussions.py b/src/python/run_extract_discussions.py
index 23747f0fbb6a806184581477f67ae316b98665c7..0bb90c94bdaa6425eb85826f40cccdf34d336a9f 100644
--- a/src/python/run_extract_discussions.py
+++ b/src/python/run_extract_discussions.py
@@ -116,7 +116,7 @@ utils_proc.compress_tar(output_annotatedxml)
 
 #%%
 # to test for one file
-file_tarpath = './1925/20029967_datacorr.xml'
+file_tarpath = './1925/20029981_datacorr.xml'
 
 id_doc = file_tarpath.split('/')[-1][:8]
 
diff --git a/src/python/utils_annot.py b/src/python/utils_annot.py
index 0c19077eedca544c6b616c4cf2c01612dfcefc0e..525eeb8288f2d24b8f9a2e0c6a4c4f9114d13faf 100644
--- a/src/python/utils_annot.py
+++ b/src/python/utils_annot.py
@@ -263,6 +263,11 @@ def label_speechstart(XML_new, ind_p, ind_t, text, ind_tl_colon, df_names, list_
         # look at first few terms of that textbox
         text_start = re.sub(r'[\(\)]','',text[:colon_index_text])
         list_oi = tokenizer.tokenize(text_start)
+
+        if len(list_oi) > 5:
+            with open('data/lists/notunique.txt', 'a') as f:
+                f.write(' '.join((str(list_oi), '\n')))
+                
         if bln_print:
             print('possible speech start: ', list_oi)
 
@@ -289,6 +294,10 @@ def label_speechstart(XML_new, ind_p, ind_t, text, ind_tl_colon, df_names, list_
         if bln_print:
             print('name', str_name, 'role', str_role)
 
+        if len(list_uniqueID) > 1:
+            with open('data/lists/notunique.txt', 'a') as f:
+                f.write(' '.join((str_name, str_role, str(list_uniqueID), str_date, str(ind_p), '\n')))
+
         # get rid of 'Präsident stimmt nicht Président ne vote pas'
         if set(str_role.split()).intersection(set(['Präsident', 'Präsidentin', 'Président', 'Présidente'])) and not str_name:
             if set(['stimmt', 'nicht', 'vote', 'pas']).intersection(list_oi):
@@ -570,38 +579,27 @@ def find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str
 
         # cannot happen for the first term in list_oi
         elif name_type == 'canton':
-            list_cantonname, list_cantonabbr, list_citizenship, list_firstname, list_additionalInfo = get_list_cantons(df_names, str_name.split(' ')[0], str_council)
+            list_cantons = get_list_cantons(df_names, str_name.split(' ')[0], str_council)
             canton_type = ''
-            if term in list_cantonname:
-                str_canton = term
-                canton_type = 'CantonName'
-                print('!!! is a canton', term, list_oi, str_name, str_role)
-            elif term in list_cantonabbr:
-                str_canton = term
-                canton_type = 'CantonAbbr'
-                print('!!! is a canton', term, list_oi, str_name, str_role)
-            elif term in list_citizenship:
-                str_canton = term
-                canton_type = 'Citizenship'
-                print('!!! is a canton', term, list_oi, str_name, str_role)
-            elif term in list_firstname:
-                str_canton = term
-                canton_type = 'FirstName'
-                print('!!! is a canton', term, list_oi, str_name, str_role)
-            elif term in list_additionalInfo:
-                str_canton = term
-                canton_type = 'additionalInfo'
-                print('!!! is a canton', term, list_oi, str_name, str_role)
-
-            else:
+            for list_, type_ in list_cantons:
+                if term in list_:
+                    str_canton = term
+                    canton_type = type_
+                    print('!!! is a canton', term, list_oi, str_name, str_role)
+                    break
+
+            # if the term did not exactly match any of the lists, check for misspellings
+            if not canton_type:
                 # look for similar names based on (normalized) Damerau-Levenshtein distance
-                # TODO: might needs to be extended for other than cantonname
-                term_approx = get_approximate_term(term, np.array(list_cantonname))
-                if term_approx:
-                    str_canton = term_approx
-                    canton_type = 'CantonName'
-
-                print('might be a canton:', term, list_oi, str_name, str_role, term_approx)
+                # only look at cantonname, citizenship and additionalinfo
+                list_cantons_approx = [list_cantons[i] for i in (0, 2, 4)]
+                for list_, type_ in list_cantons_approx:
+                    term_approx = get_approximate_term(term, np.array(list_))
+                    if term_approx:
+                        str_canton = term_approx
+                        canton_type = type_
+                        print('!!! is a canton', term, list_oi, str_name, str_role)
+                        break
 
             # if a canton or similar was found
             if canton_type:
@@ -638,6 +636,9 @@ def find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str
                     else:
                         str_name = add_to_string(str_name, str_completeName)
 
+            else:
+                print('could not be identified as a canton:', term, list_oi, str_name, str_role)
+
         # if term is not easily mistaken as a name (avoid false positives)
         elif term not in list_notnames:
 
@@ -681,7 +682,7 @@ def find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str
                 else:
                     # check if person can be identified from date of discussion
                     # exclude people that joined after date of discussion
-                    df_temp_before = df_temp[pd.to_datetime(df_temp['DateJoining']) <= datetime.datetime.strptime(str_date, '%Y-%m-%d %H:%M')]
+                    df_temp_before = df_temp[pd.to_datetime(df_temp['DateJoining'], format='%d.%m.%Y') <= datetime.datetime.strptime(str_date, '%Y-%m-%d %H:%M')]
                     if df_temp_before.shape[0] == 1:
                         list_temp = list(df_temp_before['uniqueIndex'])
                         str_completeName = df_temp_before['completeName'].iloc[0]
@@ -693,7 +694,7 @@ def find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str
                             str_name = add_to_string(str_name, str_completeName)
 
                     # exclude people that left before date of discussion
-                    df_temp_after = df_temp[pd.to_datetime(df_temp['DateLeaving']) >= datetime.datetime.strptime(str_date, '%Y-%m-%d %H:%M')]
+                    df_temp_after = df_temp[pd.to_datetime(df_temp['DateLeaving'], format='%d.%m.%Y') >= datetime.datetime.strptime(str_date, '%Y-%m-%d %H:%M')]
                     if df_temp_after.shape[0] == 1:
                         list_temp = list(df_temp_after['uniqueIndex'])
                         str_completeName = df_temp_after['completeName'].iloc[0]
@@ -797,25 +798,42 @@ def get_df_temp_canton(df_names, str_name, str_council):
 # function to get list of places
 def get_list_cantons(df_names, str_name, str_council = ''):
 
+    # specify strings as they are used in Ratsmitglieder_1848_DE_corr.xlsx and therefore in df_names
+    str_CantonName = 'CantonName'
+    str_CantonAbbreviation = 'CantonAbbreviation'
+    str_Citizenship = 'Citizenship'
+    str_FirstName = 'FirstName'
+    str_additionalInfo = 'additionalInfo'
+
+    # get dataframe
     df_temp = get_df_temp_canton(df_names, str_name, str_council)
 
     # list of cantons
-    list_cantonname = list(df_temp['CantonName'])
+    list_cantonname = list(df_temp[str_CantonName])
 
     # list of canton abbreviations
-    list_cantonabbr = list(df_temp['CantonAbbreviation'])
+    list_cantonabbr = list(df_temp[str_CantonAbbreviation])
 
     # list of citizenships
-    list_citizenship = list(df_temp['Citizenship'])
+    list_citizenship = list(df_temp[str_Citizenship])
     list_citizenship = get_cities(list_citizenship)
 
     # list of first names
-    list_firstname = list(df_temp['FirstName'])
+    list_firstname = list(df_temp[str_FirstName])
 
     # list of additional information
-    list_additionalInfo = list(df_temp['additionalInfo'])
-
-    return list_cantonname, list_cantonabbr, list_citizenship, list_firstname, list_additionalInfo
+    list_additionalInfo = list(df_temp[str_additionalInfo])
+
+    # generate list of cantons including string
+    list_cantons = [(list_cantonname, str_CantonName),
+                    (list_cantonabbr, str_CantonAbbreviation),
+                    (list_citizenship, str_Citizenship),
+                    (list_firstname, str_FirstName),
+                    (list_additionalInfo, str_additionalInfo),
+                    ]
+
+#    return list_cantonname, list_cantonabbr, list_citizenship, list_firstname, list_additionalInfo
+    return list_cantons