diff --git a/data/lists/not_names.txt b/data/lists/not_names.txt
index 1a44b305e7d8e5267e6140416581f5b397009058..3c05f4fa40168b361d648d747d12c3b81e87a6cb 100644
--- a/data/lists/not_names.txt
+++ b/data/lists/not_names.txt
@@ -1,3 +1,4 @@
+Abs
 Alinea
 Alter
 Ari
@@ -8,6 +9,7 @@ bietet
 darin
 drehen
 eher
+Erster
 ess
 FÃ¤llen
 fasse
@@ -43,6 +45,7 @@ Recht
 Schrit
 Seite
 selber
+sicher
 Sinne
 spÃ¤ter
 StÃ¤nder
@@ -68,6 +71,7 @@ Wort
 Worten
 Ziel
 Zuerst
+Zusatz
 allemand
 autre
 Berne
@@ -88,6 +92,7 @@ poser
 projet
 RÃ©diger
 rÃ©diger
+sais
 tirer
 vote
 delle
diff --git a/data/lists/wrongly_identified_speakers.txt b/data/lists/wrongly_identified_speakers.txt
index c46c3fe19a88256f573d41983299317e502749b8..14cc2e22b1bcb583a4786cb4b7aa39015fbc0098 100644
--- a/data/lists/wrongly_identified_speakers.txt
+++ b/data/lists/wrongly_identified_speakers.txt
@@ -10,6 +10,8 @@ speaker not identifiable:
 1951/20034996: found a name: Studer ['Studer'] 0 Studer (CANTON MISSING)  [5130, 5141]
 1951/20035991: Dietschi (CANTON MISSING) Berichterstatter [1350, 1351] 1951-10-02 00:00 9 ['Dietschi', 'Berichterstatter']
 1951/20035171: Perrin (CANTON MISSING) rapporteur [3935, 3939] 1951-12-07 00:00 0 ['Perrin', 'rapporteur']
+1931/20031058: Pfister (CANTON MISSING)  [3980, 3981, 3984] 1931-09-25 00:00 4 ['Pfister']
+1961/20037310: Berger (CANTON MISSING) rapporteur [368, 373, 375] 1961-09-21 00:00 1 ['Berger', 'rapporteur'] 
 
 
 speaker not uniquely identified when he spoke the second time:
@@ -19,12 +21,15 @@ speaker not uniquely identified when he spoke the second time:
 1951/20034982: Perrin-Corcelles rapporteur (first time), after: found a name: M. PÃ©trin,  rapporteur ['PÃ©trin', 'rapporteur'] 0 Perrin (CANTON MISSING) rapporteur [3935, 3939]
 1951/20034995: after: Kunz (CANTON MISSING)  [3017, 3019] 1951-04-03 00:00 21 ['Kunz']
 1951/20034996: Studer
+1931/20031095: Pfister (CANTON MISSING) Berichterstatter [3980, 3981, 3984] 1931-12-10 00:00 3 ['Pf', 'ister', 'Berichterstatter']
+1921/20029087: MÃ¼ller (CANTON MISSING)  [3663, 3695] 1921-01-20 00:00 6 ['MÃ¼ller']
 
 identifier is split into two words
 ----------------------------------
 1925/20029945, 1951/20035173: found a name: Schmid-Oberentf elden ['Schmid', 'Oberentf', 'elden'] 0 Schmid (CANTON MISSING)  [4639, 4660]
 1971/20000498: ['M', 'Muf', 'ny', 'rapporteur', 'de', 'la', 'majoritÃ©'] 7 --> finds Muff but is Mugny
 1951/20034978,79,94: found a name: Bringolf- Schaff hausen  ['Bringolf', 'Schaff', 'hausen'] 0 Bringolf (CANTON MISSING)  [707, 706]
+1941/        : MÃ¼ller Aarb erg
 
 
 identified as speech start but is in text:
@@ -54,21 +59,42 @@ look for typical terms such as gestellt, gesagt, etc.
 1971/20000093: found a name: In  zwei  wesentlichen  Punkten  bin  ich  mit  Herrn Kollega  Biel  absolut  einverstanden ['zwei', 'wesentlichen', 'Punkten', 'Kollega', 'Biel', 'absolut', 'einverstanden'] 1 Biel Walter (ZÃ¼rich ZH)  [426]
 1971/20000614: Zu Herrn Fischer
 1951/20035112: Schmid (CANTON MISSING)  [4639, 4646, 4660] 1951-09-26 00:00 27 ['Antrag', 'Schmid']
+1941/20033145: Prof. BÃ¶hler erklÃ¤rt --> finds BÃ¼hler
 
 Bundesrat not found:
 --------------------
 1951/20035017,26: Petitpierre (CANTON MISSING)  [3955, 3956] 1951-04-03 00:00 8 ['Petitpierre', 'conseiller', 'fÃ©dÃ©ral']
 1951/20035018,20,77,83: Rubattel (CANTON MISSING)  [4381, 4382] 1951-04-03 00:00 6 ['Rubattel', 'conseiller', 'fÃ©dÃ©ral']
+1931/20030968: HÃ¤berlin (CANTON MISSING)  [2290, 2287] 1931-03-24 00:00 6 ['BundesprÃ¤sident', 'HÃ¤berlin'] HÃ¤berlin,Heinrich,TG,BundesprÃ¤sident
+1931/20031089: Meyer (CANTON MISSING) Bundesrat [3482, 3483, 3490, 3495] 1931-12-08 00:00 1 ['Meyer', 'Bundesrat']
+1921/20029085: MÃ¼ller (CANTON MISSING) PrÃ©sident [3663, 3695] 1921-01-19 00:00 9 ['Mlle', 'PrÃ©sident']
+1911/20027998: Forrer (CANTON MISSING) Bundesrat [1771, 1773] 1911-03-30 09:00 2 ['Bundesrat', 'Forrer']
+1911/20028039: MÃ¼ller (CANTON MISSING) Bundesrat [3642, 3653, 3663, 3683] 1911-10-05 08:30 11 ['Bundesrat', 'MÃ¼ller']
+
 
 weird layout:
 -------------
 1971/20000663: de MM. KnÃ¼sel et Leu (there must be more speech starts, this is from a list of cantons and people inside a speech, !!! Layout)
 
+wrong entries in xlsx:
+----------------------
+1931/20030940,49: Scherer (CANTON MISSING)  [4560, 4565] 1931-03-18 00:00 18 ['Scherer'] --> there are two entries for one person
+
+bad OCR:
+--------
+1941/20033146: MÃ¼Her instead of MÃ¼ller is not discovered
+
+
+not sure about place:
+---------------------
+1921/20029090: Seiler (CANTON MISSING)  [4810, 4815] 1921-02-24 00:00 5 ['Seiler', 'Sitten'] found no connection between Seiler Hermann and Sitten
+
 term very similar to one name is actually another name
 ------------------------------------------------------
 1925/20029863: ganz --> finds Lanz, there is a Ganz
 1971/20000630 and others: Schweizer --> finds Schneider, there is a Schweizer
 1951/20035112: Schweizer --> finds Schwizer
+1921/20029145: Seiler (CANTON MISSING)  [4810, 4815] 1921-04-13 00:00 10 ['Nationalrat', 'Speiser', 'sagte'] finds Seiler for Speiser
 
 
 term is a name
@@ -88,11 +114,15 @@ person has entry date 29.11.71 but is not yet active (presumably):
 1971/20000587: Tanner Paul starts officiall on 29.11.71, discussion is on 30.11.71 --> finds two!
 1971/20000588: one Kohler starts 29.11.71, discussion is on 30.11.71 --> finds two!
 1971/20000726: one Muheim starts 29.11.71, discussion is on 8.12.71 --> finds two!
+1921/20029265: one Huber starts 5.12.21, discussion is on 6.12.21 --> finds two
 
 
 Firstname before LastName
 -------------------------
 1971/20000592: Simon Kohler rapporteur
+1911/20028008: Frey (CANTON MISSING)  [1816, 1828] 1911-06-13 08:00 2 ['Alfred', 'Frey']
+1911/20028010: Eugster (CANTON MISSING)  [1571, 1572] 1911-06-22 08:00 15 ['Arthur', 'Eugster']
+1961/20037222: Borel (CANTON MISSING)  [590, 591] 1961-03-15 00:00 6 ['Georges', 'Borei']
 
 two people with same last name and same citizenship
 ---------------------------------------------------
diff --git a/data/politicians/MPs_additionalInfo.csv b/data/politicians/MPs_additionalInfo.csv
index 83d56dafb44dc980eb1db280f9f78938faecaa8d..b76f6c08b99b3ba5795f7743bce0ad2071651bab 100644
--- a/data/politicians/MPs_additionalInfo.csv
+++ b/data/politicians/MPs_additionalInfo.csv
@@ -46,3 +46,7 @@ Weber,Jakob Rudolf,BE,Grasswil
 Weber,Heinrich Otto,SG,Gallen
 Sigg,Jean-C.,GE,GenÃ¨ve
 Suter,Johannes,BL,Baselland
+Sonderegger,Johann Jakob,AR,Ausserrhoden
+Sonderegger,Karl Justin,IR,Innerrhoden
+MÃ¼ller,Emil,BL,Baselland
+KÃ¶nig,Walter,BE,Biel
diff --git a/src/python/utils_annot.py b/src/python/utils_annot.py
index 8572e209b340f644e120b0fe377fd1885a5ee240..daffdb615a55714ee6afab016eb2634627df77f2 100644
--- a/src/python/utils_annot.py
+++ b/src/python/utils_annot.py
@@ -243,8 +243,8 @@ def label_speechstart(XML_new, ind_p, ind_t, text, ind_tl_colon, df_names, list_
     list_roles = ['PrÃ¤sident', 'PrÃ¤sidentin', 'VizeprÃ¤sident', 'PrÃ¤sidium', 'PrÃ©sident', 'PrÃ©sidente', 'prÃ©sident', 'prÃ©sidente',
                   'Berichterstatter', 'Berichterstatterin', 'rapporteur',
                   'Sprecher', 'Sprecherin', 'porte-parole', 'porteparole',
-                  'Bundesrat', 'Bundesrath', 'BundesrÃ¤tin', 'conseiller fÃ©dÃ©ral',
-                  'VizeprÃ¤sident']
+                  'Bundesrat', 'Bundesrath', 'BundesrÃ¤tin', 'conseiller', 'fÃ©dÃ©ral',
+                  'VizeprÃ¤sident', 'BundesprÃ¤sident']
     list_roles_ext = ['Mehrheit', 'Minderheit', 'majoritÃ©', 'minoritÃ©', 'deutscher', 'deutsche', 'franÃ§ais', 'franÃ§aise', 'Kommission', 'commission']
 
     # initialize flag
@@ -265,132 +265,138 @@ def label_speechstart(XML_new, ind_p, ind_t, text, ind_tl_colon, df_names, list_
         text_start = re.sub(r'[\(\)]','',text[:colon_index_text])
         list_oi = tokenizer.tokenize(text_start)
 
-        if len(list_oi) > 4:
-            with open('data/lists/notunique.txt', 'a') as f:
-                f.write(' '.join((str(list_oi), str(len(list_oi)), '\n')))
-            flag_print = True
-
         if bln_print:
             print('possible speech start: ', list_oi)
 
-        # remove stopwords
-        list_oi = [term for term in list_oi if term.lower() not in list_stopwords]
+        # To avoid false positives, the number of tokens in list_oi is checked:
+        # - with 9 or more tokens, the text is part of a speech, not a speech start
+        # - with 5 to 8 tokens, it is a speech start only if it contains a role
+        #       from list_roles; otherwise it is part of a speech
+        # - with fewer than 5 tokens, it is typically a speech start, but not always.
+        #       These false positives cannot be avoided with this procedure.
+        if (len(list_oi) < 9):
+            if (len(list_oi) < 5) or (len(set(list_oi).intersection(list_roles)) > 0):
 
-        # remove punctuation
-        list_oi = [''.join(c for c in s if c not in string.punctuation) for s in list_oi]
-        list_oi = [s for s in list_oi if s]
+                with open('data/lists/notunique.txt', 'a') as f:
+                    f.write(' '.join((str(list_oi), str(len(list_oi)), '\n')))
+                flag_print = True
 
-        # remove lower case terms
-#        list_oi = [term for term in list_oi if not term.islower()]
+                # remove stopwords
+                list_oi = [term for term in list_oi if term.lower() not in list_stopwords]
 
-        # remove numbers
-        list_oi = [term for term in list_oi if not term.isdigit()]
+                # remove punctuation
+                list_oi = [''.join(c for c in s if c not in string.punctuation) for s in list_oi]
+                list_oi = [s for s in list_oi if s]
 
-        # remove single characters
-        # TODO: might need to be changed for fractions (some fractions are abbreviated as single letters)
-        # TODO: needs to be changed to include 'I' for Minderheit I 1891/20000093
-        # TODO: maybe exclude I and A to account for Appenzell
-        list_oi = [term for term in list_oi if len(term)>1]
+                # remove lower case terms (currently disabled)
+                # list_oi = [term for term in list_oi if not term.islower()]
 
-        if len(list_oi) > 4 or flag_print:
-            with open('data/lists/notunique.txt', 'a') as f:
-                f.write(' '.join((str(list_oi), str(len(list_oi)), '\n')))
+                # remove numbers
+                list_oi = [term for term in list_oi if not term.isdigit()]
 
-        # if possible, find a name from the list
-        str_name, str_role, list_uniqueID, str_canton = find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str_council, str_date, bln_print=False)
-        if bln_print:
-            print('name', str_name, 'role', str_role)
+                # remove single characters
+                # TODO: might need to be changed for fractions (some fractions are abbreviated as single letters)
+                # TODO: needs to be changed to include 'I' for Minderheit I 1891/20000093
+                # TODO: maybe exclude I and A to account for Appenzell
+                list_oi = [term for term in list_oi if len(term)>1]
 
-        if len(list_uniqueID) > 1 or flag_print:
-            with open('data/lists/notunique.txt', 'a') as f:
-                f.write(' '.join((str_name, str_role, str(list_uniqueID), str_date, str(ind_p), str(list_oi), '\n')))
+                if len(list_oi) > 4 or flag_print:
+                    with open('data/lists/notunique.txt', 'a') as f:
+                        f.write(' '.join((str(list_oi), str(len(list_oi)), '\n')))
 
-        # get rid of 'PrÃ¤sident stimmt nicht PrÃ©sident ne vote pas'
-        if set(str_role.split()).intersection(set(['PrÃ¤sident', 'PrÃ¤sidentin', 'PrÃ©sident', 'PrÃ©sidente'])) and not str_name:
-            if set(['stimmt', 'nicht', 'vote', 'pas']).intersection(list_oi):
-                if bln_print:
-                    print('get rid of PrÃ¤sident stimmt nicht, PrÃ©sident ne vote pas', list_oi)
-                str_role = ''
+                # if possible, find a name from the list
+                str_name, str_role, list_uniqueID, str_canton = find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str_council, str_date, bln_print=False)
 
-        # get rid of 'FÃ¼r den Antrag "Name" stimmen: Votent pour la proposition "Name":'
-        if str_name:
-            if len(set(['Antrag', 'stimmen', 'Votent', 'proposition']).intersection(list_oi)) > 1:
-                if bln_print:
-                    print('get rid of FÃ¼r den Antrag <Name> stimmen: Votent pour la proposition <Name>:', list_oi)
-                str_name = ''
-
-        # if a name has been found, add it to XML_new
-        if str_name or str_role:
-            # add attribute speech_start to textbox
-            XML_new[ind_p][ind_t].attrib['text_type'] = 'speech_start'
-
-            # add speaker as attribute to first textline
-            XML_new[ind_p][ind_t][0].attrib['speaker'] = (str_name, str_role, list_uniqueID, str_canton)
-
-            # update text of XML (speaker is on first line, actual speech start on second line of speech_start textbox)
-            # if colon is on first line
-            if ind_tl_colon == 0:
-                # get text of that line and colon index
-                thattext = XML_new[ind_p][ind_t][0].text
-                colon_index = thattext.index(':')
-
-                try:
-                    # write speaker to first line
-                    XML_new[ind_p][ind_t][0].text = thattext[:colon_index+1] + fontend
-
-                    # get start of speech with correct font start
-                    if thattext[colon_index+1:].startswith('[font'):
-                        startspeech = thattext[colon_index+1:]
-                    elif re.match('^[ ]?\[/font\]$', thattext[colon_index+1:]):
-                        startspeech = ''
-                    elif re.match('^[ ]?\[/font\]', thattext[colon_index+1:]):
-                        startspeech = thattext[colon_index+8:]
-                    else:
-                        startspeech = thattext[colon_index+1:]
+                if len(list_uniqueID) > 1 or flag_print:
+                    with open('data/lists/notunique.txt', 'a') as f:
+                        f.write(' '.join((str_name, str_role, str(list_uniqueID), str_date, str(ind_p), str(list_oi), '\n')))
 
-                    # write beginning of speech to second line
-                    # (create new ET element if necessary)
-                    if len(list(XML_new[ind_p][ind_t])) > 1:
-                        XML_new[ind_p][ind_t][1].text = startspeech + ' ' + XML_new[ind_p][ind_t][1].text
-                    else:
-                        XML_new[ind_p][ind_t].append(copy.deepcopy(XML_new[ind_p][ind_t][0]))
-                        XML_new[ind_p][ind_t][1].attrib.pop('speaker')
-                        XML_new[ind_p][ind_t][1].text = startspeech
-                except:
-                    print('error in self.input_file when splitting speaker')
-                    #print(thattext)
-                    #print(len(list(XML_new[ind_p][ind_t])))
-                    #print(list(XML_new[ind_p][ind_t]))
-                    #print(XML_new[ind_p][ind_t])
-                    #print('gefundener Name:', str_name, str_role)
-                    pass
+                # get rid of 'PrÃ¤sident stimmt nicht PrÃ©sident ne vote pas'
+                if set(str_role.split()).intersection(set(['PrÃ¤sident', 'PrÃ¤sidentin', 'PrÃ©sident', 'PrÃ©sidente'])) and not str_name:
+                    if set(['stimmt', 'nicht', 'vote', 'pas']).intersection(list_oi):
+                        if bln_print:
+                            print('get rid of PrÃ¤sident stimmt nicht, PrÃ©sident ne vote pas', list_oi)
+                        str_role = ''
 
-            # if colon is on second line
-            if ind_tl_colon == 1:
-                # get text of that line and colon index
-                thattext = XML_new[ind_p][ind_t][1].text
-                colon_index = thattext.index(':')
-
-                # get start of speech with correct font start
-                if thattext[colon_index+1:].startswith('[font'):
-                    startspeech = thattext[colon_index+1:]
-                elif re.match('^[ ]?\[/font\]$', thattext[colon_index+1:]):
-                    startspeech = ''
-                elif re.match('^[ ]?\[/font\]', thattext[colon_index+1:]):
-                    startspeech = thattext[colon_index+8:]
-                else:
-                    startspeech = thattext[colon_index+1:]
+                # get rid of 'FÃ¼r den Antrag "Name" stimmen: Votent pour la proposition "Name":'
+                if str_name:
+                    if len(set(['Antrag', 'stimmen', 'Votent', 'proposition']).intersection(list_oi)) > 1:
+                        if bln_print:
+                            print('get rid of FÃ¼r den Antrag <Name> stimmen: Votent pour la proposition <Name>:', list_oi)
+                        str_name = ''
+
+                # if a name has been found, add it to XML_new
+                if str_name or str_role:
+                    # add attribute speech_start to textbox
+                    XML_new[ind_p][ind_t].attrib['text_type'] = 'speech_start'
+
+                    # add speaker as attribute to first textline
+                    XML_new[ind_p][ind_t][0].attrib['speaker'] = (str_name, str_role, list_uniqueID, str_canton)
+
+                    # update text of XML (speaker is on first line, actual speech start on second line of speech_start textbox)
+                    # if colon is on first line
+                    if ind_tl_colon == 0:
+                        # get text of that line and colon index
+                        thattext = XML_new[ind_p][ind_t][0].text
+                        colon_index = thattext.index(':')
+
+                        try:
+                            # write speaker to first line
+                            XML_new[ind_p][ind_t][0].text = thattext[:colon_index+1] + fontend
+
+                            # get start of speech with correct font start
+                            if thattext[colon_index+1:].startswith('[font'):
+                                startspeech = thattext[colon_index+1:]
+                            elif re.match('^[ ]?\[/font\]$', thattext[colon_index+1:]):
+                                startspeech = ''
+                            elif re.match('^[ ]?\[/font\]', thattext[colon_index+1:]):
+                                startspeech = thattext[colon_index+8:]
+                            else:
+                                startspeech = thattext[colon_index+1:]
+
+                            # write beginning of speech to second line
+                            # (create new ET element if necessary)
+                            if len(list(XML_new[ind_p][ind_t])) > 1:
+                                XML_new[ind_p][ind_t][1].text = startspeech + ' ' + XML_new[ind_p][ind_t][1].text
+                            else:
+                                XML_new[ind_p][ind_t].append(copy.deepcopy(XML_new[ind_p][ind_t][0]))
+                                XML_new[ind_p][ind_t][1].attrib.pop('speaker')
+                                XML_new[ind_p][ind_t][1].text = startspeech
+                        except:
+                            print('error in self.input_file when splitting speaker')
+                            #print(thattext)
+                            #print(len(list(XML_new[ind_p][ind_t])))
+                            #print(list(XML_new[ind_p][ind_t]))
+                            #print(XML_new[ind_p][ind_t])
+                            #print('gefundener Name:', str_name, str_role)
+                            pass
+
+                    # if colon is on second line
+                    if ind_tl_colon == 1:
+                        # get text of that line and colon index
+                        thattext = XML_new[ind_p][ind_t][1].text
+                        colon_index = thattext.index(':')
+
+                        # get start of speech with correct font start
+                        if thattext[colon_index+1:].startswith('[font'):
+                            startspeech = thattext[colon_index+1:]
+                        elif re.match('^[ ]?\[/font\]$', thattext[colon_index+1:]):
+                            startspeech = ''
+                        elif re.match('^[ ]?\[/font\]', thattext[colon_index+1:]):
+                            startspeech = thattext[colon_index+8:]
+                        else:
+                            startspeech = thattext[colon_index+1:]
 
-                # write speaker to first line
-                XML_new[ind_p][ind_t][0].text = XML_new[ind_p][ind_t][0].text + ' ' + thattext[:colon_index+1] + fontend
-                # write beginning of speech to second line
-                XML_new[ind_p][ind_t][1].text = startspeech
+                        # write speaker to first line
+                        XML_new[ind_p][ind_t][0].text = XML_new[ind_p][ind_t][0].text + ' ' + thattext[:colon_index+1] + fontend
+                        # write beginning of speech to second line
+                        XML_new[ind_p][ind_t][1].text = startspeech
 
-            # set flag
-            this_is_speech = True
-            if bln_print:
-                print('found a name:', text_start, list_oi, str_name, str_role, '\n')
-            print('found a name:', text_start, list_oi, ind_tl_colon, str_name, str_role, list_uniqueID, '\n')
+                    # set flag
+                    this_is_speech = True
+                    if bln_print:
+                        print('found a name:', text_start, list_oi, str_name, str_role, '\n')
+                    print('found a name:', text_start, list_oi, ind_tl_colon, str_name, str_role, list_uniqueID, '\n')
 
     return XML_new, this_is_speech
 
@@ -687,7 +693,7 @@ def find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str
                     str_completeName = df_names['completeName'].loc[(df_names['nameType']==name_type) & (df_names['shortName']==str_name.split(' ')[0]) & (df_names['CouncilName']==str_council)].iloc[0]
 
                     list_uniqueID = update_list_uniqueID(list_uniqueID, list_temp, name_type)
-                    if str_completeName.split(' ')[0] == str_name.split(' ')[0]:
+                    if str_completeName.split(' ')[0] == str_name.split(' ')[0] or str_completeName.split(' ')[1] == str_name.split(' ')[0]:
                         str_name = add_to_string('', str_completeName)
                     else:
                         str_name = add_to_string(str_name, str_completeName)