diff --git a/src/python/extractMPs.py b/src/python/extractMPs.py
index cd3504da7a302a2f0560644fa1475c0e311e0e99..01f87b7674b82d4895ce39566b243e5629bf0175 100644
--- a/src/python/extractMPs.py
+++ b/src/python/extractMPs.py
@@ -8,7 +8,7 @@ import sys
 
 input_file = sys.argv[1]    #'./data/politicians/Ratsmitglieder_1848_DE_corr.xlsx'
 output_file_csv = sys.argv[2]     #'./data/politicians/MPs_after1890.csv'
-output_folder_dict = sys.argv[3]    
+output_folder_dict = sys.argv[3]
 
 
 class MPs_Extractor(object):
@@ -19,12 +19,12 @@ class MPs_Extractor(object):
         self.output_folder_dict = output_folder_dict
         self.range_years = range(years[0], years[1] + 1)
         self.df_exc = df_exc
-        
+
     # function to get lists of lastnames
     # input:
     # - df_year: dataframe for a year
     # output:
-    # - list_names: 
+    # - list_names:
     #      contains:
     #        - list of last names that appear only once and cannot be split
     #        - list of last name that are made up of two names such as 'Meier-Müller'
@@ -44,7 +44,7 @@ class MPs_Extractor(object):
         str_comp = 'comp'
         str_canton2 = 'canton'
 
-	# function to split lastname and save meaningful part(s) to list 
+        # function to split lastname and save meaningful part(s) to list
         def split_lastname(lastname, uniqueID, tpl_canton, str_canton = ''):
 	    # if last name is a composite name, e.g. 'von Arx' and 'de Stoppani'
             lastname_split = lastname.split()
@@ -70,7 +70,7 @@ class MPs_Extractor(object):
                     list_names.append((str_double, lastname, lastname, uniqueID) + tpl_canton)
 		    # write double name without space into list
                     list_names.append((str_double, ''.join(lastname.split('-')), lastname, uniqueID) + tpl_canton)
-                else:       
+                else:
                     if str_canton:
                         list_names.append((str_canton2, lastname, str_canton, uniqueID) + tpl_canton)
                     else:
@@ -82,66 +82,67 @@ class MPs_Extractor(object):
             str_cantonabbr = df_year['CantonAbbreviation'].loc[df_year['uniqueIndex']==uniqueID].iloc[0]
             str_citizenship = df_year['Citizenship'].loc[df_year['uniqueIndex']==uniqueID].iloc[0]
             str_firstname = df_year['FirstName'].loc[df_year['uniqueIndex']==uniqueID].iloc[0]
- 
-            return (str_cantonname, str_cantonabbr, str_citizenship, str_firstname)
+            str_doublename = df_year['DoubleName'].loc[df_year['uniqueIndex']==uniqueID].iloc[0]
+
+            return (str_cantonname, str_cantonabbr, str_citizenship, str_firstname, str_doublename)
 
-	# create empty lists for last names
+        # create empty lists for last names
         list_names = []
-	
-	# for every last name
+
+        # for every last name
         for lastname in df_year['LastName'].drop_duplicates():
 	    #print('name', lastname, type(lastname))
-	    
-	    # extract all entries with that last name
+
+            # extract all entries with that last name
             df_temp = df_year.loc[df_after1890['LastName']==lastname]
-	    #print(df_temp)
-	    
-	    # if there is an extra double name
+            #print(df_temp)
+
+            # if there is an extra double name
             if df_temp.iloc[0]['DoubleName'] != '':
-		# extract unique index
+                # extract unique index
                 uniqueID = df_temp.iloc[0]['uniqueIndex']
-		
+
                 # get canton information for that uniqueID
                 tpl_canton = get_canton(df_year, uniqueID)
 
                 #print('double name', df_temp)
                 doublename = df_temp.iloc[0]['DoubleName']
 
-		# if last name is a double name, e.g. 'Meier-Müller'
+                # if last name is a double name, e.g. 'Meier-Müller'
                 lastname_split2 = doublename.replace('-', ' ').split()
                 if len(lastname_split2) > 1:
-		    # write each part of double name into corresponding list
+                    # write each part of double name into corresponding list
                     for item in lastname_split2:
                         list_names.append((str_double, item, lastname, uniqueID) + tpl_canton)
-		    # write double name into list
+                    # write double name into list
                     list_names.append((str_double, doublename, lastname, uniqueID) + tpl_canton)
-		    # write double name without space into list
+                    # write double name without space into list
                     list_names.append((str_double, ''.join(doublename.split('-')), lastname, uniqueID) + tpl_canton)
 
-	    # if only one person with that last name
+            # if only one person with that last name
             if df_temp.drop_duplicates(['uniqueIndex']).shape[0] == 1:
-		# extract unique index
+                # extract unique index
                 uniqueID = df_temp.iloc[0]['uniqueIndex']
 
                 # get canton information for that uniqueID
                 tpl_canton = get_canton(df_year, uniqueID)
 
-		# write complete name to list of last names
+                # write complete name to list of last names
                 split_lastname(lastname, uniqueID, tpl_canton)
-		
-	    # if there are several people with the same last name
+
+            # if there are several people with the same last name
             else:
-		# write last name and canton to correct list
+                # write last name and canton to correct list
                 for idx, row in df_temp.drop_duplicates(['uniqueIndex']).iterrows():
-		    # extract unique index
+                    # extract unique index
                     uniqueID = df_temp.loc[idx]['uniqueIndex']
-		    
+
                     # get canton information for that uniqueID
                     tpl_canton = get_canton(df_year, uniqueID)
 
-		    # write the lastname to the list
-                    split_lastname(lastname, uniqueID, tpl_canton, row['LastName'] + ' (' + row['CantonName'] + ')') 
-			
+                    # write the lastname to the list
+                    split_lastname(lastname, uniqueID, tpl_canton, row['LastName'] + ' (' + row['CantonName'] + ')')
+
         return list_names
 
     def extract(self):
@@ -172,7 +173,7 @@ class MPs_Extractor(object):
         # group by first and last name, and date of birth
         grouped = df_after1890.groupby(["LastName", "FirstName", "DateOfBirth"])
 
-        # assign first index to all entries of a person 
+        # assign first index to all entries of a person
         for list_index in grouped.groups.values():
             df_after1890.loc[list_index, 'uniqueIndex'] = list_index[0]
 
@@ -192,15 +193,15 @@ class MPs_Extractor(object):
             df_year = df_after1890[pd.to_datetime(df_after1890['DateJoining']) <= datetime.datetime(year, 12, 31, 0, 0)]
             df_year = df_year[pd.to_datetime(df_year['DateLeaving']) >= datetime.datetime(year, 1, 1, 0, 0)]
             print(year, df_year.shape)
-            
+
             # write df_year to a yearly csv file
         #    str_year = str(year)
         #    df_year.to_csv('home/lili/NLP_DemocraSci/nlp-democracy/output/MPs/MPs_' + str_year + '.csv')
-            
+
             # create a pandas dataframe from list of names
             # !!! list contains errors, see definition of function
             list_lastnames = self.get_list_of_lastnames(df_year, df_after1890)
-            df_lastnames = pd.DataFrame(list_lastnames, columns = ('type', 'name_short', 'name_correct', 'uniqueIndex', 'CantonName', 'CantonAbbreviation', 'Citizenship', 'FirstName'))
+            df_lastnames = pd.DataFrame(list_lastnames, columns = ('type', 'name_short', 'name_correct', 'uniqueIndex', 'CantonName', 'CantonAbbreviation', 'Citizenship', 'FirstName', 'DoubleName'))
 
             # dump dictionary of last names to a pickle file
 #           path = pathlib.
@@ -213,19 +214,18 @@ years = [1891, 2016]   #2016
 
 df_exc = pd.DataFrame(columns=['LastName', 'FirstName', 'DoubleName'])
 # exception: Konrad H. Cramer is also reffered to as Cramer-Frey. Add double name in extra-column
-df_exc.loc[len(df_exc)] = {'LastName': 'Cramer', 'FirstName': 'Konrad H.', 'DoubleName': 'Cramer-Frey'} 
+df_exc.loc[len(df_exc)] = {'LastName': 'Cramer', 'FirstName': 'Konrad H.', 'DoubleName': 'Cramer-Frey'}
 # exception: Johannes Blumer SG is also reffered to as Blumer-Egloff. Add double name in extra-column
-df_exc.loc[len(df_exc)] = {'LastName': 'Blumer', 'FirstName': 'Johannes', 'DoubleName': 'Blumer-Egloff'} 
+df_exc.loc[len(df_exc)] = {'LastName': 'Blumer', 'FirstName': 'Johannes', 'DoubleName': 'Blumer-Egloff'}
 # exception: Adolphe Jordan VD is also reffered to as Jordan-Martin. Add double name in extra-column
-df_exc.loc[len(df_exc)] = {'LastName': 'Jordan', 'FirstName': 'Adolphe', 'DoubleName': 'Jordan-Martin'} 
+df_exc.loc[len(df_exc)] = {'LastName': 'Jordan', 'FirstName': 'Adolphe', 'DoubleName': 'Jordan-Martin'}
 # exception: Jakob Schmid LU is also reffered to as Schmid-Ronca. Add double name in extra-column
-df_exc.loc[len(df_exc)] = {'LastName': 'Schmid', 'FirstName': 'Jakob', 'DoubleName': 'Schmid-Ronca'} 
+df_exc.loc[len(df_exc)] = {'LastName': 'Schmid', 'FirstName': 'Jakob', 'DoubleName': 'Schmid-Ronca'}
 # exception: Eduard Sulzer ZH is also reffered to as Sulzer-Ziegler. Add double name in extra-column
-df_exc.loc[len(df_exc)] = {'LastName': 'Sulzer', 'FirstName': 'Eduard', 'DoubleName': 'Sulzer-Ziegler'} 
+df_exc.loc[len(df_exc)] = {'LastName': 'Sulzer', 'FirstName': 'Eduard', 'DoubleName': 'Sulzer-Ziegler'}
 # exception: Howard Eugster AR is also reffered to as Eugster-Züst. Add double name in extra-column
-df_exc.loc[len(df_exc)] = {'LastName': 'Eugster', 'FirstName': 'Howard', 'DoubleName': 'Eugster-Züst'} 
+df_exc.loc[len(df_exc)] = {'LastName': 'Eugster', 'FirstName': 'Howard', 'DoubleName': 'Eugster-Züst'}
 #print(df_exc)
 
 mps_extractor = MPs_Extractor(years, input_file, output_file_csv, output_folder_dict, df_exc)
 mps_extractor.extract()
-
diff --git a/src/python/utils_annot.py b/src/python/utils_annot.py
index e27d825a0c6d1270523ac4e185f9f021f0b5a5bb..66bedcc9ab6636f4ee534db0518f9869fd789c92 100644
--- a/src/python/utils_annot.py
+++ b/src/python/utils_annot.py
@@ -264,12 +264,13 @@ def label_speechstart(XML_new, ind_p, ind_t, text, ind_tl_colon, df_names, list_
         list_oi = [term for term in list_oi if not term.isdigit()]
 
         # remove single characters
+        # TODO: might need to be changed for fractions (some fractions are abbreviated as single letters)
         list_oi = [term for term in list_oi if len(term)>1]
 
-        # for every term
-        for term in list_oi:
-            # if possible, find a name in a list
-            str_name, str_role, list_uniqueID, str_canton = find_names(term, str_name, str_role, list_uniqueID, df_names, str_canton, bln_print=False)
+#        # for every term
+#        for term in list_oi:
+        # if possible, find a name in a list
+        str_name, str_role, list_uniqueID, str_canton = find_names(list_oi, str_name, str_role, list_uniqueID, df_names, str_canton, bln_print=False)
         if bln_print:
             print('name', str_name, 'role', str_role)
 
@@ -425,7 +426,7 @@ def flatten(l):
 # - list_tupels: list of tupels containing all types of names
 # TODO: correctly extract canton! don't do reversed, find name first that might have issue with canton, then look for possible canton
 # TODO: double names in extra column: string gets written twice, uniqueID is fine (1893, 20026532, Cramer)
-def find_names(term, str_name, str_role, list_uniqueID, df_names, str_canton, bln_print=False):
+def find_names(list_oi, str_name, str_role, list_uniqueID, df_names, str_canton, bln_print=False):
 
     def get_string(term, str_name, str_role, list_uniqueID, str_canton):
         name_type = ''
@@ -439,7 +440,7 @@ def find_names(term, str_name, str_role, list_uniqueID, df_names, str_canton, bl
         # if it is a double name
         elif term in list(df_names['name_short'].loc[df_names['type']=='double']):
             if bln_print:
-                print(20*'\n', 'DOUBLE NAME')
+                print(5*'\n', 'DOUBLE NAME')
             # get correct name
             correct_name = df_names.loc[(df_names['type']=='double') & (df_names['name_short']== term)].iat[0, df_names.columns.get_loc('name_correct')]
             if bln_print:
@@ -463,18 +464,7 @@ def find_names(term, str_name, str_role, list_uniqueID, df_names, str_canton, bl
         elif term in list(df_names['name_short'].loc[df_names['type']=='canton']):
             if bln_print:
                 print('contains a canton', term)
-#            canton_missing = False
-#            df_temp = df_names.loc[df_names['name_short']==term]
-#            print('list_correct', df_temp)
-#            print(str_canton)
-#            if str_canton:
-#                str_correct = check_place(df_temp, str_canton)
-#                if str_correct in ['not found', 'too many']:
-#                    str_name = add_to_string(str_name, term + ' (CANTONT MISSING)')
-#                    canton_missing = True
-#                else:
-#                    str_name = add_to_string(str_name, str_temp)
-#                    name_type = 'canton'
+
             str_name = add_to_string(str_name, term + ' (CANTON MISSING)')
             name_type = 'canton'
 
@@ -485,10 +475,7 @@ def find_names(term, str_name, str_role, list_uniqueID, df_names, str_canton, bl
             list_temp = [df_names.loc[(df_names['type']==name_type) & (df_names['name_short']==term)].iat[0, df_names.columns.get_loc('uniqueIndex')]]
         elif name_type in ['canton']:
             list_temp = list(df_names.loc[(df_names['type']==name_type) & (df_names['name_short']==term)].iloc[:, df_names.columns.get_loc('uniqueIndex')])
-#            if canton_missing:
-#                temp = tuple(df_names.loc[(df_names['type']==name_type) & (df_names['name_short']==term)].iloc[:, df_names.columns.get_loc('uniqueIndex')])
-#            else:
-#                temp = df_names.loc[(df_names['type']==name_type) & (df_names['name_correct']==str_correct)].iat[0, df_names.columns.get_loc('uniqueIndex')]
+
 
         if len(list_temp) > 0:
             if bln_print:
@@ -519,7 +506,7 @@ def find_names(term, str_name, str_role, list_uniqueID, df_names, str_canton, bl
             #else:
                 #int_uniqueID = (int_uniqueID, temp)
 
-        return str_name, str_role, list_uniqueID
+        return str_name, str_role, list_uniqueID, name_type
 
     # small function to add term to str_name
     def add_to_string(string, term):
@@ -538,60 +525,96 @@ def find_names(term, str_name, str_role, list_uniqueID, df_names, str_canton, bl
     list_notnames = ['Art', 'Rath', 'Alinea', 'Stimmen', 'Stimme', 'Hans', 'Walter', 'Werner', 'projet', 'stimmt', 'nicht', 'vote', 'Gallen', 'StGallen',
                      'Kasse', 'fasse', 'Sitten', 'Herren', 'Herr', 'Alter', 'Biffer', 'biffer', 'Rédiger', 'rédiger', 'Wer', 'Fällen', 'Ari', 'bietet', 'autre']
 
-    list_places = get_list_cantons(df_names)
+    list_cantonname, list_cantonabbr, list_citizenship, list_firstname = get_list_cantons(df_names)
 
-    if bln_print:
-        print('now is about: ------', term)
     # extract list and array of last names
     list_all_names = list(df_names['name_short'])
     array_all_names = np.array(df_names['name_short'])
 
-    # if term is not easily mistaken as a name (avoid false positives)
-    if term not in list_notnames:
+    # initialize name_type
+    name_type = ''
 
-        # if term is in the list of all names and roles
-        if term in (list_all_names + list_roles):
-            # get correct name and uniqueID, or role, for that term
-            str_name, str_role, list_uniqueID = get_string(term, str_name, str_role, list_uniqueID, str_canton)
+    # for every term
+    for term in list_oi:
 
-            if bln_print:
-                print('=== correct name', term)
-        # if term in list of cantons
-        elif term in list_places:
-            str_canton = term
-        # if term is not in list_all_names
-        else:
-            # look for similar names based on (normalized) Damerau-Levenshtein distance
-            # !!! probably need to improve this procedure
-            #       - find better values ....
-            if bln_print:
-                print(term)
-            array_normalized = array_all_names[normalized_damerau_levenshtein_distance_ndarray(term, array_all_names) <= 0.35]
-            array_normalized_values = normalized_damerau_levenshtein_distance_ndarray(term, array_normalized)
-            if bln_print:
-                print(array_normalized, array_normalized_values)
-            array_absolute = array_all_names[damerau_levenshtein_distance_ndarray(term, array_all_names) <= 2]
-            array_absolute_values = damerau_levenshtein_distance_ndarray(term, array_absolute)
-            if bln_print:
-                print(array_absolute, array_absolute_values)
-            set_intersection = set(array_normalized).intersection(set(array_absolute))
-            # check if a similar name was found
-            term_approx = ''
-            if len(set_intersection) == 1:
-                term_approx = list(set_intersection)[0]
+        if bln_print:
+            print('now is about: ------', term)
+
+        if name_type == 'canton':
+            canton_type = ''
+            if term in list_cantonname:
+                str_canton = term
+                canton_type = 'CantonName'
+                print('!!! is a canton', term, list_oi, str_name, str_role)
+            elif term in list_cantonabbr:
+                str_canton = term
+                canton_type = 'CantonAbbr'
+                print('!!! is a canton', term, list_oi, str_name, str_role)
+            elif term in list_citizenship:
+                str_canton = term
+                canton_type = 'Citizenship'
+                print('!!! is a canton', term, list_oi, str_name, str_role)
+            elif term in list_firstname:
+                str_canton = term
+                canton_type = 'FirstName'
+                print('!!! is a canton', term, list_oi, str_name, str_role)
+
+            else:
+                print('might be a canton', term, list_oi, str_name, str_role)
+
+            if canton_type:
+                # get rid of CANTON MISSING
+                str_name = str_name.split(' ')[0]
+                # extract uniqueID
+#                list_temp = []
+                list_temp = list(df_names.loc[(df_names['type']==name_type) & (df_names['name_short']==str_name) & (df_names[canton_type]==str_canton)].iloc[:, df_names.columns.get_loc('uniqueIndex')])
+                print(list_temp, list_uniqueID)
+                list_uniqueID = list_temp
+
+        # if term is not easily mistaken as a name (avoid false positives)
+        if term not in list_notnames:
+
+            # if term is in the list of all names and roles
+            if term in (list_all_names + list_roles):
+                # get correct name and uniqueID, or role, for that term
+                str_name, str_role, list_uniqueID, name_type = get_string(term, str_name, str_role, list_uniqueID, str_canton)
+
+                if bln_print:
+                    print('=== correct name', term)
+
+            # if term is not in list_all_names
+            else:
+                # look for similar names based on (normalized) Damerau-Levenshtein distance
+                # !!! probably need to improve this procedure
+                #       - find better values ....
                 if bln_print:
-                    print('we found the name', set_intersection)
-            elif len(set_intersection) > 1:
-                # !!! we only look at normalized values
-                # !!! we don't account for names with same values !!!
-                array_min = array_normalized[array_normalized_values.argmin()]
-                term_approx = array_min#[0]
+                    print(term)
+                array_normalized = array_all_names[normalized_damerau_levenshtein_distance_ndarray(term, array_all_names) <= 0.35]
+                array_normalized_values = normalized_damerau_levenshtein_distance_ndarray(term, array_normalized)
                 if bln_print:
-                    print('we found several possible names', set_intersection, 'and choose', array_min)
-            if term_approx:
-                str_name, str_role, list_uniqueID = get_string(term_approx, str_name, str_role, list_uniqueID, str_canton)
+                    print(array_normalized, array_normalized_values)
+                array_absolute = array_all_names[damerau_levenshtein_distance_ndarray(term, array_all_names) <= 2]
+                array_absolute_values = damerau_levenshtein_distance_ndarray(term, array_absolute)
                 if bln_print:
-                    print('*******************', str_name, term_approx)
+                    print(array_absolute, array_absolute_values)
+                set_intersection = set(array_normalized).intersection(set(array_absolute))
+                # check if a similar name was found
+                term_approx = ''
+                if len(set_intersection) == 1:
+                    term_approx = list(set_intersection)[0]
+                    if bln_print:
+                        print('we found the name', set_intersection)
+                elif len(set_intersection) > 1:
+                    # !!! we only look at normalized values
+                    # !!! we don't account for names with same values !!!
+                    array_min = array_normalized[array_normalized_values.argmin()]
+                    term_approx = array_min#[0]
+                    if bln_print:
+                        print('we found several possible names', set_intersection, 'and choose', array_min)
+                if term_approx:
+                    str_name, str_role, list_uniqueID, name_type = get_string(term_approx, str_name, str_role, list_uniqueID, str_canton)
+                    if bln_print:
+                        print('*******************', str_name, term_approx)
 
 
     return str_name, str_role, list_uniqueID, str_canton
@@ -829,4 +852,4 @@ def get_list_cantons(df_names):
     list_citizenship = [city[:-5] for item in list_citizenship for city in item.split(',')]
     list_firstname = list(df_temp['FirstName'])
 
-    return list_cantonname +  list_cantonabbr + list_citizenship + list_firstname
+    return list_cantonname, list_cantonabbr, list_citizenship, list_firstname