diff --git a/src/python/extractMPs.py b/src/python/extractMPs.py index 01f87b7674b82d4895ce39566b243e5629bf0175..a0f3cc24ec24691c00b8457319c45fb32ed4e91e 100644 --- a/src/python/extractMPs.py +++ b/src/python/extractMPs.py @@ -44,31 +44,31 @@ class MPs_Extractor(object): str_comp = 'comp' str_canton2 = 'canton' - # function to split lastname and save meaningful part(s) to list + # function to split lastname and save meaningful part(s) to list def split_lastname(lastname, uniqueID, tpl_canton, str_canton = ''): - # if last name is a composite name, e.g. 'von Arx' and 'de Stoppani' + # if last name is a composite name, e.g. 'von Arx' and 'de Stoppani' lastname_split = lastname.split() if len(lastname_split) > 1: for item in lastname_split: if item not in ['von', 'de', 'Ab', 'van']: - # write distinctive item to extended list + # write distinctive item to extended list if str_canton: list_names.append((str_canton2, item, str_canton, uniqueID) + tpl_canton) else: list_names.append((str_comp, item, lastname, uniqueID) + tpl_canton) else: - # if last name is a double name, e.g. 'Meier-Müller' + # if last name is a double name, e.g. 'Meier-Müller' lastname_split2 = lastname.replace('-', ' ').split() if len(lastname_split2) > 1: - # write each part of double name into corresponding list + # write each part of double name into corresponding list for item in lastname_split2: if str_canton: list_names.append((str_canton2, item, str_canton, uniqueID) + tpl_canton) else: list_names.append((str_double, item, lastname, uniqueID) + tpl_canton) - # write double name into list + # write double name into list list_names.append((str_double, lastname, lastname, uniqueID) + tpl_canton) - # write double name without space into list + # write double name without space into list list_names.append((str_double, ''.join(lastname.split('-')), lastname, uniqueID) + tpl_canton) else: if str_canton: @@ -98,6 +98,7 @@ class MPs_Extractor(object): #print(df_temp) # if there is an extra double name + # TODO: maybe easier by just adding second part as additional term, could then also be used to account for Wohnort if df_temp.iloc[0]['DoubleName'] != '': # extract unique index uniqueID = df_temp.iloc[0]['uniqueIndex']