diff --git a/data/lists/wrongly_identified_speakers.txt b/data/lists/wrongly_identified_speakers.txt index 3303c2891696b60eaaf254bfbee69fcba8567385..038c497e8e720ad68faf7fddd016e3452b70e468 100644 --- a/data/lists/wrongly_identified_speakers.txt +++ b/data/lists/wrongly_identified_speakers.txt @@ -76,11 +76,6 @@ list of people in a minority are recognized as speech starts: 1936/20031990: many, e.g. Widmer (CANTON MISSING) [5696, 5703] 1936-01-14 00:00 0 ['Walter', 'Olten', 'Widmer', 'Rossi'] -President not found: -------------------- -1921/20029085: Müller (CANTON MISSING) Président [3663, 3695] 1921-01-19 00:00 9 ['Mlle', 'Président'] - - misspelled role: ---------------- 1936/20031986: Meyer (CANTON MISSING) [3482, 3483, 3488, 3490] 1936-01-09 00:00 14 ['Rundespräsident', 'Meyer'] @@ -88,7 +83,6 @@ misspelled role: 1932/20031299: Häberlin (CANTON MISSING) [2290, 2287] 1932-09-21 00:00 6 ['Bimdesrat', 'Häberlin'] - weird layout: ------------- 1971/20000663: de MM. Knüsel et Leu (there must be more speech starts, this is from a list of cantons and people inside a speech, !!! Layout) diff --git a/src/python/utils_annot.py b/src/python/utils_annot.py index 8f81d0723b7af07ac6bf647211768198e70d0f22..63622c1b8fa61780481695b0d57b4defb6a13f0f 100644 --- a/src/python/utils_annot.py +++ b/src/python/utils_annot.py @@ -900,6 +900,8 @@ def get_list_cantons(df_names, str_name, str_council = '', str_firstname = ''): str_Citizenship = 'Citizenship' str_FirstName = 'FirstName' str_additionalInfo = 'additionalInfo' + str_additionalInfo2 = 'additionalInfo2' + str_additionalInfo3 = 'additionalInfo3' # get dataframe df_temp = get_df_temp(df_names, str_name, str_council, str_firstname) @@ -919,17 +921,20 @@ def get_list_cantons(df_names, str_name, str_council = '', str_firstname = ''): # list of additional information list_additionalInfo = list(df_temp[str_additionalInfo]) + list_additionalInfo2 = list(df_temp[str_additionalInfo2]) + list_additionalInfo3 = list(df_temp[str_additionalInfo3]) # generate list of cantons including string # additionalinfo should be before citizenship (helps to find people which have same citizenship but a specified addtionalinfo) list_cantons = [(list_cantonname, str_CantonName), (list_cantonabbr, str_CantonAbbreviation), (list_additionalInfo, str_additionalInfo), + (list_additionalInfo2, str_additionalInfo2), + (list_additionalInfo3, str_additionalInfo3), (list_citizenship, str_Citizenship), (list_firstname, str_FirstName), ] -# return list_cantonname, list_cantonabbr, list_citizenship, list_firstname, list_additionalInfo return list_cantons