Skip to content
Snippets Groups Projects
Commit c81c92bf authored by Lili Gasser's avatar Lili Gasser
Browse files

WIP

parent fcd4ebea
No related branches found
No related tags found
No related merge requests found
...@@ -362,7 +362,7 @@ def label_speechstart(XML_new, ind_p, ind_t, text, ind_tl_colon, df_names, list_ ...@@ -362,7 +362,7 @@ def label_speechstart(XML_new, ind_p, ind_t, text, ind_tl_colon, df_names, list_
this_is_speech = True this_is_speech = True
if bln_print: if bln_print:
print('found a name:', text_start, list_oi, str_name, str_role, '\n') print('found a name:', text_start, list_oi, str_name, str_role, '\n')
print('found a name:', text_start, list_oi, str_name, str_role, list_uniqueID, '\n') print('found a name:', text_start, list_oi, ind_tl_colon, str_name, str_role, list_uniqueID, '\n')
return XML_new, this_is_speech return XML_new, this_is_speech
...@@ -624,7 +624,10 @@ def find_names(list_oi, df_names, list_notnames, bln_print=False): ...@@ -624,7 +624,10 @@ def find_names(list_oi, df_names, list_notnames, bln_print=False):
# if Citizenship, do proper comparison # if Citizenship, do proper comparison
if canton_type == 'Citizenship': if canton_type == 'Citizenship':
df_temp = df_names.loc[(df_names['type']==name_type) & (df_names['name_short']==str_name)] df_temp = df_names.loc[(df_names['type']==name_type) & (df_names['name_short']==str_name)]
list_citizenship = [term for term in df_temp[canton_type] if str_canton in tokenizer_canton.tokenize(term)] list_citizenship = get_cities(list(df_temp[canton_type]))
print(list_citizenship)
list_citizenship = [entry for entry in df_temp[canton_type] if str_canton in get_cities([entry])]
print(list_citizenship)
str_citizenship = '' str_citizenship = ''
try: try:
if len(list_citizenship) == 1: if len(list_citizenship) == 1:
...@@ -733,6 +736,8 @@ def label_language(XML_new, ind_p, ind_t, aux_dict_l): ...@@ -733,6 +736,8 @@ def label_language(XML_new, ind_p, ind_t, aux_dict_l):
return XML_new return XML_new
def get_cities(list_citizenship):
    """Return the bare city names contained in a list of citizenship strings.

    Each item of ``list_citizenship`` is a string holding one or more
    comma-separated entries, each expected to look like ``'City (XX)'``
    (city name followed by a parenthesised suffix).

    Parameters
    ----------
    list_citizenship : iterable of str
        Citizenship strings, e.g. ``['Bern (BE),Wald (ZH)']``.

    Returns
    -------
    list of str
        One city name per comma-separated entry, in input order, with the
        trailing parenthesised suffix and surrounding whitespace removed.
    """
    cities = []
    for item in list_citizenship:
        for entry in item.split(','):
            # Entries may be separated by ', ' rather than ','; a fixed-width
            # slice would then keep the leading blank and break comparisons.
            entry = entry.strip()
            # Only cut the suffix when it is really there, so that an entry
            # without ' (XX)' is not truncated by five characters.
            if entry.endswith(')') and '(' in entry:
                entry = entry[:entry.rfind('(')].rstrip()
            cities.append(entry)
    return cities
# function to get list of places # function to get list of places
def get_list_cantons(df_names, str_name = ''): def get_list_cantons(df_names, str_name = ''):
...@@ -751,7 +756,7 @@ def get_list_cantons(df_names, str_name = ''): ...@@ -751,7 +756,7 @@ def get_list_cantons(df_names, str_name = ''):
list_cantonname.extend(['Berne']) list_cantonname.extend(['Berne'])
list_cantonabbr = list(df_temp['CantonAbbreviation']) list_cantonabbr = list(df_temp['CantonAbbreviation'])
list_citizenship = list(df_temp['Citizenship']) list_citizenship = list(df_temp['Citizenship'])
list_citizenship = [city[:-5] for item in list_citizenship for city in item.split(',')] list_citizenship = get_cities(list_citizenship)
list_firstname = list(df_temp['FirstName']) list_firstname = list(df_temp['FirstName'])
return list_cantonname, list_cantonabbr, list_citizenship, list_firstname return list_cantonname, list_cantonabbr, list_citizenship, list_firstname
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment