From c81c92bf7637511e4b3a115624dad6a9e5accd1a Mon Sep 17 00:00:00 2001
From: Lilian Gasser <gasserli@ethz.ch>
Date: Tue, 22 Jan 2019 16:31:02 +0100
Subject: [PATCH] WIP: factor out get_cities() and use it for citizenship matching in find_names

---
 src/python/utils_annot.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/python/utils_annot.py b/src/python/utils_annot.py
index c9895333..21fe6f31 100644
--- a/src/python/utils_annot.py
+++ b/src/python/utils_annot.py
@@ -362,7 +362,7 @@ def label_speechstart(XML_new, ind_p, ind_t, text, ind_tl_colon, df_names, list_
             this_is_speech = True
             if bln_print:
                 print('found a name:', text_start, list_oi, str_name, str_role, '\n')
-            print('found a name:', text_start, list_oi, str_name, str_role, list_uniqueID, '\n')
+            print('found a name:', text_start, list_oi, ind_tl_colon, str_name, str_role, list_uniqueID, '\n')
 
     return XML_new, this_is_speech
 
@@ -624,7 +624,10 @@ def find_names(list_oi, df_names, list_notnames, bln_print=False):
                 # if Citizenship, do proper comparison
                 if canton_type == 'Citizenship':
                     df_temp = df_names.loc[(df_names['type']==name_type) & (df_names['name_short']==str_name)]
-                    list_citizenship = [term for term in df_temp[canton_type] if str_canton in tokenizer_canton.tokenize(term)]
+                    list_citizenship = get_cities(list(df_temp[canton_type]))
+                    print(list_citizenship)
+                    list_citizenship = [entry for entry in df_temp[canton_type] if str_canton in get_cities([entry])]
+                    print(list_citizenship)
                     str_citizenship = ''
                     try:
                         if len(list_citizenship) == 1:
@@ -733,6 +736,8 @@ def label_language(XML_new, ind_p, ind_t, aux_dict_l):
 
     return XML_new
 
+def get_cities(list_citizenship):  # flatten a list of comma-separated strings into one list of pieces, each with its last 5 characters removed
+    return [city[:-5] for item in list_citizenship for city in item.split(',')]  # NOTE(review): [:-5] presumably strips a trailing ' (XX)' canton tag -- confirm against the 'Citizenship' column data
 
 # function to get list of places
 def get_list_cantons(df_names, str_name = ''):
@@ -751,7 +756,7 @@ def get_list_cantons(df_names, str_name = ''):
         list_cantonname.extend(['Berne'])
     list_cantonabbr = list(df_temp['CantonAbbreviation'])
     list_citizenship = list(df_temp['Citizenship'])
-    list_citizenship = [city[:-5] for item in list_citizenship for city in item.split(',')]
+    list_citizenship = get_cities(list_citizenship)
     list_firstname = list(df_temp['FirstName'])
 
     return list_cantonname, list_cantonabbr, list_citizenship, list_firstname
-- 
GitLab