notebook and new function in utils_ner to compare NER output

1c87c8de · Lili Gasser · d76193e1 · 1c87c8de · 1c87c8de
Commit 1c87c8de authored 6 years ago by Lili Gasser
--- a/notebooks/NER_compare-output.ipynb
+++ b/notebooks/NER_compare-output.ipynb
--- a/src/python/utils_ner.py
+++ b/src/python/utils_ner.py
@@ -2,10 +2,11 @@

 import datetime
 from spacy import displacy
+from IPython.display import display, Markdown
 colors = {'ORG': '#73c6b6', 'LOC': '#bb8fce', 'PER': '#e59866', 'MISC': '#a6acaf'}
 options = {'ents': ['ORG', 'LOC', 'PER', 'MISC'], 'colors': colors}
-colors_sner = {'ORGANIZATION': '#73c6b6', 'LOCATION': '#bb8fce', 'PERSON': '#e59866', 'MISC': '#a6acaf'}
-options_sner = {'ents': ['ORGANIZATION', 'LOCATION', 'PERSON', 'MISC'], 'colors': colors_sner}
+colors_sner_german = {'ORGANIZATION': '#73c6b6', 'LOCATION': '#bb8fce', 'PERSON': '#e59866', 'MISC': '#a6acaf'}
+options_sner_german = {'ents': ['ORGANIZATION', 'LOCATION', 'PERSON', 'MISC'], 'colors': colors_sner_german}
 colors_sner_french = {'I-ORG': '#73c6b6', 'I-LIEU': '#bb8fce', 'I-PERS': '#e59866'}
-options_sner_french = {'ents': ['I-ORG', 'I-LOC', 'I-PERS'], 'colors': colors_sner_french}
+options_sner_french = {'ents': ['I-ORG', 'I-LOC', 'I-PERS'], 'colors': colors_sner_french}  # NOTE(review): 'I-LOC' is not a key of colors_sner_french (which has 'I-LIEU') - confirm the label the French tagger emits

@@ -93,15 +94,70 @@ def read_from_txt(filename):

    return alldicts

+
+
 def render_dict(alldicts, language):
+    """Render every sentence dict in alldicts with displacy; raises ValueError unless language is 'german' or 'french'."""
    if language == 'german':
-        options = options_sner
+        options = options_sner_german
-    if language == 'french':
+    elif language == 'french':
        options = options_sner_french
+    else:
+        raise ValueError("language must be 'german' or 'french', got {!r}".format(language))
+    # render sentence by sentence; sent_key is not needed for rendering
    for sent_key, sent_dict in alldicts.items():
        displacy.render(sent_dict, style='ent', jupyter=True, manual=True, options=options)


+
+def _render_with_title(sent_dict, title, options):
+    # Render sent_dict under a temporary title; the original title is
+    # restored afterwards even if rendering raises.
+    original_title = sent_dict['title']
+    sent_dict['title'] = title
+    try:
+        displacy.render(sent_dict, style='ent', jupyter=True, manual=True, options=options)
+    finally:
+        sent_dict['title'] = original_title
+
+
+def compare_dicts(list_dicts, language, list_titles=('Dictionary 1', 'Dictionary 2')):
+    """Render two NER outputs of one document sentence by sentence.
+
+    Args:
+        list_dicts: sequence whose first two items map sentence keys to
+            sentence dictionaries for displacy's manual mode, each with a 'title'.
+        language: 'german' or 'french'; selects the rendering options.
+        list_titles: labels shown above the first and the second rendering.
+
+    Raises:
+        ValueError: if language is neither 'german' nor 'french'.
+    """
+    # load rendering options
+    if language == 'german':
+        options = options_sner_german
+    elif language == 'french':
+        options = options_sner_french
+    else:
+        raise ValueError("language must be 'german' or 'french', got {!r}".format(language))
+
+    # get dictionaries
+    dict1 = list_dicts[0]
+    dict2 = list_dicts[1]
+
+    # dictionaries with different keys come from different documents
+    if dict1.keys() != dict2.keys():
+        print('These dictionaries are from different documents and not comparable.')
+        return
+
+    for sent_key, sent_dict1 in dict1.items():
+        # heading: the sentence's own title, before it is temporarily replaced
+        display(Markdown('## {}'.format(sent_dict1['title'])))
+        _render_with_title(sent_dict1, list_titles[0], options)
+        _render_with_title(dict2[sent_key], list_titles[1], options)
+        print('\n')
+
+
 def transform_to_training_format(alldicts):

    def get_entitities_in_training_format(list_ents):
@@ -173,5 +229,10 @@ def transform_to_reading_format(train_data):


 def get_language(filepath):
-    language = 'french' if filepath.endswith('_french.txt') else 'german'
+    """Infer the document language ('german' or 'french') from filepath."""
+
+    # 'german' wins when both names occur and is also the fallback, so the
+    # answer is 'french' only for a path that mentions 'french' but not 'german'
+    only_french = 'french' in filepath and 'german' not in filepath
+    language = 'french' if only_french else 'german'
    return language