Skip to content
Snippets Groups Projects
Commit 1c87c8de authored by Lili Gasser's avatar Lili Gasser
Browse files

notebook and new function in utils_ner to compare NER output

parent d76193e1
No related branches found
No related tags found
No related merge requests found
source diff could not be displayed: it is too large. Options to address this: view the blob.
......@@ -2,10 +2,11 @@
import datetime
from spacy import displacy
from IPython.display import display, Markdown
# Rendering options handed to displacy.render(..., options=...).
# In each pair, the colour dict maps an entity label to a hex colour and the
# options dict lists the labels to show ('ents') together with that mapping.
# Organisation, location, person and misc use the same colour in every scheme.

# Short labels (ORG / LOC / PER / MISC).
colors = {'ORG': '#73c6b6', 'LOC': '#bb8fce', 'PER': '#e59866', 'MISC': '#a6acaf'}
options = {'ents': ['ORG', 'LOC', 'PER', 'MISC'], 'colors': colors}

# Long labels (ORGANIZATION / LOCATION / PERSON / MISC).
colors_sner = {'ORGANIZATION': '#73c6b6', 'LOCATION': '#bb8fce', 'PERSON': '#e59866', 'MISC': '#a6acaf'}
options_sner = {'ents': ['ORGANIZATION', 'LOCATION', 'PERSON', 'MISC'], 'colors': colors_sner}

# Same long labels, selected when the language is 'german'.
colors_sner_german = {'ORGANIZATION': '#73c6b6', 'LOCATION': '#bb8fce', 'PERSON': '#e59866', 'MISC': '#a6acaf'}
options_sner_german = {'ents': ['ORGANIZATION', 'LOCATION', 'PERSON', 'MISC'], 'colors': colors_sner_german}

# Labels selected when the language is 'french'.
# NOTE(review): the colour table used 'I-LIEU' while the 'ents' filter used
# 'I-LOC', so locations were either filtered out or left without their colour.
# Both spellings are accepted here until the tag actually produced by the
# French model is confirmed - TODO confirm and drop the unused one.
colors_sner_french = {
    'I-ORG': '#73c6b6',
    'I-LIEU': '#bb8fce',
    'I-LOC': '#bb8fce',
    'I-PERS': '#e59866',
}
options_sner_french = {'ents': ['I-ORG', 'I-LOC', 'I-LIEU', 'I-PERS'], 'colors': colors_sner_french}
......@@ -93,15 +94,70 @@ def read_from_txt(filename):
return alldicts
def render_dict(alldicts, language):
    """Render every sentence dictionary of a document with displacy.

    Args:
        alldicts: mapping from a sentence key to a sentence dictionary; each
            value is passed unchanged to ``displacy.render`` with
            ``manual=True``.
        language: ``'german'`` or ``'french'``; selects the label/colour
            options used for rendering.

    Raises:
        ValueError: if ``language`` is not one of the supported values.
            (Previously this surfaced as an ``UnboundLocalError`` at the
            first rendered sentence.)
    """
    # load rendering options
    if language == 'german':
        render_options = options_sner_german
    elif language == 'french':
        render_options = options_sner_french
    else:
        raise ValueError(
            "Unsupported language {!r}: expected 'german' or 'french'.".format(language)
        )

    # for each sentence (the keys are not needed for rendering)
    for sent_dict in alldicts.values():
        displacy.render(sent_dict, style='ent', jupyter=True, manual=True, options=render_options)
def compare_dicts(list_dicts, language, list_titles=('Dictionary 1', 'Dictionary 2')):
    """Render two NER outputs of the same document sentence by sentence.

    For every sentence a Markdown heading with the sentence's own title is
    displayed, followed by the rendering of the sentence from the first
    dictionary and then from the second one, each captioned with the
    corresponding entry of ``list_titles``.

    Args:
        list_dicts: sequence whose first two items are mappings from a
            sentence key to a sentence dictionary (each sentence dictionary
            of the first mapping needs a ``'title'`` entry).
        language: ``'german'`` or ``'french'``; selects the label/colour
            options used for rendering.
        list_titles: captions for the first and the second rendering.

    Raises:
        ValueError: if ``language`` is not one of the supported values.

    The input dictionaries are never modified: the caption is put on a
    shallow copy of each sentence dictionary, so the original titles survive
    even if rendering fails half-way.
    """
    # load rendering options
    if language == 'german':
        render_options = options_sner_german
    elif language == 'french':
        render_options = options_sner_french
    else:
        raise ValueError(
            "Unsupported language {!r}: expected 'german' or 'french'.".format(language)
        )

    # get dictionaries
    first_doc = list_dicts[0]
    second_doc = list_dicts[1]

    # the two dictionaries are considered to come from the same document
    # only when they have exactly the same sentence keys
    if first_doc.keys() != second_doc.keys():
        print('These dictionaries are from different documents and not comparable.')
        return

    # for each sentence
    for sent_key, first_sent in first_doc.items():
        # display the sentence's own title as a heading
        display(Markdown('## {}'.format(first_sent['title'])))

        # render both versions, each under its comparison caption
        versions = ((first_sent, list_titles[0]), (second_doc[sent_key], list_titles[1]))
        for sent_dict, caption in versions:
            captioned = dict(sent_dict, title=caption)
            displacy.render(captioned, style='ent', jupyter=True, manual=True, options=render_options)

        print('\n')
def transform_to_training_format(alldicts):
def get_entitities_in_training_format(list_ents):
......@@ -173,5 +229,10 @@ def transform_to_reading_format(train_data):
def get_language(filepath):
    """Guess the language of a document from its file path.

    Args:
        filepath: path of the document as a string.

    Returns:
        ``'french'`` if the path contains ``'french'`` and does not contain
        ``'german'``; ``'german'`` in every other case (German is both the
        preferred match and the fallback).
    """
    # NOTE(review): the whole path is searched, not just the file name, and
    # the match is case-sensitive - confirm this is intended.
    if 'french' in filepath and 'german' not in filepath:
        return 'french'
    return 'german'
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment