Skip to content
Snippets Groups Projects
Commit 09ce8031 authored by Lili Gasser
Browse files

add french training data

parent e17e664e
No related branches found
No related tags found
No related merge requests found
Pipeline #1938 passed
......@@ -281,3 +281,4 @@ Pipfile.lock
# NER training files
data/train_NER/1[0-9][0-9][0-9]_20[0-9][0-9][0-9][0-9][0-9][0-9].txt
data/train_NER/1[0-9][0-9][0-9]_20[0-9][0-9][0-9][0-9][0-9][0-9]_french.txt
This diff is collapsed.
......@@ -6,7 +6,8 @@ colors = {'ORG': '#73c6b6', 'LOC': '#bb8fce', 'PER': '#e59866', 'MISC': '#a6acaf
# displaCy options for the short label set: only these entity types are
# shown, coloured according to `colors`.
options = {
    'ents': ['ORG', 'LOC', 'PER', 'MISC'],
    'colors': colors,
}

# Long-form label set: highlight colour per entity label, and the matching
# displaCy options restricted to those labels.
colors_sner = {
    'ORGANIZATION': '#73c6b6',
    'LOCATION': '#bb8fce',
    'PERSON': '#e59866',
    'MISC': '#a6acaf',
}
options_sner = {
    'ents': ['ORGANIZATION', 'LOCATION', 'PERSON', 'MISC'],
    'colors': colors_sner,
}
# Highlight colour per entity label for the French data.
# The location tag is accepted under both spellings: the colour map used
# 'I-LIEU' while the `ents` filter used 'I-LOC', so no location label was both
# displayed and coloured.
# NOTE(review): confirm which spelling the French training data actually uses
# and drop the other one.
colors_sner_french = {
    'I-ORG': '#73c6b6',
    'I-LIEU': '#bb8fce',
    'I-LOC': '#bb8fce',
    'I-PERS': '#e59866',
}
# `ents` is derived from the colour map so the two can never disagree again.
options_sner_french = {'ents': list(colors_sner_french), 'colors': colors_sner_french}
......@@ -75,9 +76,13 @@ def read_from_txt(filename):
pass
return alldicts
def render_dict(alldicts, language='german'):
    """Render every sentence of *alldicts* with displaCy's entity visualiser.

    Args:
        alldicts: Mapping of sentence key to the dict handed to
            ``displacy.render``. Only the values are used. Because the call
            passes ``manual=True``, each value is presumably in displaCy's
            manual entity format -- verify against ``read_from_txt``.
        language: ``'german'`` (default; uses ``options_sner``) or
            ``'french'`` (uses ``options_sner_french``).

    Raises:
        ValueError: If *language* is not one of the supported values.
    """
    # A distinct local name keeps the module-level `options` visible and
    # avoids an unbound local when the language is not recognised.
    if language == 'german':
        render_options = options_sner
    elif language == 'french':
        render_options = options_sner_french
    else:
        raise ValueError(
            f"unsupported language {language!r}; expected 'german' or 'french'"
        )

    for sent_dict in alldicts.values():
        displacy.render(sent_dict, style='ent', jupyter=True, manual=True, options=render_options)
def transform_to_training_format(alldicts):
......@@ -113,4 +118,6 @@ def transform_to_training_format(alldicts):
return train_data
def get_language(filepath):
    """Return the language of a training file, inferred from its path.

    Args:
        filepath: Path of the file, as a ``str`` or any ``os.PathLike``
            object such as ``pathlib.Path``.

    Returns:
        ``'french'`` when the path ends with ``_french.txt``, otherwise
        ``'german'``.
    """
    import os  # function-scope import so the block does not rely on file-level imports

    # os.fspath leaves str untouched and unwraps PathLike objects, so the
    # suffix check works for both.
    path_text = os.fspath(filepath)
    return 'french' if path_text.endswith('_french.txt') else 'german'
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment