From 91738e13a0798ca655f815f23c3972499ed6e2af Mon Sep 17 00:00:00 2001 From: Lili Gasser <gasserli@ethz.ch> Date: Mon, 18 Feb 2019 10:32:51 +0000 Subject: [PATCH] clean file --- src/python/run_train_ner.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/src/python/run_train_ner.py b/src/python/run_train_ner.py index 15878724..485210a4 100644 --- a/src/python/run_train_ner.py +++ b/src/python/run_train_ner.py @@ -36,10 +36,9 @@ def main(model=None, output_dir=None, n_iter=100, training_data=None, trained_da if training_data is not None: call_with_out("git-lfs pull -I " + training_data.as_posix()) dict_onedoc = read_from_txt(training_data) - TRAIN_DATA = transform_to_training_format(dict_onedoc)[:50] # TODO: get rid of [:50] - TRAIN_DATA_orig = TRAIN_DATA[:] - print(type(TRAIN_DATA), TRAIN_DATA[:10]) - # TODO: format checks + TRAIN_DATA = transform_to_training_format(dict_onedoc) + TRAIN_DATA_orig = TRAIN_DATA[:] # save a copy to have an unshuffled version + print("Training data loaded"]) else: sys.exit("no training data") @@ -97,14 +96,6 @@ def main(model=None, output_dir=None, n_iter=100, training_data=None, trained_da dict_ents_test['entities'] = list_ents_test tpl = (text, dict_ents_test, title) TRAIN_DATA_tested.append(tpl) -# print('train', list_ents_train) -# print('test', list_ents_test) -# print(set(list_ents_train) == set(list_ents_test)) -# if print_output - #if not set(list_ents_train) == set(list_ents_test): - #print(text) - #print("Entities", [(ent.text, ent.label_) for ent in doc.ents]) - #print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc]) alldicts_tested = transform_to_reading_format(TRAIN_DATA_tested) -- GitLab