Skip to content
Snippets Groups Projects
Commit fa192339 authored by Lili Gasser's avatar Lili Gasser
Browse files

clean file

parent 80393a74
No related branches found
No related tags found
No related merge requests found
......@@ -36,10 +36,9 @@ def main(model=None, output_dir=None, n_iter=100, training_data=None, trained_da
if training_data is not None:
call_with_out("git-lfs pull -I " + training_data.as_posix())
dict_onedoc = read_from_txt(training_data)
TRAIN_DATA = transform_to_training_format(dict_onedoc)[:50] # TODO: get rid of [:50]
TRAIN_DATA_orig = TRAIN_DATA[:]
print(type(TRAIN_DATA), TRAIN_DATA[:10])
# TODO: format checks
TRAIN_DATA = transform_to_training_format(dict_onedoc)
TRAIN_DATA_orig = TRAIN_DATA[:] # save a copy to have an unshuffled version
print("Training data loaded")
else:
sys.exit("no training data")
......@@ -97,14 +96,6 @@ def main(model=None, output_dir=None, n_iter=100, training_data=None, trained_da
dict_ents_test['entities'] = list_ents_test
tpl = (text, dict_ents_test, title)
TRAIN_DATA_tested.append(tpl)
# print('train', list_ents_train)
# print('test', list_ents_test)
# print(set(list_ents_train) == set(list_ents_test))
# if print_output
#if not set(list_ents_train) == set(list_ents_test):
#print(text)
#print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
#print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])
alldicts_tested = transform_to_reading_format(TRAIN_DATA_tested)
......
#!/bin/bash
model=de_core_news_sm
n_iter=2
n_iter=100
path_data=data/train_NER/
training_data=${path_data}1891_20026449_corrected_german.txt
trained_data=${path_data}1891_20026449_corrected_german_trained.txt
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment