Commit 3cc21a03 authored by Lili Gasser

trained data specified in function call

parent b73b5e2c
@@ -27,14 +27,15 @@ from utils_proc import call_with_out
     model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
     output_dir=("Optional output directory", "option", "o", Path),
     n_iter=("Number of training iterations", "option", "n", int),
-    train_data=("Training data. So far document-wise.", "option", "t", Path),
+    training_data=("Training data. So far document-wise.", "option", "t", Path),
+    trained_data=("Trained data. Model generated from training run on training data.", "option", "u", Path),
     print_output=("Print output. Boolean.", "option", "p", bool)
 )
-def main(model=None, output_dir=None, n_iter=100, train_data=None, print_output=False):
+def main(model=None, output_dir=None, n_iter=100, training_data=None, trained_data=None, print_output=False):
     """Load training data and the model, set up the pipeline and train the entity recognizer."""
-    if train_data is not None:
-        call_with_out("git-lfs pull -I " + train_data.as_posix())
-        dict_onedoc = read_from_txt(train_data)
+    if training_data is not None:
+        call_with_out("git-lfs pull -I " + training_data.as_posix())
+        dict_onedoc = read_from_txt(training_data)
         TRAIN_DATA = transform_to_training_format(dict_onedoc)[:50] # TODO: get rid of [:50]
         TRAIN_DATA_orig = TRAIN_DATA[:]
         print(type(TRAIN_DATA), TRAIN_DATA[:10])
@@ -107,8 +108,8 @@ def main(model=None, output_dir=None, n_iter=100, train_data=None, print_output=
     alldicts_tested = transform_to_reading_format(TRAIN_DATA_tested)
-    filename_tested = str(train_data)[:-4] + '_trained.txt'
-    write_to_txt(alldicts_tested, filename_tested)
+    if trained_data is not None:
+        write_to_txt(alldicts_tested, trained_data)
     # save model to output directory
     if output_dir is not None:
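
For context, a minimal sketch of how the changed entry point is wired up and invoked after this commit, assuming the script is run through plac (as the @plac.annotations decorator suggests). The file name train_ner.py, the placeholder body and the example paths are hypothetical; the option names and the signature follow the diff above.

from pathlib import Path

import plac


@plac.annotations(
    model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
    output_dir=("Optional output directory", "option", "o", Path),
    n_iter=("Number of training iterations", "option", "n", int),
    training_data=("Training data. So far document-wise.", "option", "t", Path),
    trained_data=("Trained data. Model generated from training run on training data.", "option", "u", Path),
    print_output=("Print output. Boolean.", "option", "p", bool)
)
def main(model=None, output_dir=None, n_iter=100, training_data=None,
         trained_data=None, print_output=False):
    """Placeholder body: the real script loads the data, trains the entity recognizer and writes the results."""
    print(model, output_dir, n_iter, training_data, trained_data, print_output)


if __name__ == "__main__":
    plac.call(main)

# Example invocation (paths are hypothetical):
#   python train_ner.py -t data/doc01.txt -u data/doc01_trained.txt -n 10

Note the behavioural change in the second hunk: the tested annotations are now written only when the trained_data option (-u) is given, instead of always writing to a file name derived from the training data path (str(train_data)[:-4] + '_trained.txt').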