From 3cc21a0384ffe8c9137dfd7edb3ddb6c201eaa8f Mon Sep 17 00:00:00 2001 From: Lili Gasser <gasserli@ethz.ch> Date: Mon, 18 Feb 2019 09:53:47 +0000 Subject: [PATCH] trained data specified in function call --- src/python/example_train-ner.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/python/example_train-ner.py b/src/python/example_train-ner.py index 86ae826a..0f61d753 100644 --- a/src/python/example_train-ner.py +++ b/src/python/example_train-ner.py @@ -27,14 +27,15 @@ from utils_proc import call_with_out model=("Model name. Defaults to blank 'en' model.", "option", "m", str), output_dir=("Optional output directory", "option", "o", Path), n_iter=("Number of training iterations", "option", "n", int), - train_data=("Training data. So far document-wise.", "option", "t", Path), + training_data=("Training data. So far document-wise.", "option", "t", Path), + trained_data=("Trained data. Model generated from training run on training data.", "option", "u", Path), print_output=("Print output. Boolean.", "option", "p", bool) ) -def main(model=None, output_dir=None, n_iter=100, train_data=None, print_output=False): +def main(model=None, output_dir=None, n_iter=100, training_data=None, trained_data=None, print_output=False): """Load training data and the model, set up the pipeline and train the entity recognizer.""" - if train_data is not None: - call_with_out("git-lfs pull -I " + train_data.as_posix()) - dict_onedoc = read_from_txt(train_data) + if training_data is not None: + call_with_out("git-lfs pull -I " + training_data.as_posix()) + dict_onedoc = read_from_txt(training_data) TRAIN_DATA = transform_to_training_format(dict_onedoc)[:50] # TODO: get rid of [:50] TRAIN_DATA_orig = TRAIN_DATA[:] print(type(TRAIN_DATA), TRAIN_DATA[:10]) @@ -107,8 +108,8 @@ def main(model=None, output_dir=None, n_iter=100, train_data=None, print_output= alldicts_tested = transform_to_reading_format(TRAIN_DATA_tested) - filename_tested = str(train_data)[:-4] + '_trained.txt' - write_to_txt(alldicts_tested, filename_tested) + if trained_data is not None: + write_to_txt(alldicts_tested, trained_data) # save model to output directory if output_dir is not None: -- GitLab