diff --git a/src/python/example_train-ner.py b/src/python/example_train-ner.py
index 86ae826a78e406bfc8d5ebd7b0986247bb9287c1..0f61d7534731c7ced0bd8bea6fab4adf77a4bca3 100644
--- a/src/python/example_train-ner.py
+++ b/src/python/example_train-ner.py
@@ -27,14 +27,15 @@ from utils_proc import call_with_out
     model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
     output_dir=("Optional output directory", "option", "o", Path),
     n_iter=("Number of training iterations", "option", "n", int),
-    train_data=("Training data. So far document-wise.", "option", "t", Path),
+    training_data=("Training data. So far document-wise.", "option", "t", Path),
+    trained_data=("Trained data. Model generated from training run on training data.", "option", "u", Path),
     print_output=("Print output. Boolean.", "option", "p", bool)
 )
-def main(model=None, output_dir=None, n_iter=100, train_data=None, print_output=False):
+def main(model=None, output_dir=None, n_iter=100, training_data=None, trained_data=None, print_output=False):
     """Load training data and the model, set up the pipeline and train the entity recognizer."""
-    if train_data is not None:
-        call_with_out("git-lfs pull -I " + train_data.as_posix())
-        dict_onedoc = read_from_txt(train_data)
+    if training_data is not None:
+        call_with_out("git-lfs pull -I " + training_data.as_posix())
+        dict_onedoc = read_from_txt(training_data)
     TRAIN_DATA = transform_to_training_format(dict_onedoc)[:50] # TODO: get rid of [:50]
     TRAIN_DATA_orig = TRAIN_DATA[:]
     print(type(TRAIN_DATA), TRAIN_DATA[:10])
@@ -107,8 +108,8 @@ def main(model=None, output_dir=None, n_iter=100, train_data=None, print_output=
 
     alldicts_tested = transform_to_reading_format(TRAIN_DATA_tested)
 
-    filename_tested = str(train_data)[:-4] + '_trained.txt'
-    write_to_txt(alldicts_tested, filename_tested)
+    if trained_data is not None:
+        write_to_txt(alldicts_tested, trained_data)
 
     # save model to output directory
     if output_dir is not None: