diff --git a/src/python/run_train_ner.py b/src/python/run_train_ner.py
index 158787244b9fc5f96ab038db13014cc92462047d..485210a437769a7b051b5df7bc5fc60bc8dd8944 100644
--- a/src/python/run_train_ner.py
+++ b/src/python/run_train_ner.py
@@ -36,10 +36,9 @@ def main(model=None, output_dir=None, n_iter=100, training_data=None, trained_da
     if training_data is not None:
         call_with_out("git-lfs pull -I " + training_data.as_posix())
         dict_onedoc = read_from_txt(training_data)
-        TRAIN_DATA = transform_to_training_format(dict_onedoc)[:50]   # TODO: get rid of [:50]
-        TRAIN_DATA_orig = TRAIN_DATA[:]
-        print(type(TRAIN_DATA), TRAIN_DATA[:10])
-        # TODO: format checks
+        TRAIN_DATA = transform_to_training_format(dict_onedoc)
+        TRAIN_DATA_orig = TRAIN_DATA[:]      # save a copy to have an unshuffled version
+        print("Training data loaded"])
     else:
         sys.exit("no training data")
 
@@ -97,14 +96,6 @@ def main(model=None, output_dir=None, n_iter=100, training_data=None, trained_da
         dict_ents_test['entities'] = list_ents_test
         tpl = (text, dict_ents_test, title)
         TRAIN_DATA_tested.append(tpl)
-#        print('train', list_ents_train)
-#        print('test', list_ents_test)
-#        print(set(list_ents_train) == set(list_ents_test))
-#       if print_output
-        #if not set(list_ents_train) == set(list_ents_test):
-            #print(text)
-            #print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
-	    #print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])
 
     alldicts_tested = transform_to_reading_format(TRAIN_DATA_tested)