From efae73fd8864137fc271f922799b7d6d353afe69 Mon Sep 17 00:00:00 2001
From: Clemens Hutter <huttercl@student.ethz.ch>
Date: Tue, 12 Nov 2019 17:52:12 +0100
Subject: [PATCH] textlines become duplicated between 02 and 04

---
 src/python/preproc_docs.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/python/preproc_docs.py b/src/python/preproc_docs.py
index 70946ac2..6b4b805a 100644
--- a/src/python/preproc_docs.py
+++ b/src/python/preproc_docs.py
@@ -951,6 +951,7 @@ def group_textl_create_xml(xml_t, set_of_blocks, ref_textl, labels_textl, rescal
 
     all_bbox = np.array([]).reshape((4,0))
 
+    # TODO note clemens: this might be the reason there are still textlines with raw character children at the end of the document
     # Just add the two final elements from the original xml
     page_el.append(xml_t[0][-2]) # Figure
     page_el.append(xml_t[0][-2]) # Layout
-- 
GitLab