From efae73fd8864137fc271f922799b7d6d353afe69 Mon Sep 17 00:00:00 2001
From: Clemens Hutter <huttercl@student.ethz.ch>
Date: Tue, 12 Nov 2019 17:52:12 +0100
Subject: [PATCH] textlines become duplicated between 02 and 04

---
 src/python/preproc_docs.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/python/preproc_docs.py b/src/python/preproc_docs.py
index 70946ac2..6b4b805a 100644
--- a/src/python/preproc_docs.py
+++ b/src/python/preproc_docs.py
@@ -951,6 +951,7 @@ def group_textl_create_xml(xml_t, set_of_blocks, ref_textl, labels_textl, rescal
             all_bbox = np.array([]).reshape((4,0))
 
 
+    # TODO(clemens): possible cause of the textlines with raw character children that still appear at the end of the document -- both appends below use index -2 (Figure and Layout); confirm the second index
     # Just add the two final elements from the original xml
     page_el.append(xml_t[0][-2]) # Figure
     page_el.append(xml_t[0][-2]) # Layout
-- 
GitLab