diff --git a/notebooks/DuplicatedTextboxBug.ipynb b/notebooks/DuplicatedTextboxBug.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..dcfb085d9e2d2cd40bec7144257518d306e606d3
--- /dev/null
+++ b/notebooks/DuplicatedTextboxBug.ipynb
@@ -0,0 +1,3516 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os, sys\n",
+    "\n",
+    "sys.path.append('../src/python/')\n",
+    "\n",
+    "import xml.etree.ElementTree as ET\n",
+    "import numpy as np\n",
+    "import utils_proc"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 114,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "path = \"../data/AB_other/SessionOverviews_tar\"\n",
+    "is_dublicated= lambda l: np.array([l.count(x)>1 for x in l])\n",
+    "\n",
+    "boolean_index = lambda l, mask: [x for x, i in zip(l, mask) if i==True]\n",
+    "\n",
+    "id_string_template = \"{year}_{file}_{page}_{bbox}\"\n",
+    "\n",
+    "global collection_of_duplicates_corrected_xml\n",
+    "collection_of_duplicates_corrected_xml = dict()\n",
+    "\n",
+    "\n",
+    "\n",
+    "def duplicates_in_file(fp, fp_raw, year=None, file=None, ignore_dubplicates_on_last_page=False):\n",
+    "    \"\"\"Report textlines that share their bbox with another textline on the same page.\n",
+    "\n",
+    "    For every page of the XML read from ``fp``, each ``bbox`` value carried by\n",
+    "    more than one ``<textline>`` is printed together with its repetition count.\n",
+    "    The repeated elements are also stored in the module-level\n",
+    "    ``collection_of_duplicates_corrected_xml`` under a\n",
+    "    ``{year}_{file}_{page}_{bbox}`` key.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    fp : anything ``ET.parse`` accepts\n",
+    "        The XML document to inspect.\n",
+    "    fp_raw : anything ``ET.parse`` accepts, or None\n",
+    "        Optional reference XML; when given, the number of textlines with the\n",
+    "        same bbox anywhere in that document is printed as well.\n",
+    "    year, file :\n",
+    "        Only used to build the id string and the printed messages.\n",
+    "    ignore_dubplicates_on_last_page : bool\n",
+    "        When True, pages for which ``pages_left`` is not positive are skipped.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    None. Results are printed and collected in the module-level dict.\n",
+    "    \"\"\"\n",
+    "    tree = ET.parse(fp)\n",
+    "\n",
+    "    # The reference XML is parsed lazily and at most once per call. Parsing it\n",
+    "    # again for every bbox fails on file objects, because the first parse\n",
+    "    # already consumed the stream.\n",
+    "    tree_raw = None\n",
+    "    raw_parse_attempted = False\n",
+    "\n",
+    "    all_pages = tree.findall(\"./page\")\n",
+    "    n_pages = len(all_pages)\n",
+    "    for page in all_pages:\n",
+    "        page_id = page.attrib[\"id\"]\n",
+    "        # NOTE(review): assumes page ids are 0-based and consecutive -- confirm.\n",
+    "        pages_left = n_pages - int(page_id) - 1\n",
+    "        if ignore_dubplicates_on_last_page and pages_left <= 0:\n",
+    "            continue\n",
+    "\n",
+    "        all_textlines = page.findall(\".//textline\")\n",
+    "        bboxes = [t.attrib[\"bbox\"] for t in all_textlines]\n",
+    "\n",
+    "        # Count each bbox once; this also copes with pages without textlines.\n",
+    "        bbox_counts = {}\n",
+    "        for bbox in bboxes:\n",
+    "            bbox_counts[bbox] = bbox_counts.get(bbox, 0) + 1\n",
+    "\n",
+    "        # sorted() keeps the reporting order of the previous np.unique-based loop.\n",
+    "        for bbox in sorted(b for b, n in bbox_counts.items() if n > 1):\n",
+    "            repetitions_in_file = [t for t, b in zip(all_textlines, bboxes) if b == bbox]\n",
+    "            repetitions_xml = [ET.tostring(r) for r in repetitions_in_file]\n",
+    "            all_the_same = len(set(repetitions_xml)) == 1\n",
+    "\n",
+    "            id_str = id_string_template.format(year=year, file=file, page=page_id, bbox=bbox)\n",
+    "            collection_of_duplicates_corrected_xml[id_str] = repetitions_in_file\n",
+    "            # Use the `file` argument; `file_id` only existed as a notebook global.\n",
+    "            print(\"year: {}, document: {}\".format(year, file))\n",
+    "            print(\"coords: {}, page_id: {}, pages_left: {}, n_reps: {}\".format(bbox,\n",
+    "                                                                            page_id,\n",
+    "                                                                            pages_left,\n",
+    "                                                                            len(repetitions_in_file)))\n",
+    "            print(\"all repetitions are the same: {}\".format(all_the_same))\n",
+    "\n",
+    "            # Now we try to find the same bbox in the raw xml.\n",
+    "            if fp_raw is not None:\n",
+    "                if not raw_parse_attempted:\n",
+    "                    raw_parse_attempted = True\n",
+    "                    try:\n",
+    "                        tree_raw = ET.parse(fp_raw)\n",
+    "                    except Exception:\n",
+    "                        # Best effort: the reference file is optional, so a broken\n",
+    "                        # or unreadable one is reported below, not raised.\n",
+    "                        tree_raw = None\n",
+    "                if tree_raw is None:\n",
+    "                    print(\"Couldn't parse 02_xml {}/{}\".format(year, file))\n",
+    "                else:\n",
+    "                    # Searches the whole reference document, not only the current page.\n",
+    "                    in_reference = tree_raw.findall(\".//textline[@bbox='{}']\".format(bbox))\n",
+    "                    print(\"number of textlines with this bbox in 02_xml {}\".format(len(in_reference)))\n",
+    "\n",
+    "            print(\"--------------------------\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 115,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "../data/AB_other/SessionOverviews_tar/1918/04_correctedxml.tar.gz\n",
+      "year: 1918, document: 110000379_datacorr\n",
+      "coords: 80.000,252.230,456.450,263.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1918, document: 110000379_datacorr\n",
+      "coords: 80.000,267.230,450.770,278.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1918/110000379_datacorr\n",
+      "--------------------------\n",
+      "year: 1918, document: 110000379_datacorr\n",
+      "coords: 80.000,282.230,489.100,293.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1918/110000379_datacorr\n",
+      "--------------------------\n",
+      "year: 1918, document: 110000387_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1918, document: 110000387_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1918/110000387_datacorr\n",
+      "--------------------------\n",
+      "year: 1918, document: 110000387_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1918/110000387_datacorr\n",
+      "--------------------------\n",
+      "year: 1918, document: 110000391_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 1, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1918, document: 110000391_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 1, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1918/110000391_datacorr\n",
+      "--------------------------\n",
+      "year: 1918, document: 110000391_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 1, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1918/110000391_datacorr\n",
+      "--------------------------\n",
+      "year: 1918, document: 110000395_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1918, document: 110000395_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1918/110000395_datacorr\n",
+      "--------------------------\n",
+      "year: 1918, document: 110000395_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1918/110000395_datacorr\n",
+      "--------------------------\n",
+      "year: 1918, document: 110000383_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1918, document: 110000383_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1918/110000383_datacorr\n",
+      "--------------------------\n",
+      "year: 1918, document: 110000383_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1918/110000383_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1935/04_correctedxml.tar.gz\n",
+      "year: 1935, document: 110000695_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 22, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1935, document: 110000695_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 22, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1935, document: 110000695_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 22, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1935, document: 110000683_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 22, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1935, document: 110000683_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 22, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1935/110000683_datacorr\n",
+      "--------------------------\n",
+      "year: 1935, document: 110000683_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 22, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1935/110000683_datacorr\n",
+      "--------------------------\n",
+      "year: 1935, document: 110000687_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1935, document: 110000687_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1935, document: 110000687_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1935, document: 110000691_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 21, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1935, document: 110000691_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 21, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1935, document: 110000691_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 21, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1893/04_correctedxml.tar.gz\n",
+      "year: 1893, document: 110000015_datacorr\n",
+      "coords: 138.24,644.4 ,293.04,663.12, page_id: 1, pages_left: 12, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1893, document: 110000015_datacorr\n",
+      "coords: 390.24,642.24,665.28,664.56, page_id: 1, pages_left: 12, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1893/110000015_datacorr\n",
+      "--------------------------\n",
+      "year: 1893, document: 110000015_datacorr\n",
+      "coords: 574.56,624.96,678.24,651.6 , page_id: 1, pages_left: 12, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1893/110000015_datacorr\n",
+      "--------------------------\n",
+      "year: 1893, document: 110000015_datacorr\n",
+      "coords: 645.84,291.6 ,666.72,295.92, page_id: 1, pages_left: 12, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1893/110000015_datacorr\n",
+      "--------------------------\n",
+      "year: 1893, document: 110000015_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1893/110000015_datacorr\n",
+      "--------------------------\n",
+      "year: 1893, document: 110000015_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1893/110000015_datacorr\n",
+      "--------------------------\n",
+      "year: 1893, document: 110000015_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1893/110000015_datacorr\n",
+      "--------------------------\n",
+      "year: 1893, document: 110000017_datacorr\n",
+      "coords: 325.942,378.72 ,474.883,398.88 , page_id: 0, pages_left: 14, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1893, document: 110000017_datacorr\n",
+      "coords:  53.28,298.8 ,285.84,329.04, page_id: 2, pages_left: 12, n_reps: 3\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1893/110000017_datacorr\n",
+      "--------------------------\n",
+      "year: 1893, document: 110000017_datacorr\n",
+      "coords: 320.904,588.24 ,586.405,602.64 , page_id: 10, pages_left: 4, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1893/110000017_datacorr\n",
+      "--------------------------\n",
+      "year: 1893, document: 110000017_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1893/110000017_datacorr\n",
+      "--------------------------\n",
+      "year: 1893, document: 110000017_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1893/110000017_datacorr\n",
+      "--------------------------\n",
+      "year: 1893, document: 110000017_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1893/110000017_datacorr\n",
+      "--------------------------\n",
+      "year: 1893, document: 110000009_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1893, document: 110000009_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1893/110000009_datacorr\n",
+      "--------------------------\n",
+      "year: 1893, document: 110000009_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1893/110000009_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1910/04_correctedxml.tar.gz\n",
+      "year: 1910, document: 110000267_datacorr\n",
+      "coords:  87.807,270.   ,136.749,275.04 , page_id: 7, pages_left: 9, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1910, document: 110000267_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1910/110000267_datacorr\n",
+      "--------------------------\n",
+      "year: 1910, document: 110000267_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1910/110000267_datacorr\n",
+      "--------------------------\n",
+      "year: 1910, document: 110000267_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1910/110000267_datacorr\n",
+      "--------------------------\n",
+      "year: 1910, document: 110000259_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1910, document: 110000259_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1910/110000259_datacorr\n",
+      "--------------------------\n",
+      "year: 1910, document: 110000259_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1910/110000259_datacorr\n",
+      "--------------------------\n",
+      "year: 1910, document: 110000263_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1910, document: 110000263_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1910/110000263_datacorr\n",
+      "--------------------------\n",
+      "year: 1910, document: 110000263_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1910/110000263_datacorr\n",
+      "--------------------------\n",
+      "year: 1910, document: 110000251_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1910, document: 110000251_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1910/110000251_datacorr\n",
+      "--------------------------\n",
+      "year: 1910, document: 110000251_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1910/110000251_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1900/04_correctedxml.tar.gz\n",
+      "year: 1900, document: 110000103_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1900, document: 110000103_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1900/110000103_datacorr\n",
+      "--------------------------\n",
+      "year: 1900, document: 110000103_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1900/110000103_datacorr\n",
+      "--------------------------\n",
+      "year: 1900, document: 110000111_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1900, document: 110000111_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1900, document: 110000111_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1900, document: 110000107_datacorr\n",
+      "coords: 454.174,774.   ,599.567,791.28 , page_id: 3, pages_left: 10, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1900, document: 110000107_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1900/110000107_datacorr\n",
+      "--------------------------\n",
+      "year: 1900, document: 110000107_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1900/110000107_datacorr\n",
+      "--------------------------\n",
+      "year: 1900, document: 110000107_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1900/110000107_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1947/04_correctedxml.tar.gz\n",
+      "year: 1947, document: 110000895_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 36, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1947, document: 110000895_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 36, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1947/110000895_datacorr\n",
+      "--------------------------\n",
+      "year: 1947, document: 110000895_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 36, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1947/110000895_datacorr\n",
+      "--------------------------\n",
+      "year: 1947, document: 110000899_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1947, document: 110000899_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1947/110000899_datacorr\n",
+      "--------------------------\n",
+      "year: 1947, document: 110000899_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1947/110000899_datacorr\n",
+      "--------------------------\n",
+      "year: 1947, document: 110000907_datacorr\n",
+      "coords: 514.08 ,104.339,527.04 ,107.937, page_id: 18, pages_left: 16, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1947, document: 110000907_datacorr\n",
+      "coords: 137.52 ,123.084,277.2  ,153.315, page_id: 26, pages_left: 8, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1947/110000907_datacorr\n",
+      "--------------------------\n",
+      "year: 1947, document: 110000907_datacorr\n",
+      "coords: 475.531, 94.985,509.343,100.022, page_id: 32, pages_left: 2, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1947/110000907_datacorr\n",
+      "--------------------------\n",
+      "year: 1947, document: 110000907_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1947/110000907_datacorr\n",
+      "--------------------------\n",
+      "year: 1947, document: 110000907_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1947/110000907_datacorr\n",
+      "--------------------------\n",
+      "year: 1947, document: 110000907_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1947/110000907_datacorr\n",
+      "--------------------------\n",
+      "year: 1947, document: 110000903_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 32, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1947, document: 110000903_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 32, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1947/110000903_datacorr\n",
+      "--------------------------\n",
+      "year: 1947, document: 110000903_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 32, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1947/110000903_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1936/04_correctedxml.tar.gz\n",
+      "year: 1936, document: 110000703_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1936, document: 110000703_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1936/110000703_datacorr\n",
+      "--------------------------\n",
+      "year: 1936, document: 110000703_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1936/110000703_datacorr\n",
+      "--------------------------\n",
+      "year: 1936, document: 110000715_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 32, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1936, document: 110000715_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 32, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1936/110000715_datacorr\n",
+      "--------------------------\n",
+      "year: 1936, document: 110000715_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 32, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1936/110000715_datacorr\n",
+      "--------------------------\n",
+      "year: 1936, document: 110000711_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1936, document: 110000711_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1936/110000711_datacorr\n",
+      "--------------------------\n",
+      "year: 1936, document: 110000711_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1936/110000711_datacorr\n",
+      "--------------------------\n",
+      "year: 1936, document: 110000699_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 26, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1936, document: 110000699_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 26, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1936/110000699_datacorr\n",
+      "--------------------------\n",
+      "year: 1936, document: 110000699_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 26, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1936/110000699_datacorr\n",
+      "--------------------------\n",
+      "year: 1936, document: 110000707_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 31, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1936, document: 110000707_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 31, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1936/110000707_datacorr\n",
+      "--------------------------\n",
+      "year: 1936, document: 110000707_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 31, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1936/110000707_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1921/04_correctedxml.tar.gz\n",
+      "year: 1921, document: 110000447_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1921, document: 110000447_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1921/110000447_datacorr\n",
+      "--------------------------\n",
+      "year: 1921, document: 110000447_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1921/110000447_datacorr\n",
+      "--------------------------\n",
+      "year: 1921, document: 110000439_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1921, document: 110000439_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1921/110000439_datacorr\n",
+      "--------------------------\n",
+      "year: 1921, document: 110000439_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1921/110000439_datacorr\n",
+      "--------------------------\n",
+      "year: 1921, document: 110000451_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1921, document: 110000451_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1921/110000451_datacorr\n",
+      "--------------------------\n",
+      "year: 1921, document: 110000451_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1921/110000451_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1920/04_correctedxml.tar.gz\n",
+      "year: 1920, document: 110000423_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1920, document: 110000423_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1920/110000423_datacorr\n",
+      "--------------------------\n",
+      "year: 1920, document: 110000423_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1920/110000423_datacorr\n",
+      "--------------------------\n",
+      "year: 1920, document: 110000435_datacorr\n",
+      "coords: 80.000,201.230,456.450,212.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1920, document: 110000435_datacorr\n",
+      "coords: 80.000,216.230,450.770,227.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1920/110000435_datacorr\n",
+      "--------------------------\n",
+      "year: 1920, document: 110000435_datacorr\n",
+      "coords: 80.000,231.230,489.100,242.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1920/110000435_datacorr\n",
+      "--------------------------\n",
+      "year: 1920, document: 110000431_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1920, document: 110000431_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1920/110000431_datacorr\n",
+      "--------------------------\n",
+      "year: 1920, document: 110000431_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1920/110000431_datacorr\n",
+      "--------------------------\n",
+      "year: 1920, document: 110000419_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 22, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1920, document: 110000419_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 22, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1920/110000419_datacorr\n",
+      "--------------------------\n",
+      "year: 1920, document: 110000419_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 22, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1920/110000419_datacorr\n",
+      "--------------------------\n",
+      "year: 1920, document: 110000427_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1920, document: 110000427_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1920/110000427_datacorr\n",
+      "--------------------------\n",
+      "year: 1920, document: 110000427_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1920/110000427_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1944/04_correctedxml.tar.gz\n",
+      "year: 1944, document: 110000851_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 29, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1944, document: 110000851_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 29, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1944/110000851_datacorr\n",
+      "--------------------------\n",
+      "year: 1944, document: 110000851_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 29, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1944/110000851_datacorr\n",
+      "--------------------------\n",
+      "year: 1944, document: 110000847_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1944, document: 110000847_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1944/110000847_datacorr\n",
+      "--------------------------\n",
+      "year: 1944, document: 110000847_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1944/110000847_datacorr\n",
+      "--------------------------\n",
+      "year: 1944, document: 110000843_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1944, document: 110000843_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1944/110000843_datacorr\n",
+      "--------------------------\n",
+      "year: 1944, document: 110000843_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1944/110000843_datacorr\n",
+      "--------------------------\n",
+      "year: 1944, document: 110000855_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 32, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1944, document: 110000855_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 32, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1944/110000855_datacorr\n",
+      "--------------------------\n",
+      "year: 1944, document: 110000855_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 32, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1944/110000855_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1899/04_correctedxml.tar.gz\n",
+      "year: 1899, document: 110000099_datacorr\n",
+      "coords: 485.83 ,382.828,636.978,418.089, page_id: 4, pages_left: 9, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1899, document: 110000099_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1899, document: 110000099_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1899, document: 110000099_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1899, document: 110000091_datacorr\n",
+      "coords: 619.2 , 82.08,694.08, 87.84, page_id: 3, pages_left: 12, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1899, document: 110000091_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1899/110000091_datacorr\n",
+      "--------------------------\n",
+      "year: 1899, document: 110000091_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1899/110000091_datacorr\n",
+      "--------------------------\n",
+      "year: 1899, document: 110000091_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1899/110000091_datacorr\n",
+      "--------------------------\n",
+      "year: 1899, document: 110000095_datacorr\n",
+      "coords: 140.4 , 95.76,431.28,115.2 , page_id: 7, pages_left: 4, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1899, document: 110000095_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1899, document: 110000095_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1899, document: 110000095_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1932/04_correctedxml.tar.gz\n",
+      "year: 1932, document: 110000639_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1932, document: 110000639_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1932/110000639_datacorr\n",
+      "--------------------------\n",
+      "year: 1932, document: 110000639_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1932/110000639_datacorr\n",
+      "--------------------------\n",
+      "year: 1932, document: 110000635_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 6, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1932, document: 110000635_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 6, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1932/110000635_datacorr\n",
+      "--------------------------\n",
+      "year: 1932, document: 110000635_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 6, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1932/110000635_datacorr\n",
+      "--------------------------\n",
+      "year: 1932, document: 110000627_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1932, document: 110000627_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1932/110000627_datacorr\n",
+      "--------------------------\n",
+      "year: 1932, document: 110000627_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1932/110000627_datacorr\n",
+      "--------------------------\n",
+      "year: 1932, document: 110000631_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 27, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1932, document: 110000631_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 27, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1932/110000631_datacorr\n",
+      "--------------------------\n",
+      "year: 1932, document: 110000631_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 27, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1932/110000631_datacorr\n",
+      "--------------------------\n",
+      "year: 1932, document: 110000643_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 26, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1932, document: 110000643_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 26, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1932/110000643_datacorr\n",
+      "--------------------------\n",
+      "year: 1932, document: 110000643_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 26, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1932/110000643_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1930/04_correctedxml.tar.gz\n",
+      "year: 1930, document: 110000599_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1930, document: 110000599_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1930/110000599_datacorr\n",
+      "--------------------------\n",
+      "year: 1930, document: 110000599_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1930/110000599_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1925/04_correctedxml.tar.gz\n",
+      "year: 1925, document: 110000515_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1925, document: 110000515_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1925/110000515_datacorr\n",
+      "--------------------------\n",
+      "year: 1925, document: 110000515_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1925/110000515_datacorr\n",
+      "--------------------------\n",
+      "year: 1925, document: 110000507_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1925, document: 110000507_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1925/110000507_datacorr\n",
+      "--------------------------\n",
+      "year: 1925, document: 110000507_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1925/110000507_datacorr\n",
+      "--------------------------\n",
+      "year: 1925, document: 110000511_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1925, document: 110000511_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1925/110000511_datacorr\n",
+      "--------------------------\n",
+      "year: 1925, document: 110000511_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1925/110000511_datacorr\n",
+      "--------------------------\n",
+      "year: 1925, document: 110000519_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1925, document: 110000519_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1925/110000519_datacorr\n",
+      "--------------------------\n",
+      "year: 1925, document: 110000519_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1925/110000519_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1941/04_correctedxml.tar.gz\n",
+      "year: 1941, document: 110000795_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1941, document: 110000795_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1941/110000795_datacorr\n",
+      "--------------------------\n",
+      "year: 1941, document: 110000795_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1941/110000795_datacorr\n",
+      "--------------------------\n",
+      "year: 1941, document: 110000791_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 21, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1941, document: 110000791_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 21, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1941/110000791_datacorr\n",
+      "--------------------------\n",
+      "year: 1941, document: 110000791_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 21, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1941/110000791_datacorr\n",
+      "--------------------------\n",
+      "year: 1941, document: 110000787_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 21, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1941, document: 110000787_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 21, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1941/110000787_datacorr\n",
+      "--------------------------\n",
+      "year: 1941, document: 110000787_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 21, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1941/110000787_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1945/04_correctedxml.tar.gz\n",
+      "year: 1945, document: 110000871_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1945, document: 110000871_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1945/110000871_datacorr\n",
+      "--------------------------\n",
+      "year: 1945, document: 110000871_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1945/110000871_datacorr\n",
+      "--------------------------\n",
+      "year: 1945, document: 110000859_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1945, document: 110000859_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1945/110000859_datacorr\n",
+      "--------------------------\n",
+      "year: 1945, document: 110000859_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1945/110000859_datacorr\n",
+      "--------------------------\n",
+      "year: 1945, document: 110000867_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1945, document: 110000867_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1945/110000867_datacorr\n",
+      "--------------------------\n",
+      "year: 1945, document: 110000867_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1945/110000867_datacorr\n",
+      "--------------------------\n",
+      "year: 1945, document: 110000863_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1945, document: 110000863_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1945/110000863_datacorr\n",
+      "--------------------------\n",
+      "year: 1945, document: 110000863_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1945/110000863_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1950/04_correctedxml.tar.gz\n",
+      "year: 1950, document: 110000951_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 36, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1950, document: 110000951_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 36, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1950/110000951_datacorr\n",
+      "--------------------------\n",
+      "year: 1950, document: 110000951_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 36, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1950/110000951_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1897/04_correctedxml.tar.gz\n",
+      "year: 1897, document: 110000059_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1897, document: 110000059_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1897/110000059_datacorr\n",
+      "--------------------------\n",
+      "year: 1897, document: 110000059_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1897/110000059_datacorr\n",
+      "--------------------------\n",
+      "year: 1897, document: 110000071_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1897, document: 110000071_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1897/110000071_datacorr\n",
+      "--------------------------\n",
+      "year: 1897, document: 110000071_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1897/110000071_datacorr\n",
+      "--------------------------\n",
+      "year: 1897, document: 110000067_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1897, document: 110000067_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1897/110000067_datacorr\n",
+      "--------------------------\n",
+      "year: 1897, document: 110000067_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1897/110000067_datacorr\n",
+      "--------------------------\n",
+      "year: 1897, document: 110000063_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1897, document: 110000063_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1897/110000063_datacorr\n",
+      "--------------------------\n",
+      "year: 1897, document: 110000063_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1897/110000063_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1906/04_correctedxml.tar.gz\n",
+      "year: 1906, document: 110000199_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1906, document: 110000199_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1906/110000199_datacorr\n",
+      "--------------------------\n",
+      "year: 1906, document: 110000199_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1906/110000199_datacorr\n",
+      "--------------------------\n",
+      "year: 1906, document: 110000191_datacorr\n",
+      "coords: 359.28 ,416.641,598.32 ,441.107, page_id: 6, pages_left: 11, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1906, document: 110000191_datacorr\n",
+      "coords: 127.44 ,621.182,221.76 ,640.617, page_id: 15, pages_left: 2, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1906/110000191_datacorr\n",
+      "--------------------------\n",
+      "year: 1906, document: 110000191_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1906/110000191_datacorr\n",
+      "--------------------------\n",
+      "year: 1906, document: 110000191_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1906/110000191_datacorr\n",
+      "--------------------------\n",
+      "year: 1906, document: 110000191_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1906/110000191_datacorr\n",
+      "--------------------------\n",
+      "year: 1906, document: 110000187_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1906, document: 110000187_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1906/110000187_datacorr\n",
+      "--------------------------\n",
+      "year: 1906, document: 110000187_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1906/110000187_datacorr\n",
+      "--------------------------\n",
+      "year: 1906, document: 110000195_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 4, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1906, document: 110000195_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 4, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1906/110000195_datacorr\n",
+      "--------------------------\n",
+      "year: 1906, document: 110000195_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 4, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1906/110000195_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1924/04_correctedxml.tar.gz\n",
+      "year: 1924, document: 110000503_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1924, document: 110000503_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1924/110000503_datacorr\n",
+      "--------------------------\n",
+      "year: 1924, document: 110000503_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1924/110000503_datacorr\n",
+      "--------------------------\n",
+      "year: 1924, document: 110000499_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1924, document: 110000499_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1924/110000499_datacorr\n",
+      "--------------------------\n",
+      "year: 1924, document: 110000499_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1924/110000499_datacorr\n",
+      "--------------------------\n",
+      "year: 1924, document: 110000495_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1924, document: 110000495_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1924/110000495_datacorr\n",
+      "--------------------------\n",
+      "year: 1924, document: 110000495_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1924/110000495_datacorr\n",
+      "--------------------------\n",
+      "year: 1924, document: 110000491_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1924, document: 110000491_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1924/110000491_datacorr\n",
+      "--------------------------\n",
+      "year: 1924, document: 110000491_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1924/110000491_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1909/04_correctedxml.tar.gz\n",
+      "year: 1909, document: 110000235_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1909, document: 110000235_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1909/110000235_datacorr\n",
+      "--------------------------\n",
+      "year: 1909, document: 110000235_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1909/110000235_datacorr\n",
+      "--------------------------\n",
+      "year: 1909, document: 110000243_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1909, document: 110000243_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1909/110000243_datacorr\n",
+      "--------------------------\n",
+      "year: 1909, document: 110000243_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1909/110000243_datacorr\n",
+      "--------------------------\n",
+      "year: 1909, document: 110000247_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1909, document: 110000247_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1909/110000247_datacorr\n",
+      "--------------------------\n",
+      "year: 1909, document: 110000247_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1909/110000247_datacorr\n",
+      "--------------------------\n",
+      "year: 1909, document: 110000239_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1909, document: 110000239_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1909/110000239_datacorr\n",
+      "--------------------------\n",
+      "year: 1909, document: 110000239_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1909/110000239_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1892/04_correctedxml.tar.gz\n",
+      "year: 1892, document: 110000007_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 8, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1892, document: 110000007_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 8, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1892/110000007_datacorr\n",
+      "--------------------------\n",
+      "year: 1892, document: 110000007_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 8, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1892/110000007_datacorr\n",
+      "--------------------------\n",
+      "year: 1892, document: 110000013_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1892, document: 110000013_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1892/110000013_datacorr\n",
+      "--------------------------\n",
+      "year: 1892, document: 110000013_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1892/110000013_datacorr\n",
+      "--------------------------\n",
+      "year: 1892, document: 110000011_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1892, document: 110000011_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1892/110000011_datacorr\n",
+      "--------------------------\n",
+      "year: 1892, document: 110000011_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1892/110000011_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1903/04_correctedxml.tar.gz\n",
+      "year: 1903, document: 110000147_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1903, document: 110000147_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1903/110000147_datacorr\n",
+      "--------------------------\n",
+      "year: 1903, document: 110000147_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1903/110000147_datacorr\n",
+      "--------------------------\n",
+      "year: 1903, document: 110000151_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1903, document: 110000151_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1903/110000151_datacorr\n",
+      "--------------------------\n",
+      "year: 1903, document: 110000151_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1903/110000151_datacorr\n",
+      "--------------------------\n",
+      "year: 1903, document: 110000155_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1903, document: 110000155_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1903/110000155_datacorr\n",
+      "--------------------------\n",
+      "year: 1903, document: 110000155_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1903/110000155_datacorr\n",
+      "--------------------------\n",
+      "year: 1903, document: 110000143_datacorr\n",
+      "coords:  57.557,444.706,150.368,454.78 , page_id: 3, pages_left: 8, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1903, document: 110000143_datacorr\n",
+      "coords: 320.88 ,448.304,393.546,459.097, page_id: 3, pages_left: 8, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1903/110000143_datacorr\n",
+      "--------------------------\n",
+      "year: 1903, document: 110000143_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1903/110000143_datacorr\n",
+      "--------------------------\n",
+      "year: 1903, document: 110000143_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1903/110000143_datacorr\n",
+      "--------------------------\n",
+      "year: 1903, document: 110000143_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1903/110000143_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1895/04_correctedxml.tar.gz\n",
+      "year: 1895, document: 110000039_datacorr\n",
+      "coords: 511.025,531.933,628.344,542.01 , page_id: 0, pages_left: 2, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1895, document: 110000039_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 2, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1895/110000039_datacorr\n",
+      "--------------------------\n",
+      "year: 1895, document: 110000039_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 2, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1895/110000039_datacorr\n",
+      "--------------------------\n",
+      "year: 1895, document: 110000039_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 2, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1895/110000039_datacorr\n",
+      "--------------------------\n",
+      "year: 1895, document: 110000033_datacorr\n",
+      "coords: 184.198,795.16 ,264.065,800.917, page_id: 1, pages_left: 9, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1895, document: 110000033_datacorr\n",
+      "coords: 479.52 ,472.059,695.52 ,488.61 , page_id: 3, pages_left: 7, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1895/110000033_datacorr\n",
+      "--------------------------\n",
+      "year: 1895, document: 110000033_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1895/110000033_datacorr\n",
+      "--------------------------\n",
+      "year: 1895, document: 110000033_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1895/110000033_datacorr\n",
+      "--------------------------\n",
+      "year: 1895, document: 110000033_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1895/110000033_datacorr\n",
+      "--------------------------\n",
+      "year: 1895, document: 110000037_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1895, document: 110000037_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1895/110000037_datacorr\n",
+      "--------------------------\n",
+      "year: 1895, document: 110000037_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1895/110000037_datacorr\n",
+      "--------------------------\n",
+      "year: 1895, document: 110000043_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1895, document: 110000043_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1895/110000043_datacorr\n",
+      "--------------------------\n",
+      "year: 1895, document: 110000043_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1895/110000043_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1919/04_correctedxml.tar.gz\n",
+      "year: 1919, document: 110000399_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 22, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1919, document: 110000399_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 22, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1919/110000399_datacorr\n",
+      "--------------------------\n",
+      "year: 1919, document: 110000399_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 22, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1919/110000399_datacorr\n",
+      "--------------------------\n",
+      "year: 1919, document: 110000415_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1919, document: 110000415_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1919/110000415_datacorr\n",
+      "--------------------------\n",
+      "year: 1919, document: 110000415_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1919/110000415_datacorr\n",
+      "--------------------------\n",
+      "year: 1919, document: 110000407_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1919, document: 110000407_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1919/110000407_datacorr\n",
+      "--------------------------\n",
+      "year: 1919, document: 110000407_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1919/110000407_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1905/04_correctedxml.tar.gz\n",
+      "year: 1905, document: 110000179_datacorr\n",
+      "coords:  29.498,293.04 ,270.517,320.4  , page_id: 4, pages_left: 11, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1905, document: 110000179_datacorr\n",
+      "coords:  53.26 ,459.948,175.614,480.822, page_id: 6, pages_left: 9, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1905/110000179_datacorr\n",
+      "--------------------------\n",
+      "year: 1905, document: 110000179_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1905/110000179_datacorr\n",
+      "--------------------------\n",
+      "year: 1905, document: 110000179_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1905/110000179_datacorr\n",
+      "--------------------------\n",
+      "year: 1905, document: 110000179_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1905/110000179_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1911/04_correctedxml.tar.gz\n",
+      "year: 1911, document: 110000279_datacorr\n",
+      "coords: 246.055,273.6  ,535.277,287.28 , page_id: 8, pages_left: 8, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1911, document: 110000279_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1911/110000279_datacorr\n",
+      "--------------------------\n",
+      "year: 1911, document: 110000279_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1911/110000279_datacorr\n",
+      "--------------------------\n",
+      "year: 1911, document: 110000279_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1911/110000279_datacorr\n",
+      "--------------------------\n",
+      "year: 1911, document: 110000275_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1911, document: 110000275_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1911/110000275_datacorr\n",
+      "--------------------------\n",
+      "year: 1911, document: 110000275_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1911/110000275_datacorr\n",
+      "--------------------------\n",
+      "year: 1911, document: 110000271_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1911, document: 110000271_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1911/110000271_datacorr\n",
+      "--------------------------\n",
+      "year: 1911, document: 110000271_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1911/110000271_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1938/04_correctedxml.tar.gz\n",
+      "year: 1938, document: 110000743_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1938, document: 110000743_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1938/110000743_datacorr\n",
+      "--------------------------\n",
+      "year: 1938, document: 110000743_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1938/110000743_datacorr\n",
+      "--------------------------\n",
+      "year: 1938, document: 110000739_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1938, document: 110000739_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1938/110000739_datacorr\n",
+      "--------------------------\n",
+      "year: 1938, document: 110000739_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1938/110000739_datacorr\n",
+      "--------------------------\n",
+      "year: 1938, document: 110000747_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 45, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1938, document: 110000747_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 45, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1938/110000747_datacorr\n",
+      "--------------------------\n",
+      "year: 1938, document: 110000747_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 45, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1938/110000747_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1908/04_correctedxml.tar.gz\n",
+      "year: 1908, document: 110000223_datacorr\n",
+      "coords:  40.29 ,262.005,263.322,300.874, page_id: 10, pages_left: 6, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1908, document: 110000223_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1908/110000223_datacorr\n",
+      "--------------------------\n",
+      "year: 1908, document: 110000223_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1908/110000223_datacorr\n",
+      "--------------------------\n",
+      "year: 1908, document: 110000223_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1908/110000223_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1922/04_correctedxml.tar.gz\n",
+      "year: 1922, document: 110000463_datacorr\n",
+      "coords: 402.894,742.828,541.028,759.383, page_id: 2, pages_left: 19, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1922, document: 110000463_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 21, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1922, document: 110000463_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 21, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1922, document: 110000463_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 21, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1922, document: 110000455_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1922, document: 110000455_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1922/110000455_datacorr\n",
+      "--------------------------\n",
+      "year: 1922, document: 110000455_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1922/110000455_datacorr\n",
+      "--------------------------\n",
+      "year: 1922, document: 110000467_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1922, document: 110000467_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1922/110000467_datacorr\n",
+      "--------------------------\n",
+      "year: 1922, document: 110000467_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1922/110000467_datacorr\n",
+      "--------------------------\n",
+      "year: 1922, document: 110000459_datacorr\n",
+      "coords: 421.034,475.655,523.234,501.56 , page_id: 23, pages_left: 22, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1922, document: 110000459_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 45, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1922/110000459_datacorr\n",
+      "--------------------------\n",
+      "year: 1922, document: 110000459_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 45, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1922/110000459_datacorr\n",
+      "--------------------------\n",
+      "year: 1922, document: 110000459_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 45, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1922/110000459_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1940/04_correctedxml.tar.gz\n",
+      "year: 1940, document: 110000775_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1940, document: 110000775_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1940/110000775_datacorr\n",
+      "--------------------------\n",
+      "year: 1940, document: 110000775_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1940/110000775_datacorr\n",
+      "--------------------------\n",
+      "year: 1940, document: 110000771_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 33, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1940, document: 110000771_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 33, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1940/110000771_datacorr\n",
+      "--------------------------\n",
+      "year: 1940, document: 110000771_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 33, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1940/110000771_datacorr\n",
+      "--------------------------\n",
+      "year: 1940, document: 110000783_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 26, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1940, document: 110000783_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 26, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1940/110000783_datacorr\n",
+      "--------------------------\n",
+      "year: 1940, document: 110000783_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 26, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1940/110000783_datacorr\n",
+      "--------------------------\n",
+      "year: 1940, document: 110000779_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 27, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1940, document: 110000779_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 27, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1940/110000779_datacorr\n",
+      "--------------------------\n",
+      "year: 1940, document: 110000779_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 27, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1940/110000779_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1926/04_correctedxml.tar.gz\n",
+      "year: 1926, document: 110000531_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1926, document: 110000531_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1926/110000531_datacorr\n",
+      "--------------------------\n",
+      "year: 1926, document: 110000531_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1926/110000531_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1902/04_correctedxml.tar.gz\n",
+      "year: 1902, document: 110000139_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1902, document: 110000139_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1902/110000139_datacorr\n",
+      "--------------------------\n",
+      "year: 1902, document: 110000139_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1902/110000139_datacorr\n",
+      "--------------------------\n",
+      "year: 1902, document: 110000131_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1902, document: 110000131_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1902/110000131_datacorr\n",
+      "--------------------------\n",
+      "year: 1902, document: 110000131_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1902/110000131_datacorr\n",
+      "--------------------------\n",
+      "year: 1902, document: 110000127_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1902, document: 110000127_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1902/110000127_datacorr\n",
+      "--------------------------\n",
+      "year: 1902, document: 110000127_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1902/110000127_datacorr\n",
+      "--------------------------\n",
+      "year: 1902, document: 110000135_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1902, document: 110000135_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1902/110000135_datacorr\n",
+      "--------------------------\n",
+      "year: 1902, document: 110000135_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1902/110000135_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1943/04_correctedxml.tar.gz\n",
+      "year: 1943, document: 110000827_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1943, document: 110000827_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1943/110000827_datacorr\n",
+      "--------------------------\n",
+      "year: 1943, document: 110000827_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1943/110000827_datacorr\n",
+      "--------------------------\n",
+      "year: 1943, document: 110000831_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1943, document: 110000831_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1943/110000831_datacorr\n",
+      "--------------------------\n",
+      "year: 1943, document: 110000831_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1943/110000831_datacorr\n",
+      "--------------------------\n",
+      "year: 1943, document: 110000835_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1943, document: 110000835_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1943/110000835_datacorr\n",
+      "--------------------------\n",
+      "year: 1943, document: 110000835_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1943/110000835_datacorr\n",
+      "--------------------------\n",
+      "year: 1943, document: 110000823_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 8, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1943, document: 110000823_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 8, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1943/110000823_datacorr\n",
+      "--------------------------\n",
+      "year: 1943, document: 110000823_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 8, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1943/110000823_datacorr\n",
+      "--------------------------\n",
+      "year: 1943, document: 110000839_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1943, document: 110000839_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1943/110000839_datacorr\n",
+      "--------------------------\n",
+      "year: 1943, document: 110000839_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1943/110000839_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1901/04_correctedxml.tar.gz\n",
+      "year: 1901, document: 110000115_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1901, document: 110000115_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1901/110000115_datacorr\n",
+      "--------------------------\n",
+      "year: 1901, document: 110000115_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1901/110000115_datacorr\n",
+      "--------------------------\n",
+      "year: 1901, document: 110000119_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1901, document: 110000119_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1901/110000119_datacorr\n",
+      "--------------------------\n",
+      "year: 1901, document: 110000119_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1901/110000119_datacorr\n",
+      "--------------------------\n",
+      "year: 1901, document: 110000123_datacorr\n",
+      "coords: 183.483,546.327,324.513,562.162, page_id: 9, pages_left: 7, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1901, document: 110000123_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1901/110000123_datacorr\n",
+      "--------------------------\n",
+      "year: 1901, document: 110000123_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1901/110000123_datacorr\n",
+      "--------------------------\n",
+      "year: 1901, document: 110000123_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1901/110000123_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1913/04_correctedxml.tar.gz\n",
+      "year: 1913, document: 110000307_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1913, document: 110000307_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1913/110000307_datacorr\n",
+      "--------------------------\n",
+      "year: 1913, document: 110000307_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1913/110000307_datacorr\n",
+      "--------------------------\n",
+      "year: 1913, document: 110000311_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1913, document: 110000311_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1913/110000311_datacorr\n",
+      "--------------------------\n",
+      "year: 1913, document: 110000311_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1913/110000311_datacorr\n",
+      "--------------------------\n",
+      "year: 1913, document: 110000303_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1913, document: 110000303_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1913/110000303_datacorr\n",
+      "--------------------------\n",
+      "year: 1913, document: 110000303_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1913/110000303_datacorr\n",
+      "--------------------------\n",
+      "year: 1913, document: 110000295_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1913, document: 110000295_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1913/110000295_datacorr\n",
+      "--------------------------\n",
+      "year: 1913, document: 110000295_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1913/110000295_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1948/04_correctedxml.tar.gz\n",
+      "year: 1948, document: 110000915_datacorr\n",
+      "coords: 420.855, 91.387,466.178, 96.424, page_id: 0, pages_left: 37, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1948, document: 110000915_datacorr\n",
+      "coords: 523.225, 76.32 ,552.013, 80.64 , page_id: 6, pages_left: 31, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1948/110000915_datacorr\n",
+      "--------------------------\n",
+      "year: 1948, document: 110000915_datacorr\n",
+      "coords: 245.52 , 95.012,281.52 ,100.051, page_id: 32, pages_left: 5, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1948/110000915_datacorr\n",
+      "--------------------------\n",
+      "year: 1948, document: 110000915_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 37, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1948/110000915_datacorr\n",
+      "--------------------------\n",
+      "year: 1948, document: 110000915_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 37, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1948/110000915_datacorr\n",
+      "--------------------------\n",
+      "year: 1948, document: 110000915_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 37, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1948/110000915_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1915/04_correctedxml.tar.gz\n",
+      "year: 1915, document: 110000339_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1915, document: 110000339_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1915/110000339_datacorr\n",
+      "--------------------------\n",
+      "year: 1915, document: 110000339_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1915/110000339_datacorr\n",
+      "--------------------------\n",
+      "year: 1915, document: 110000343_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1915, document: 110000343_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1915/110000343_datacorr\n",
+      "--------------------------\n",
+      "year: 1915, document: 110000343_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1915/110000343_datacorr\n",
+      "--------------------------\n",
+      "year: 1915, document: 110000331_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1915, document: 110000331_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1915/110000331_datacorr\n",
+      "--------------------------\n",
+      "year: 1915, document: 110000331_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1915/110000331_datacorr\n",
+      "--------------------------\n",
+      "year: 1915, document: 110000335_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1915, document: 110000335_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1915/110000335_datacorr\n",
+      "--------------------------\n",
+      "year: 1915, document: 110000335_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1915/110000335_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1898/04_correctedxml.tar.gz\n",
+      "year: 1898, document: 110000075_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1898, document: 110000075_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1898/110000075_datacorr\n",
+      "--------------------------\n",
+      "year: 1898, document: 110000075_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1898/110000075_datacorr\n",
+      "--------------------------\n",
+      "year: 1898, document: 110000087_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1898, document: 110000087_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1898/110000087_datacorr\n",
+      "--------------------------\n",
+      "year: 1898, document: 110000087_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1898/110000087_datacorr\n",
+      "--------------------------\n",
+      "year: 1898, document: 110000079_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1898, document: 110000079_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1898/110000079_datacorr\n",
+      "--------------------------\n",
+      "year: 1898, document: 110000079_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1898/110000079_datacorr\n",
+      "--------------------------\n",
+      "year: 1898, document: 110000083_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1898, document: 110000083_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1898/110000083_datacorr\n",
+      "--------------------------\n",
+      "year: 1898, document: 110000083_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 10, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1898/110000083_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1934/04_correctedxml.tar.gz\n",
+      "year: 1934, document: 110000663_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 29, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1934, document: 110000663_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 29, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1934/110000663_datacorr\n",
+      "--------------------------\n",
+      "year: 1934, document: 110000663_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 29, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1934/110000663_datacorr\n",
+      "--------------------------\n",
+      "year: 1934, document: 110000675_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 5, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1934, document: 110000675_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 5, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1934/110000675_datacorr\n",
+      "--------------------------\n",
+      "year: 1934, document: 110000675_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 5, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1934/110000675_datacorr\n",
+      "--------------------------\n",
+      "year: 1934, document: 110000671_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1934, document: 110000671_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1934/110000671_datacorr\n",
+      "--------------------------\n",
+      "year: 1934, document: 110000671_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 20, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1934/110000671_datacorr\n",
+      "--------------------------\n",
+      "year: 1934, document: 110000667_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 27, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1934, document: 110000667_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 27, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1934/110000667_datacorr\n",
+      "--------------------------\n",
+      "year: 1934, document: 110000667_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 27, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1934/110000667_datacorr\n",
+      "--------------------------\n",
+      "year: 1934, document: 110000679_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1934, document: 110000679_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1934/110000679_datacorr\n",
+      "--------------------------\n",
+      "year: 1934, document: 110000679_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1934/110000679_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1931/04_correctedxml.tar.gz\n",
+      "year: 1931, document: 110000615_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1931, document: 110000615_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1931/110000615_datacorr\n",
+      "--------------------------\n",
+      "year: 1931, document: 110000615_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1931/110000615_datacorr\n",
+      "--------------------------\n",
+      "year: 1931, document: 110000611_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1931, document: 110000611_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1931/110000611_datacorr\n",
+      "--------------------------\n",
+      "year: 1931, document: 110000611_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1931/110000611_datacorr\n",
+      "--------------------------\n",
+      "year: 1931, document: 110000623_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1931, document: 110000623_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1931/110000623_datacorr\n",
+      "--------------------------\n",
+      "year: 1931, document: 110000623_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1931/110000623_datacorr\n",
+      "--------------------------\n",
+      "year: 1931, document: 110000619_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1931, document: 110000619_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1931/110000619_datacorr\n",
+      "--------------------------\n",
+      "year: 1931, document: 110000619_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1931/110000619_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1946/04_correctedxml.tar.gz\n",
+      "year: 1946, document: 110000887_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 33, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1946, document: 110000887_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 33, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1946, document: 110000887_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 33, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1946, document: 110000879_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 40, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1946, document: 110000879_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 40, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1946, document: 110000879_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 40, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1946, document: 110000891_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 32, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1946, document: 110000891_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 32, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1946, document: 110000891_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 32, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1946, document: 110000883_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 26, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1946, document: 110000883_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 26, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1946/110000883_datacorr\n",
+      "--------------------------\n",
+      "year: 1946, document: 110000883_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 26, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1946/110000883_datacorr\n",
+      "--------------------------\n",
+      "year: 1946, document: 110000875_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 40, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1946, document: 110000875_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 40, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "year: 1946, document: 110000875_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 40, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1916/04_correctedxml.tar.gz\n",
+      "year: 1916, document: 110000359_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1916, document: 110000359_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1916/110000359_datacorr\n",
+      "--------------------------\n",
+      "year: 1916, document: 110000359_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1916/110000359_datacorr\n",
+      "--------------------------\n",
+      "year: 1916, document: 110000363_datacorr\n",
+      "coords: 110.808,520.419,446.109,539.133, page_id: 10, pages_left: 6, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1916, document: 110000363_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1916/110000363_datacorr\n",
+      "--------------------------\n",
+      "year: 1916, document: 110000363_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1916/110000363_datacorr\n",
+      "--------------------------\n",
+      "year: 1916, document: 110000363_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1916/110000363_datacorr\n",
+      "--------------------------\n",
+      "year: 1916, document: 110000347_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 9, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1916, document: 110000347_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 9, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1916/110000347_datacorr\n",
+      "--------------------------\n",
+      "year: 1916, document: 110000347_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 9, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1916/110000347_datacorr\n",
+      "--------------------------\n",
+      "year: 1916, document: 110000355_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1916, document: 110000355_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1916/110000355_datacorr\n",
+      "--------------------------\n",
+      "year: 1916, document: 110000355_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1916/110000355_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1917/04_correctedxml.tar.gz\n",
+      "year: 1917, document: 110000367_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1917, document: 110000367_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1917/110000367_datacorr\n",
+      "--------------------------\n",
+      "year: 1917, document: 110000367_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1917/110000367_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1933/04_correctedxml.tar.gz\n",
+      "year: 1933, document: 110000659_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1933, document: 110000659_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1933/110000659_datacorr\n",
+      "--------------------------\n",
+      "year: 1933, document: 110000659_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1933/110000659_datacorr\n",
+      "--------------------------\n",
+      "year: 1933, document: 110000655_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1933, document: 110000655_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1933/110000655_datacorr\n",
+      "--------------------------\n",
+      "year: 1933, document: 110000655_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1933/110000655_datacorr\n",
+      "--------------------------\n",
+      "year: 1933, document: 110000651_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 27, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1933, document: 110000651_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 27, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1933/110000651_datacorr\n",
+      "--------------------------\n",
+      "year: 1933, document: 110000651_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 27, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1933/110000651_datacorr\n",
+      "--------------------------\n",
+      "year: 1933, document: 110000647_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1933, document: 110000647_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1933/110000647_datacorr\n",
+      "--------------------------\n",
+      "year: 1933, document: 110000647_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1933/110000647_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1904/04_correctedxml.tar.gz\n",
+      "year: 1904, document: 110000167_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1904, document: 110000167_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1904/110000167_datacorr\n",
+      "--------------------------\n",
+      "year: 1904, document: 110000167_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1904/110000167_datacorr\n",
+      "--------------------------\n",
+      "year: 1904, document: 110000159_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1904, document: 110000159_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1904/110000159_datacorr\n",
+      "--------------------------\n",
+      "year: 1904, document: 110000159_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 14, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1904/110000159_datacorr\n",
+      "--------------------------\n",
+      "year: 1904, document: 110000163_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1904, document: 110000163_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1904/110000163_datacorr\n",
+      "--------------------------\n",
+      "year: 1904, document: 110000163_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1904/110000163_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1894/04_correctedxml.tar.gz\n",
+      "year: 1894, document: 110000021_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1894, document: 110000021_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1894/110000021_datacorr\n",
+      "--------------------------\n",
+      "year: 1894, document: 110000021_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1894/110000021_datacorr\n",
+      "--------------------------\n",
+      "year: 1894, document: 110000029_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1894, document: 110000029_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1894/110000029_datacorr\n",
+      "--------------------------\n",
+      "year: 1894, document: 110000029_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1894/110000029_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1914/04_correctedxml.tar.gz\n",
+      "year: 1914, document: 110000299_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1914, document: 110000299_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1914/110000299_datacorr\n",
+      "--------------------------\n",
+      "year: 1914, document: 110000299_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1914/110000299_datacorr\n",
+      "--------------------------\n",
+      "year: 1914, document: 110000315_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1914, document: 110000315_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1914/110000315_datacorr\n",
+      "--------------------------\n",
+      "year: 1914, document: 110000315_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 12, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1914/110000315_datacorr\n",
+      "--------------------------\n",
+      "year: 1914, document: 110000327_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1914, document: 110000327_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1914/110000327_datacorr\n",
+      "--------------------------\n",
+      "year: 1914, document: 110000327_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1914/110000327_datacorr\n",
+      "--------------------------\n",
+      "year: 1914, document: 110000319_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1914, document: 110000319_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1914/110000319_datacorr\n",
+      "--------------------------\n",
+      "year: 1914, document: 110000319_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1914/110000319_datacorr\n",
+      "--------------------------\n",
+      "year: 1914, document: 110000323_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 1, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1914, document: 110000323_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 1, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1914/110000323_datacorr\n",
+      "--------------------------\n",
+      "year: 1914, document: 110000323_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 1, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1914/110000323_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1907/04_correctedxml.tar.gz\n",
+      "year: 1907, document: 110000211_datacorr\n",
+      "coords: 341.743,655.544,601.468,673.534, page_id: 14, pages_left: 5, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000211_datacorr\n",
+      "coords: 417.286,718.868,598.59 ,731.82 , page_id: 14, pages_left: 5, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1907/110000211_datacorr\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000211_datacorr\n",
+      "coords: 435.273,436.07 ,600.029,467.012, page_id: 14, pages_left: 5, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1907/110000211_datacorr\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000211_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1907/110000211_datacorr\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000211_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1907/110000211_datacorr\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000211_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1907/110000211_datacorr\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000203_datacorr\n",
+      "coords:  47.484,565.92 ,248.933,585.36 , page_id: 9, pages_left: 6, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000203_datacorr\n",
+      "coords: 473.405,573.12 ,607.225,587.52 , page_id: 9, pages_left: 6, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1907/110000203_datacorr\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000203_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1907/110000203_datacorr\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000203_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1907/110000203_datacorr\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000203_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 15, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1907/110000203_datacorr\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000215_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000215_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1907/110000215_datacorr\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000215_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1907/110000215_datacorr\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000219_datacorr\n",
+      "coords:  45.343,220.194,272.058,252.575, page_id: 2, pages_left: 15, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000219_datacorr\n",
+      "coords: 106.48 ,189.36 ,256.127,226.08 , page_id: 6, pages_left: 11, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1907/110000219_datacorr\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000219_datacorr\n",
+      "coords: 340.56 ,110.097,605.52 ,121.61 , page_id: 15, pages_left: 2, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1907/110000219_datacorr\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000219_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1907/110000219_datacorr\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000219_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1907/110000219_datacorr\n",
+      "--------------------------\n",
+      "year: 1907, document: 110000219_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1907/110000219_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1891/04_correctedxml.tar.gz\n",
+      "year: 1891, document: 110000005_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 9, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1891, document: 110000005_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 9, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1891/110000005_datacorr\n",
+      "--------------------------\n",
+      "year: 1891, document: 110000005_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 9, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1891/110000005_datacorr\n",
+      "--------------------------\n",
+      "year: 1891, document: 110000001_datacorr\n",
+      "coords: 465.684,502.421,680.172,515.377, page_id: 7, pages_left: 4, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 0\n",
+      "--------------------------\n",
+      "year: 1891, document: 110000001_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1891/110000001_datacorr\n",
+      "--------------------------\n",
+      "year: 1891, document: 110000001_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1891/110000001_datacorr\n",
+      "--------------------------\n",
+      "year: 1891, document: 110000001_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1891/110000001_datacorr\n",
+      "--------------------------\n",
+      "year: 1891, document: 110000003_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 2, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1891, document: 110000003_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 2, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1891/110000003_datacorr\n",
+      "--------------------------\n",
+      "year: 1891, document: 110000003_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 2, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1891/110000003_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1939/04_correctedxml.tar.gz\n",
+      "year: 1939, document: 110000763_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1939, document: 110000763_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1939/110000763_datacorr\n",
+      "--------------------------\n",
+      "year: 1939, document: 110000763_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 28, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1939/110000763_datacorr\n",
+      "--------------------------\n",
+      "year: 1939, document: 110000767_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1939, document: 110000767_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1939/110000767_datacorr\n",
+      "--------------------------\n",
+      "year: 1939, document: 110000767_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1939/110000767_datacorr\n",
+      "--------------------------\n",
+      "year: 1939, document: 110000759_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 1, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1939, document: 110000759_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 1, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1939/110000759_datacorr\n",
+      "--------------------------\n",
+      "year: 1939, document: 110000759_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 1, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1939/110000759_datacorr\n",
+      "--------------------------\n",
+      "year: 1939, document: 110000755_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 35, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1939, document: 110000755_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 35, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1939/110000755_datacorr\n",
+      "--------------------------\n",
+      "year: 1939, document: 110000755_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 35, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1939/110000755_datacorr\n",
+      "--------------------------\n",
+      "year: 1939, document: 110000751_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 36, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1939, document: 110000751_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 36, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1939/110000751_datacorr\n",
+      "--------------------------\n",
+      "year: 1939, document: 110000751_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 36, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1939/110000751_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1896/04_correctedxml.tar.gz\n",
+      "year: 1896, document: 110000047_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1896, document: 110000047_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1896/110000047_datacorr\n",
+      "--------------------------\n",
+      "year: 1896, document: 110000047_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 11, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1896/110000047_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1927/04_correctedxml.tar.gz\n",
+      "year: 1927, document: 110000543_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1927, document: 110000543_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1927/110000543_datacorr\n",
+      "--------------------------\n",
+      "year: 1927, document: 110000543_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1927/110000543_datacorr\n",
+      "--------------------------\n",
+      "year: 1927, document: 110000555_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1927, document: 110000555_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1927/110000555_datacorr\n",
+      "--------------------------\n",
+      "year: 1927, document: 110000555_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1927/110000555_datacorr\n",
+      "--------------------------\n",
+      "year: 1927, document: 110000551_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 21, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1927, document: 110000551_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 21, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1927/110000551_datacorr\n",
+      "--------------------------\n",
+      "year: 1927, document: 110000551_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 21, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1927/110000551_datacorr\n",
+      "--------------------------\n",
+      "year: 1927, document: 110000547_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1927, document: 110000547_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1927/110000547_datacorr\n",
+      "--------------------------\n",
+      "year: 1927, document: 110000547_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1927/110000547_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1929/04_correctedxml.tar.gz\n",
+      "year: 1929, document: 110000579_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1929, document: 110000579_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1929/110000579_datacorr\n",
+      "--------------------------\n",
+      "year: 1929, document: 110000579_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1929/110000579_datacorr\n",
+      "--------------------------\n",
+      "year: 1929, document: 110000575_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1929, document: 110000575_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1929/110000575_datacorr\n",
+      "--------------------------\n",
+      "year: 1929, document: 110000575_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1929/110000575_datacorr\n",
+      "--------------------------\n",
+      "year: 1929, document: 110000583_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1929, document: 110000583_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1929/110000583_datacorr\n",
+      "--------------------------\n",
+      "year: 1929, document: 110000583_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1929/110000583_datacorr\n",
+      "--------------------------\n",
+      "year: 1929, document: 110000587_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 29, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1929, document: 110000587_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 29, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1929/110000587_datacorr\n",
+      "--------------------------\n",
+      "year: 1929, document: 110000587_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 29, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1929/110000587_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1912/04_correctedxml.tar.gz\n",
+      "year: 1912, document: 110000291_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1912, document: 110000291_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1912/110000291_datacorr\n",
+      "--------------------------\n",
+      "year: 1912, document: 110000291_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 16, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1912/110000291_datacorr\n",
+      "--------------------------\n",
+      "year: 1912, document: 110000287_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1912, document: 110000287_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1912/110000287_datacorr\n",
+      "--------------------------\n",
+      "year: 1912, document: 110000287_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 17, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1912/110000287_datacorr\n",
+      "--------------------------\n",
+      "year: 1912, document: 110000283_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1912, document: 110000283_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1912/110000283_datacorr\n",
+      "--------------------------\n",
+      "year: 1912, document: 110000283_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 13, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1912/110000283_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1937/04_correctedxml.tar.gz\n",
+      "year: 1937, document: 110000723_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1937, document: 110000723_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1937/110000723_datacorr\n",
+      "--------------------------\n",
+      "year: 1937, document: 110000723_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 34, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1937/110000723_datacorr\n",
+      "--------------------------\n",
+      "year: 1937, document: 110000719_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 41, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1937, document: 110000719_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 41, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1937/110000719_datacorr\n",
+      "--------------------------\n",
+      "year: 1937, document: 110000719_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 41, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1937/110000719_datacorr\n",
+      "--------------------------\n",
+      "year: 1937, document: 110000731_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1937, document: 110000731_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1937/110000731_datacorr\n",
+      "--------------------------\n",
+      "year: 1937, document: 110000731_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1937/110000731_datacorr\n",
+      "--------------------------\n",
+      "year: 1937, document: 110000727_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1937, document: 110000727_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1937/110000727_datacorr\n",
+      "--------------------------\n",
+      "year: 1937, document: 110000727_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 42, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1937/110000727_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1928/04_correctedxml.tar.gz\n",
+      "year: 1928, document: 110000563_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 22, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1928, document: 110000563_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 22, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1928/110000563_datacorr\n",
+      "--------------------------\n",
+      "year: 1928, document: 110000563_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 22, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1928/110000563_datacorr\n",
+      "--------------------------\n",
+      "year: 1928, document: 110000559_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1928, document: 110000559_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1928/110000559_datacorr\n",
+      "--------------------------\n",
+      "year: 1928, document: 110000559_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1928/110000559_datacorr\n",
+      "--------------------------\n",
+      "year: 1928, document: 110000571_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1928, document: 110000571_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1928/110000571_datacorr\n",
+      "--------------------------\n",
+      "year: 1928, document: 110000571_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 23, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1928/110000571_datacorr\n",
+      "--------------------------\n",
+      "year: 1928, document: 110000567_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1928, document: 110000567_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1928/110000567_datacorr\n",
+      "--------------------------\n",
+      "year: 1928, document: 110000567_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 19, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1928/110000567_datacorr\n",
+      "--------------------------\n",
+      "../data/AB_other/SessionOverviews_tar/1942/04_correctedxml.tar.gz\n",
+      "year: 1942, document: 110000819_datacorr\n",
+      "coords: 238.000,501.764,258.966,510.692, page_id: 3, pages_left: 21, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 2\n",
+      "--------------------------\n",
+      "year: 1942, document: 110000819_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1942/110000819_datacorr\n",
+      "--------------------------\n",
+      "year: 1942, document: 110000819_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1942/110000819_datacorr\n",
+      "--------------------------\n",
+      "year: 1942, document: 110000819_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 24, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1942/110000819_datacorr\n",
+      "--------------------------\n",
+      "year: 1942, document: 110000807_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 26, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1942, document: 110000807_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 26, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1942/110000807_datacorr\n",
+      "--------------------------\n",
+      "year: 1942, document: 110000807_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 26, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1942/110000807_datacorr\n",
+      "--------------------------\n",
+      "year: 1942, document: 110000811_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1942, document: 110000811_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1942/110000811_datacorr\n",
+      "--------------------------\n",
+      "year: 1942, document: 110000811_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 25, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1942/110000811_datacorr\n",
+      "--------------------------\n",
+      "year: 1942, document: 110000815_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 1, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1942, document: 110000815_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 1, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1942/110000815_datacorr\n",
+      "--------------------------\n",
+      "year: 1942, document: 110000815_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 1, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1942/110000815_datacorr\n",
+      "--------------------------\n",
+      "year: 1942, document: 110000803_datacorr\n",
+      "coords: 80.000,300.230,456.450,311.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "number of textlines with this bbox in 02_xml 1\n",
+      "--------------------------\n",
+      "year: 1942, document: 110000803_datacorr\n",
+      "coords: 80.000,315.230,450.770,326.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1942/110000803_datacorr\n",
+      "--------------------------\n",
+      "year: 1942, document: 110000803_datacorr\n",
+      "coords: 80.000,330.230,489.100,341.390, page_id: 18, pages_left: 0, n_reps: 2\n",
+      "all repetitions are the same: True\n",
+      "Couldn't parse 02_xml 1942/110000803_datacorr\n",
+      "--------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Scan up to ~max_n corrected XML files for duplicated textline bboxes.\n",
+    "max_n, n = 200, 0  # the limit is tested once per year, so the last year may overshoot it\n",
+    "for year in os.listdir(path):\n",
+    "    if os.path.exists(os.path.join(path, year, \"04_correctedxml.tar.gz\")):\n",
+    "        files, _nothing = utils_proc.get_list(year, folder_database=path, name_file=\"04_correctedxml\")\n",
+    "        for f in files:\n",
+    "            file_id = f.split(\"/\")[-1].split(\".\")[0]\n",
+    "            fp, tar = utils_proc.get_handlerfile(f, path, name_file=\"04_correctedxml\", return_tar_handle=True)\n",
+    "            fp_raw, tar2 = None, None  # stay None when the 02_extractedxml counterpart cannot be opened\n",
+    "            try:\n",
+    "                fp_raw, tar2 = utils_proc.get_handlerfile(f.replace(\"corr.xml\", \".xml\"), path, name_file=\"02_extractedxml\", return_tar_handle=True)\n",
+    "            except Exception:  # unlike a bare except, lets KeyboardInterrupt/SystemExit through\n",
+    "                pass\n",
+    "            try:\n",
+    "                duplicates_in_file(fp, fp_raw, year=year, file=file_id)\n",
+    "            finally:  # release the handles even if the check raises\n",
+    "                for handle in (fp, tar, tar2):\n",
+    "                    if handle is not None:\n",
+    "                        handle.close()\n",
+    "            n += 1\n",
+    "    if n >= max_n:\n",
+    "        break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "file_name = \"110000107_datacorr.xml\"\n",
+    "\n",
+    "\n",
+    "def duplicates_in_file(fp):\n",
+    "    \"\"\"Print every textline element of the XML source `fp` whose bbox is shared with another textline.\n",
+    "\n",
+    "    `fp` is passed straight to ET.parse; nothing is returned.\n",
+    "    NOTE: running this cell rebinds the name of the multi-argument duplicates_in_file defined earlier.\n",
+    "    \"\"\"\n",
+    "    textlines = ET.parse(fp).findall(\".//textline\")\n",
+    "    repeated_mask = is_dublicated([line.attrib[\"bbox\"] for line in textlines])\n",
+    "\n",
+    "    # boolean_index keeps the textlines whose mask entry is true\n",
+    "    for duplicate in boolean_index(textlines, repeated_mask):\n",
+    "        serialized = ET.tostring(duplicate)\n",
+    "        print(serialized)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Now repeat the duplicate check on the pre-correction file (110000107_data.xml)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Same duplicate-bbox check as duplicates_in_file(fp), run inline on 110000107_data.xml.\n",
+    "file_name = \"110000107_data.xml\"\n",
+    "full_path = os.path.join(path, file_name)\n",
+    "tree = ET.parse(full_path)\n",
+    "all_textlines = tree.findall(\".//textline\")\n",
+    "bboxes = [textline.attrib[\"bbox\"] for textline in all_textlines]\n",
+    "duplicated_lines = boolean_index(all_textlines, is_dublicated(bboxes))\n",
+    "print(duplicated_lines)\n",
+    "for duplicate in duplicated_lines:\n",
+    "    print(ET.tostring(duplicate))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}