diff --git a/.gitattributes b/.gitattributes
index e67ad9797a17bb996e2c9d490b8c4f6ddea0921d..bfb3952ddd6b215c8fe8b106b2c9c2c4b648e99f 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -3,13 +3,6 @@ data/AB/1891/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1892/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1893/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1891/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1894/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1895/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1896/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1897/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1898/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1899/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1900/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1892/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1893/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1894/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
@@ -44,29 +37,3 @@ data/AB/1922/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1923/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1924/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1925/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1901/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1902/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1903/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1904/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1905/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1906/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1907/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1908/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1909/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1910/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1911/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1912/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1913/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1914/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1915/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1916/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1917/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1918/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1919/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1920/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1921/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1922/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1923/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1924/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1925/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
-data/AB/1891/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
diff --git a/.renku/workflow/0134625b44c64399bf6b27b95b5decbd_python.cwl b/.renku/workflow/0134625b44c64399bf6b27b95b5decbd_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..eb23b047a57f11190dcf2aa44977dd23485d8497
--- /dev/null
+++ b/.renku/workflow/0134625b44c64399bf6b27b95b5decbd_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1907/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1907/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1907
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/07000f8c08c84b08aa11fa19f292b975_python.cwl b/.renku/workflow/07000f8c08c84b08aa11fa19f292b975_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..4708b818aacad65b9dbea2d73b3293e0c4b65dc2
--- /dev/null
+++ b/.renku/workflow/07000f8c08c84b08aa11fa19f292b975_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1915/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1915/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1915
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/09c451320b59427fb60bdf37badcb67d_python.cwl b/.renku/workflow/09c451320b59427fb60bdf37badcb67d_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..3f213aa6523aa3a77d59647d4c36c474902f6702
--- /dev/null
+++ b/.renku/workflow/09c451320b59427fb60bdf37badcb67d_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1921/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1921/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1921
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/09ece46a1d5e44a3b804495a76a57aab_python.cwl b/.renku/workflow/09ece46a1d5e44a3b804495a76a57aab_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..05054bf40c1e7b6a3703aa1d434373e217e333c1
--- /dev/null
+++ b/.renku/workflow/09ece46a1d5e44a3b804495a76a57aab_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1925/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1925/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1925
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/0b606b992b0d41e9b626bc38519393fc_python.cwl b/.renku/workflow/0b606b992b0d41e9b626bc38519393fc_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..d0413aa414f7f5beb71e7b06adee137aeeb99505
--- /dev/null
+++ b/.renku/workflow/0b606b992b0d41e9b626bc38519393fc_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1916/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1916/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1916
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/113abe1329c848b8a140aa344d36dcca_python.cwl b/.renku/workflow/113abe1329c848b8a140aa344d36dcca_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..b7be8aeb9b76e8477eb1b91f6badfdc4a1ad8d15
--- /dev/null
+++ b/.renku/workflow/113abe1329c848b8a140aa344d36dcca_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1904/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1904/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1904
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/1a984ee680f64fe19730b24a080d0eef_python.cwl b/.renku/workflow/1a984ee680f64fe19730b24a080d0eef_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..4a8fd25834e0e3609260e23aa921049f19a70986
--- /dev/null
+++ b/.renku/workflow/1a984ee680f64fe19730b24a080d0eef_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1911/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1911/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1911
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/21514d64f82c4f7399fd262c1a43df81_python.cwl b/.renku/workflow/21514d64f82c4f7399fd262c1a43df81_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..83676c5d013399834276f14f3f9bab0ce7c81c71
--- /dev/null
+++ b/.renku/workflow/21514d64f82c4f7399fd262c1a43df81_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1918/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1918/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1918
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/2bb430f219a7477582cd38750b16344c_python.cwl b/.renku/workflow/2bb430f219a7477582cd38750b16344c_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..1fc741bb103b9fd57878a6f60275a358397bd5da
--- /dev/null
+++ b/.renku/workflow/2bb430f219a7477582cd38750b16344c_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1923/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1923/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1923
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/362e1c8d900242068c82670677830cb5_python.cwl b/.renku/workflow/362e1c8d900242068c82670677830cb5_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..1449cd7c6073c2939891c8cf2d32661066dd79bb
--- /dev/null
+++ b/.renku/workflow/362e1c8d900242068c82670677830cb5_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1917/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1917/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1917
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/3d6f16e1a94d4cffa56bee8413551e76_python.cwl b/.renku/workflow/3d6f16e1a94d4cffa56bee8413551e76_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..cd69b9e60700d44931ec232fd94cd2798d23f99e
--- /dev/null
+++ b/.renku/workflow/3d6f16e1a94d4cffa56bee8413551e76_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1914/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1914/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1914
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/41b71f3b5d174a75bfad0e51f84e1b2d_python.cwl b/.renku/workflow/41b71f3b5d174a75bfad0e51f84e1b2d_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..eb27be13292079af60ff1688f008ddf715c31395
--- /dev/null
+++ b/.renku/workflow/41b71f3b5d174a75bfad0e51f84e1b2d_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1910/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1910/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1910
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/43056c2958f14e56a2c8552e6fc5ec6f_python.cwl b/.renku/workflow/43056c2958f14e56a2c8552e6fc5ec6f_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..046effa370c63a18f72ce04897300bc5b6a5aad7
--- /dev/null
+++ b/.renku/workflow/43056c2958f14e56a2c8552e6fc5ec6f_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1908/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1908/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1908
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/45c7d66af65e44b6a70db8bfa39794ac_python.cwl b/.renku/workflow/45c7d66af65e44b6a70db8bfa39794ac_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..82e4571279bf74ab2d76814a00e20bf28886f080
--- /dev/null
+++ b/.renku/workflow/45c7d66af65e44b6a70db8bfa39794ac_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1895/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1895/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1895
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/4cf159cef6f948fba8b9972c0c573938_python.cwl b/.renku/workflow/4cf159cef6f948fba8b9972c0c573938_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..35455e33e26e78375c044b58a6de06b9e5c519cf
--- /dev/null
+++ b/.renku/workflow/4cf159cef6f948fba8b9972c0c573938_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1894/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1894/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1894
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/5a0017a638b443959606d67aa04b7fbd_python.cwl b/.renku/workflow/5a0017a638b443959606d67aa04b7fbd_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..431b870be923936f6f2bd6b56270713633447d6e
--- /dev/null
+++ b/.renku/workflow/5a0017a638b443959606d67aa04b7fbd_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1901/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1901/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1901
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/5d9113bd3e2c4a109fdb2b91acddef69_python.cwl b/.renku/workflow/5d9113bd3e2c4a109fdb2b91acddef69_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..6fe0cfc6d2cc6537ab845e3a869a6f181fd8e6c3
--- /dev/null
+++ b/.renku/workflow/5d9113bd3e2c4a109fdb2b91acddef69_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1891/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1891/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1891
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/6195b525672344f68b6018e91f79e20f_python.cwl b/.renku/workflow/6195b525672344f68b6018e91f79e20f_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..f355b21514c45af88ec85b836a2dfd9ea5875c7d
--- /dev/null
+++ b/.renku/workflow/6195b525672344f68b6018e91f79e20f_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1900/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1900/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1900
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/6599edb3c91c42c3ad6624b8959dad97_python.cwl b/.renku/workflow/6599edb3c91c42c3ad6624b8959dad97_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..4020d70e6fc2841b95c03b0960acb81eee82601e
--- /dev/null
+++ b/.renku/workflow/6599edb3c91c42c3ad6624b8959dad97_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1920/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1920/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1920
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/6d724451628547fc9e9786ebe333a97b_python.cwl b/.renku/workflow/6d724451628547fc9e9786ebe333a97b_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..2e4592888534e25b22bf9f148396316c751a9de7
--- /dev/null
+++ b/.renku/workflow/6d724451628547fc9e9786ebe333a97b_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1922/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1922/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1922
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/72579b6c928c49709680074ba1bff299_python.cwl b/.renku/workflow/72579b6c928c49709680074ba1bff299_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..c5415aa122acef9f7860839ad3deecab3e3d2a5c
--- /dev/null
+++ b/.renku/workflow/72579b6c928c49709680074ba1bff299_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1919/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1919/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1919
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/725b4b7206f747a7a283396bdd0156e1_python.cwl b/.renku/workflow/725b4b7206f747a7a283396bdd0156e1_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..a77062bc1b39e835fa0b528d14458903e7a61255
--- /dev/null
+++ b/.renku/workflow/725b4b7206f747a7a283396bdd0156e1_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1897/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1897/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1897
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/8b8bfceafdef438686a5e5e8ae4feff3_python.cwl b/.renku/workflow/8b8bfceafdef438686a5e5e8ae4feff3_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..5f9bfce89127e5a2f410d53d1c258abf405908ff
--- /dev/null
+++ b/.renku/workflow/8b8bfceafdef438686a5e5e8ae4feff3_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1912/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1912/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1912
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/8cd9002784704cb0bfb8f44b5f5acf71_python.cwl b/.renku/workflow/8cd9002784704cb0bfb8f44b5f5acf71_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..1cea10459f799eb85c7142d3d5bcaf3f34f5a82d
--- /dev/null
+++ b/.renku/workflow/8cd9002784704cb0bfb8f44b5f5acf71_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1902/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1902/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1902
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/90d0c14f67804d9694908085fc4ac36c_python.cwl b/.renku/workflow/90d0c14f67804d9694908085fc4ac36c_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..b2814e9594921b8dfdd9e93c3d6d3f0c15e32af8
--- /dev/null
+++ b/.renku/workflow/90d0c14f67804d9694908085fc4ac36c_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1893/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1893/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1893
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/963b3f1319b5444096f8321b5e2ef1d6_python.cwl b/.renku/workflow/963b3f1319b5444096f8321b5e2ef1d6_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..e922b5bd4db873ecf8a707d36f34fe8c65bd742d
--- /dev/null
+++ b/.renku/workflow/963b3f1319b5444096f8321b5e2ef1d6_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1909/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1909/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1909
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/9e8fab6ce9a241ebb0f0d48701b5f495_python.cwl b/.renku/workflow/9e8fab6ce9a241ebb0f0d48701b5f495_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..b808652745e1dcebff5987b3cda0b30115a54821
--- /dev/null
+++ b/.renku/workflow/9e8fab6ce9a241ebb0f0d48701b5f495_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1899/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1899/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1899
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/a037def564034bdfa1f14ee4452e1b1a_python.cwl b/.renku/workflow/a037def564034bdfa1f14ee4452e1b1a_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..28222079c3e803d0c56d4f55b396e391d9f85974
--- /dev/null
+++ b/.renku/workflow/a037def564034bdfa1f14ee4452e1b1a_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1906/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1906/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1906
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/a2443cc9926047f393466483b1cbecc1_python.cwl b/.renku/workflow/a2443cc9926047f393466483b1cbecc1_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..820121b8f8bd4542dc208e7e4a86c6a0cb31ffbd
--- /dev/null
+++ b/.renku/workflow/a2443cc9926047f393466483b1cbecc1_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1913/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1913/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1913
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/a568622380734974b54df06350ebd1b7_python.cwl b/.renku/workflow/a568622380734974b54df06350ebd1b7_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..8f7d9f6f356d99d65ebe4ed2d87cd3c862f85d9d
--- /dev/null
+++ b/.renku/workflow/a568622380734974b54df06350ebd1b7_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1924/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1924/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1924
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/b0fb11ee2493439197b633b9ad97686f_python.cwl b/.renku/workflow/b0fb11ee2493439197b633b9ad97686f_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..6cc72ab18d96580bf6fa4835067870baf4d951b7
--- /dev/null
+++ b/.renku/workflow/b0fb11ee2493439197b633b9ad97686f_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1905/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1905/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1905
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/d6d4efa1202649e1a9cb3282dfde7afd_python.cwl b/.renku/workflow/d6d4efa1202649e1a9cb3282dfde7afd_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..61e91981f2763f040d6fe9439f167b7f0b7f7cf9
--- /dev/null
+++ b/.renku/workflow/d6d4efa1202649e1a9cb3282dfde7afd_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1896/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1896/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1896
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/e70b158788334dc89bfd1b2207f2ec8b_python.cwl b/.renku/workflow/e70b158788334dc89bfd1b2207f2ec8b_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..cd16afda51bf80397defa522ddfb4d29a54d3fbc
--- /dev/null
+++ b/.renku/workflow/e70b158788334dc89bfd1b2207f2ec8b_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1903/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1903/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1903
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/e74335e7335143d8ae104b3ac0a260df_python.cwl b/.renku/workflow/e74335e7335143d8ae104b3ac0a260df_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..392727b5e2cadd0512473a31c1616571ce50448a
--- /dev/null
+++ b/.renku/workflow/e74335e7335143d8ae104b3ac0a260df_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1892/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1892/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1892
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/e95b78dce28d46d49a9658bc3ad8fafe_python.cwl b/.renku/workflow/e95b78dce28d46d49a9658bc3ad8fafe_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..c1d136f1bd7a54e09cf3bf6c040d983077c40cad
--- /dev/null
+++ b/.renku/workflow/e95b78dce28d46d49a9658bc3ad8fafe_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1898/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1898/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1898
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/data/AB/1891/02_extractedxml.tar.gz b/data/AB/1891/02_extractedxml.tar.gz
index a52f1fbe2011f4658417d578622421048efabf72..6e68262b753d30ac54486b1d51f8f21b34f6d3d6 100644
--- a/data/AB/1891/02_extractedxml.tar.gz
+++ b/data/AB/1891/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4fdbaeb1b68ff4ad3a5d23ab109638a0e68ad62a969a06fd5486eeaad9020a05
-size 43120374
+oid sha256:408b299adff3e39a3d195b486f89c8bfcf6a85ffb91a30256d559cd79b599b8b
+size 43120412
diff --git a/data/AB/1892/02_extractedxml.tar.gz b/data/AB/1892/02_extractedxml.tar.gz
index 9798fcff3d534b0ab1c60f914a164629428f2ed4..e286b5606fca7f2f433cdfd7a5de8fcda5eb031c 100644
--- a/data/AB/1892/02_extractedxml.tar.gz
+++ b/data/AB/1892/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29657b1d322fdd8099d0f3c5fb50ecfa251b2ee0ebb618b1a2b77297a342d5dc
-size 16471798
+oid sha256:7c8207f107ff80317aa64de940930118ffa1b22dd73749cfc22db4f2c5abc018
+size 16471820
diff --git a/data/AB/1893/02_extractedxml.tar.gz b/data/AB/1893/02_extractedxml.tar.gz
index 35b8a55ded95ba03069cd214a73a0449e9e166cb..f3e0901641beb5f1f7f5cce7f065f7ab7ccd05ff 100644
--- a/data/AB/1893/02_extractedxml.tar.gz
+++ b/data/AB/1893/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b25a1963a5635f88c19fa0bcdcc4f4b94a657163cb3300cd32a4e7a26ead816
-size 47034409
+oid sha256:5dcd3bfa25b8e7cb1906cd0072b999f186ea7994632acf7c5f0f4cd38dc48d43
+size 47034427
diff --git a/data/AB/1894/02_extractedxml.tar.gz b/data/AB/1894/02_extractedxml.tar.gz
index 8e5ec482c0eb6ce2df44279b659834bdba432e92..5a1d7b4670a407c361e688b005d2268836b7b705 100644
--- a/data/AB/1894/02_extractedxml.tar.gz
+++ b/data/AB/1894/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:39aa705c25e01108a836bd9207d63e9121bcb613390765c39d75c067257965bb
-size 44175939
+oid sha256:b18403d8f232df02cffabc2e850c4be1605cea632cd7fdf04fadb975d50bdb5a
+size 44175955
diff --git a/data/AB/1895/02_extractedxml.tar.gz b/data/AB/1895/02_extractedxml.tar.gz
index cc7ddd29c3db8d96cff8d4bc6969856b5f3a06d3..c18afc0072112b28ba161cdce80f2c93548ce314 100644
--- a/data/AB/1895/02_extractedxml.tar.gz
+++ b/data/AB/1895/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c53569da11cbe50c4b0fa211f4415f6104fc49934350e1564de678137ed88602
-size 79039405
+oid sha256:e0f19dc3f5d0a2793b89397f4e91051e68466e323f25697b50c91a90322ce2d7
+size 79039437
diff --git a/data/AB/1896/02_extractedxml.tar.gz b/data/AB/1896/02_extractedxml.tar.gz
index 3aebb31193c5ea7ac8232fe7bd466351ddbd3431..bb75696b6f643ce7deefb43b13922b806fd9a757 100644
--- a/data/AB/1896/02_extractedxml.tar.gz
+++ b/data/AB/1896/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b56c8734bff66cb2770ff2c5c0777c17a1c184ef187628016410bdc9d1919a9
-size 21093991
+oid sha256:45c2566cca456ac406cac729e2f216b352a6de9c4782a3ebe37d972e00c854cf
+size 21093983
diff --git a/data/AB/1897/02_extractedxml.tar.gz b/data/AB/1897/02_extractedxml.tar.gz
index a6f71f967fb8af5ef5ea26ce69bc1262ea634c04..756d8a65051f54ec6a2ebb4db2113e8508034e87 100644
--- a/data/AB/1897/02_extractedxml.tar.gz
+++ b/data/AB/1897/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:811ad7823efc58d498beb773fdc109721320013ed579b5ccfb02afcb2060350e
-size 96543758
+oid sha256:9a7fdccc1ec24d0342a8d010bcc658764d655e33ade687711d4e9647541249b4
+size 96543739
diff --git a/data/AB/1898/02_extractedxml.tar.gz b/data/AB/1898/02_extractedxml.tar.gz
index ba236149db2e2c5ad28bbbea79fa9c531eb89231..9991b79f8b138068209a6b2c424949ed4caefa9d 100644
--- a/data/AB/1898/02_extractedxml.tar.gz
+++ b/data/AB/1898/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52800975fbb39002bcf6aae0bf2c5eef168f8c537a5305b25ad019c13d132fa9
-size 36932970
+oid sha256:71b8968b4a795065115bebd8396a0c190d788686549bde474ebc71a3bfe96316
+size 36933004
diff --git a/data/AB/1899/02_extractedxml.tar.gz b/data/AB/1899/02_extractedxml.tar.gz
index a3b6e6485a2256a1cdd48b3f71fe96e251408d4e..2a5b07dce6a371dcd41f572ee8ebb0db8d8071af 100644
--- a/data/AB/1899/02_extractedxml.tar.gz
+++ b/data/AB/1899/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a48d744c19bc7c55e5d530e5e813251835e0c758232556a46985e1a823e7fd3
-size 69289232
+oid sha256:074be7ff382ba1c6d1e78737fbf8254f09d9f895f862c07f99b10850f2eb18a9
+size 69289241
diff --git a/data/AB/1900/02_extractedxml.tar.gz b/data/AB/1900/02_extractedxml.tar.gz
index 3a4af72f9293c3b848b5f729ec524b4e9a7321c9..26f4c10969edb18f4db7ab786dac1747172c562b 100644
--- a/data/AB/1900/02_extractedxml.tar.gz
+++ b/data/AB/1900/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5491f37532eb3d7676173d89b6ad9a62fe020bf3db44ddabe05dd2d5bc8a9bf6
-size 46927015
+oid sha256:340092f91d6a499ca3efb0aa95e5636959aaacb112467c80a71849ec573087fa
+size 46927009
diff --git a/data/AB/1901/02_extractedxml.tar.gz b/data/AB/1901/02_extractedxml.tar.gz
index f8fdc2665f341d0a568b7ce4eefcdf4b7bfedebc..7fb6854159b749b4de4c49a36a6b4f0e0c3872ad 100644
--- a/data/AB/1901/02_extractedxml.tar.gz
+++ b/data/AB/1901/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f94967f298ce1c0d0821ae8945375a6b1e0813fbc1bcecb8fd65b67d2a1dbb2
-size 52036683
+oid sha256:f70a672ce6ca37f4a19fccd0dfe81df8ad0d44e4c59f463b273ce463e5092cc8
+size 52036679
diff --git a/data/AB/1902/02_extractedxml.tar.gz b/data/AB/1902/02_extractedxml.tar.gz
index 96c7bede8459d74e9e3ee06a5b7e88d3dd13fa7c..0ea6ee80a568a598981b59d1cd6f4133a83fd856 100644
--- a/data/AB/1902/02_extractedxml.tar.gz
+++ b/data/AB/1902/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:22a65dd7bb2485c6af21d1f131fd3a7d9e51983ffd3fc7df25cf454c3b00e374
-size 51586731
+oid sha256:9442df1afa9408b92d5b2f5f446cceba51b612b1fe0adcbe7ffd2dc01838908b
+size 51586736
diff --git a/data/AB/1903/02_extractedxml.tar.gz b/data/AB/1903/02_extractedxml.tar.gz
index 87f416062857121df9099a58b53495cb100f4d4d..e02ffba85decadc9ea48469b1a6084d43fd4f5c2 100644
--- a/data/AB/1903/02_extractedxml.tar.gz
+++ b/data/AB/1903/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:923afd1b10e5fec91b391a44e5be9512c7b05781388e34ae877640ab353926a5
-size 63145160
+oid sha256:444357866369ef571e6d6c4010b85014c348fc2006fac25470479e3a4303d2e6
+size 63145179
diff --git a/data/AB/1904/02_extractedxml.tar.gz b/data/AB/1904/02_extractedxml.tar.gz
index 294f1e46742fee4e6108a737c259c470272acb9e..ae6cfbad9ddb91d9506531947fe9611015ef78c8 100644
--- a/data/AB/1904/02_extractedxml.tar.gz
+++ b/data/AB/1904/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:afd79a1d316ddd2f7a05b71c6d4e78ddb78c49d40aa870881271f7409d2c9aad
-size 40694839
+oid sha256:2caf2bd818f28c03cde6fb9a304aa2a0f028b2e6a3ec2669337fa1f2e50d4e66
+size 40694830
diff --git a/data/AB/1905/02_extractedxml.tar.gz b/data/AB/1905/02_extractedxml.tar.gz
index abd4c11e94266367c9ccec3a732e68ec990faba0..7885248aa8a93f464c16461e106f7733e11e8cc6 100644
--- a/data/AB/1905/02_extractedxml.tar.gz
+++ b/data/AB/1905/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ec45078e9d561bd158958757d2bff7703e37219c4ac655cce9e799500a4c476
-size 90126856
+oid sha256:e59a6159330c8379022fef70ad09023ca8d9943328ebcf4ba7b8d58a72b5f92d
+size 90126820
diff --git a/data/AB/1906/02_extractedxml.tar.gz b/data/AB/1906/02_extractedxml.tar.gz
index 75b69b4ccaae10a918ce5898edd688d4d4a6b6c6..df6e6b8487fb6e146a534a4eb29de9d5f4b40b1f 100644
--- a/data/AB/1906/02_extractedxml.tar.gz
+++ b/data/AB/1906/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b883e514e2d505ac2ba4fcb3dc7cdd5122b1b53a249551f83cc695315152711f
-size 98362889
+oid sha256:069e3594e607080c589b9e186cf3a5537878983c92a4002dac4cf7adb008c4fa
+size 98362910
diff --git a/data/AB/1907/02_extractedxml.tar.gz b/data/AB/1907/02_extractedxml.tar.gz
index bdea5ab3c6000bc64f84024d1e7e05640bab1309..6a63a1121d0477a42d7242437c40abeb244627ba 100644
--- a/data/AB/1907/02_extractedxml.tar.gz
+++ b/data/AB/1907/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:73882e08fcd49263b82dbbe12792d205b8b5ff7c8100284ae3e8da0336258ce3
-size 83882771
+oid sha256:4dcc31484abeb702b8cd58b6bf00bc091830273e9f39f97d99ff4846ccb649c7
+size 83882734
diff --git a/data/AB/1908/02_extractedxml.tar.gz b/data/AB/1908/02_extractedxml.tar.gz
index fad38219bd91fcf91aadf54013ff0d2698cb50df..6577130709841988c5604e8f1fbcdfecf47f7163 100644
--- a/data/AB/1908/02_extractedxml.tar.gz
+++ b/data/AB/1908/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1385d4d0226968a2e2d5c86b605df1934467ad3049556d7b6bfda42202b08845
-size 69250187
+oid sha256:004f343106672d4391d64a6e321ec21dfc0ea8fb10b82cd8b8d573412ff52ff1
+size 69250176
diff --git a/data/AB/1909/02_extractedxml.tar.gz b/data/AB/1909/02_extractedxml.tar.gz
index d7baef781a1c71d8920d5cd3dff3173bafd53b82..963abe268c13f81c79a266827d824f368db353cd 100644
--- a/data/AB/1909/02_extractedxml.tar.gz
+++ b/data/AB/1909/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc700ab63e9aa7b0f7f3ad90b28a881e647b52e358d5fab50c86d0ea0c072464
-size 80056576
+oid sha256:418ea8c78b133e7480f83162118199291e5d19a1bd8d53088f95b6cf7cb7e08a
+size 80056573
diff --git a/data/AB/1910/02_extractedxml.tar.gz b/data/AB/1910/02_extractedxml.tar.gz
index 1eb590ea4adb3009dee6594366dc896eed5161b2..b07dddacc0c5f217cab2118390122ec878352f41 100644
--- a/data/AB/1910/02_extractedxml.tar.gz
+++ b/data/AB/1910/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff38d9dec6f4b48d07d3248805f66a5655917f1770cc925650dc0794abfb022c
-size 61097134
+oid sha256:1e780e132fc02c5efa8017433980f26ad51c25ad4334d8d33e015c5875035ae7
+size 61097167
diff --git a/data/AB/1911/02_extractedxml.tar.gz b/data/AB/1911/02_extractedxml.tar.gz
index 855850707c002a6af69e5d414f4b2db24699b4f7..3e0c7d84ff9dbcacecccd5783b963623b30ed8f3 100644
--- a/data/AB/1911/02_extractedxml.tar.gz
+++ b/data/AB/1911/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa6c5146f2756bb15334e1f04fa19445d8c35d1857623f33f79b87737ace06a9
-size 44637529
+oid sha256:4a0a00db7426d9c9659a41f1efa37f72a9dc04a03b6f694b5ca44a4e9b301f8f
+size 44637530
diff --git a/data/AB/1912/02_extractedxml.tar.gz b/data/AB/1912/02_extractedxml.tar.gz
index 8d55a7e434d8523aabdc2280905cacfce8fa768c..95709c199f01353326968bc78f5bca59b486ee54 100644
--- a/data/AB/1912/02_extractedxml.tar.gz
+++ b/data/AB/1912/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc62302faac0adc10ff8a4734625a7d9f7af4ddbddcb84afb18b16157a2c617a
-size 37625247
+oid sha256:3fbbe9a761a20cfe15cce37d928a59ffb639a2fd8684a2c896354bcb5d6a8251
+size 37625257
diff --git a/data/AB/1913/02_extractedxml.tar.gz b/data/AB/1913/02_extractedxml.tar.gz
index 9531cc026d99ff187aa9a5d823f75c2d81b200ed..8ce39285664e3cfce9950af75c2231efcafeba78 100644
--- a/data/AB/1913/02_extractedxml.tar.gz
+++ b/data/AB/1913/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e491ca98d080061081c9ed832c8d27ed63162abe838ccaf4ad8f509f69d653b
-size 97211256
+oid sha256:2567e495d973e5c98deb4646dffcc73048623c4a0b5d311c2ffb4767c64a4e1c
+size 97211206
diff --git a/data/AB/1914/02_extractedxml.tar.gz b/data/AB/1914/02_extractedxml.tar.gz
index 71a7d0b8303b290a75865b7442b97eff173b0e49..fb10010d2d3a2d94a53156105bfd3283765d85ae 100644
--- a/data/AB/1914/02_extractedxml.tar.gz
+++ b/data/AB/1914/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7320c82c359ebe0ddc594d931d19560dfa3e5a83cb801c77057b9d79f5dd304f
-size 55913465
+oid sha256:3a4e6c747317a7f2fe4b165f47dc083a1c0a6b92ae87d61ee0e66fc9f500cb39
+size 55913444
diff --git a/data/AB/1915/02_extractedxml.tar.gz b/data/AB/1915/02_extractedxml.tar.gz
index 8aabe2b0aa1d10e3e5429e6ff7a55609bd87bd32..d274bd88fe05487a43751509b1c84f7af95df59b 100644
--- a/data/AB/1915/02_extractedxml.tar.gz
+++ b/data/AB/1915/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8429c7ea80efc326c99fae0761dfde2088e747422902581fada5af2a1d21d86
-size 46903150
+oid sha256:c2d78efe3d1a1b68a0e241d8e675fe624aa4f8bdcf7222a6007dc4cb4e29665b
+size 46903151
diff --git a/data/AB/1916/02_extractedxml.tar.gz b/data/AB/1916/02_extractedxml.tar.gz
index cf6692c845386863bb1353fc2b81731c5793b9b7..7e3a4db5050095b2e057d27a9a7394ec85f3af73 100644
--- a/data/AB/1916/02_extractedxml.tar.gz
+++ b/data/AB/1916/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:40f63f84371f6ab033294ef2326794ee4687436fc25c5be50b8f29b103d31151
-size 43422622
+oid sha256:3e433375080decdd5ab5b29e2cc90c86c831f9cc4baee1cea0df3e423264d7bf
+size 43422615
diff --git a/data/AB/1917/02_extractedxml.tar.gz b/data/AB/1917/02_extractedxml.tar.gz
index 8450adf40d3b966c985f1185c8f124efec7511a3..d0ed765f3602f8988dd6b31a01e3e1f830b876f2 100644
--- a/data/AB/1917/02_extractedxml.tar.gz
+++ b/data/AB/1917/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db3aa2ec70a4fda5cb4d3b580205105dee268ce9cd0c934b370c5e074a9bcc4e
-size 65281000
+oid sha256:afa0ec18aab452fca441a948bf757e824db600127eb6f77c2565a0627e8f1417
+size 65281006
diff --git a/data/AB/1918/02_extractedxml.tar.gz b/data/AB/1918/02_extractedxml.tar.gz
index 5f898e398be638ccff13aa91e8a0e59fd8b92415..473a7685bba9616ad2dd3e59a44a4df61da03477 100644
--- a/data/AB/1918/02_extractedxml.tar.gz
+++ b/data/AB/1918/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1400cb8a87e1162ff14cabaa01346f7511eecc35364d77ebf8db20375e089f1f
-size 72797535
+oid sha256:c932a07e67ac349e093d9985b26dc13f842746aa6b2196b3984b84e4485e07ae
+size 72797531
diff --git a/data/AB/1919/02_extractedxml.tar.gz b/data/AB/1919/02_extractedxml.tar.gz
index 226cfd394e339aef4260426c08c210d16a134eae..67a61abef86d22bbe6689e906c9f12400ab379f7 100644
--- a/data/AB/1919/02_extractedxml.tar.gz
+++ b/data/AB/1919/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:790099ab2f39cba7de63496c24603346830c977b0dabf1213d60745be2bd17b0
-size 145566284
+oid sha256:04c4981a14d7981dde0ab8c116c43bf92b99386872224049c8ee7d5f2a163d1a
+size 145566318
diff --git a/data/AB/1920/02_extractedxml.tar.gz b/data/AB/1920/02_extractedxml.tar.gz
index bb37cebc180759febac7a558d2dafcaa5c2eb5e4..a3653f796855a30f1fa34e9e98867394c56bcd1a 100644
--- a/data/AB/1920/02_extractedxml.tar.gz
+++ b/data/AB/1920/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1427f6549b4856fb1fedf77fef914c4a92b7232acfa231b44b4ee18404a321c
-size 140048195
+oid sha256:35df36b6c4277920577ad4b03243a295f72c3cd930daf4cdd2986a7eee0e9d72
+size 140048209
diff --git a/data/AB/1921/02_extractedxml.tar.gz b/data/AB/1921/02_extractedxml.tar.gz
index 01cdd63839fa2135a05f9fba7ecf9e2232be6845..ae6ccb04e9a08999215cb6e1ead288d012c10b34 100644
--- a/data/AB/1921/02_extractedxml.tar.gz
+++ b/data/AB/1921/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5182afeba26194adf4d30244c59628c8791ab0a57c1ea249200a18ec83c90fb8
-size 114138222
+oid sha256:f5fbfe5a792f4df35f73151f6cf6d02d44c95c466e64efb038adbdb911e28bcc
+size 114138354
diff --git a/data/AB/1922/02_extractedxml.tar.gz b/data/AB/1922/02_extractedxml.tar.gz
index df5e084d0a7c51ed9f4f21176684ed5681191c4b..7e8d4bc026a9beea688f5821d86f7c716322c497 100644
--- a/data/AB/1922/02_extractedxml.tar.gz
+++ b/data/AB/1922/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c063dad39d1fbfd392ce0bf24d92e89206f6c57ef2ad7f0f760670ad67da806
-size 128061825
+oid sha256:bd0658887b3c994d3fbf66cc74b2ad67d581c782299317c73fa8b2e8eedd2b70
+size 128061878
diff --git a/data/AB/1923/02_extractedxml.tar.gz b/data/AB/1923/02_extractedxml.tar.gz
index 701b6ec02bda130cf46b889c7832d85312ca479a..adb5a50db1c4be1b8128fa39b59b45a40d728ed5 100644
--- a/data/AB/1923/02_extractedxml.tar.gz
+++ b/data/AB/1923/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8923538384bc57aa3893aaa774be3fed25c44c875035a07b389633ec970f03f
-size 97677775
+oid sha256:162cc67bed999e657ed6f216dcfd3f483b3de2aad8e6e5f3ddc19d8949cadeda
+size 97677744
diff --git a/data/AB/1924/02_extractedxml.tar.gz b/data/AB/1924/02_extractedxml.tar.gz
index d1e986df5eb34bc6ae2d6a9ca7153ee78cc598fa..46ad790e16daeaa28d200510d3f2731e0d8643fe 100644
--- a/data/AB/1924/02_extractedxml.tar.gz
+++ b/data/AB/1924/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd237e6e21d8ef2cd62cfde972a6bbb666502a1d21922ca6cfb61cee109b986c
-size 118287429
+oid sha256:95c77278a702854ac1b30632a0a8c8b3601937b7f4e02750ebdf93e9f93a8460
+size 118287434
diff --git a/data/AB/1925/02_extractedxml.tar.gz b/data/AB/1925/02_extractedxml.tar.gz
index fe91216c88ecd99241c6dd85dfb0b9ec5ba78658..be28bf3a7ae22a70ee4f4011085d13cf21bb9909 100644
--- a/data/AB/1925/02_extractedxml.tar.gz
+++ b/data/AB/1925/02_extractedxml.tar.gz
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97b5654a8ff3b1784295c0eba1539328d5ff9d01a7f4a2a54c4441a838660477
-size 117419406
+oid sha256:4ce973d351d5e1198ccaa5170576c2da1c6370c6b7e4b0365e88a2cb9a89892a
+size 117419414
diff --git a/src/python/run_correctxml.py b/src/python/run_correctxml.py
index 4d2a0ebaabaef3cbd965a6a66ebbc762975aec6a..45543847c34d783fb8792afb882c159f7a69ed5a 100644
--- a/src/python/run_correctxml.py
+++ b/src/python/run_correctxml.py
@@ -53,6 +53,6 @@ for infile in files_proc:
 
 # Commands to get the compressed version of the file
 #data/AB/${year}/02_extractedxml.tar.gz
-utils_proc.compress_tar(output_file)
-    
+utils_proc.compress_tar(output_file)            
+            
 print('Total time for correction of year %d: %f' % (int(year_tocomp) ,(time.time() - t1)))
\ No newline at end of file
diff --git a/src/python/run_extract_origxml.py b/src/python/run_extract_origxml.py
index bb803280d9946cb400d6a0d9fefae8607a51f970..e283788c582df6027b698030f1348f4a66f11342 100644
--- a/src/python/run_extract_origxml.py
+++ b/src/python/run_extract_origxml.py
@@ -52,6 +52,6 @@ for infile in files_proc:
 
 # Commands to get the compressed version of the file
 #data/AB/${year}/02_extractedxml.tar.gz
-utils_proc.compress_tar(output_file)
-             
+utils_proc.compress_tar(output_file)                
+            
 print('Total time for year %d: %f' % (int(year_tocomp) ,(time.time() - t1)))
\ No newline at end of file
diff --git a/src/python/utils_proc.py b/src/python/utils_proc.py
index a4f55a026cd161def86508233985eb2c42b6dc10..7348be81153232cfda56abd863a6f957acf9a061 100644
--- a/src/python/utils_proc.py
+++ b/src/python/utils_proc.py
@@ -158,7 +158,7 @@ def compress_tar(infile, outname = ''):
     call_with_out(c2)
     c3 = 'rm -rf ' + str(year)
     call_with_out(c3)
-
+    
 
 def correct_metadata(year, id_doc, flag_end):