diff --git a/.gitattributes b/.gitattributes
index 3ceadf228852756feeebe34d34564f26caabbaa5..3c911cee18c62f7943df355469b57835db431c7b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -169,6 +169,7 @@ data/train_NER/20190109_train_NER.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1970/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1971/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/train_NER/20190116_train_NER_french.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1972/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1926/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1927/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1928/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
@@ -241,3 +242,14 @@ data/AB/1994/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1995/03_correctedmeta.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/politicians/MPs_after1890.csv filter=lfs diff=lfs merge=lfs -text
 data/politicians/lastnames/** filter=lfs diff=lfs merge=lfs -text
+data/AB/1973/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1975/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1976/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1974/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1980/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1981/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1977/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1982/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1983/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1991/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1991/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
diff --git a/.renku/workflow/337b046f45a64dec9c810ea6e51ecaad_python.cwl b/.renku/workflow/337b046f45a64dec9c810ea6e51ecaad_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..8f8df800e526f544aadb6296ce07959ce6783d3b
--- /dev/null
+++ b/.renku/workflow/337b046f45a64dec9c810ea6e51ecaad_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_correctxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1973/02_extractedxml.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1973/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1973
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/55db25b9be3e4de8a799064ce8a81038_python.cwl b/.renku/workflow/55db25b9be3e4de8a799064ce8a81038_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..60cb7b6b64955af6734cdc5085559545f9ccc7a2
--- /dev/null
+++ b/.renku/workflow/55db25b9be3e4de8a799064ce8a81038_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1981/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1981/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1981
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/61efd03ecd5a474e86750b7d60a0fe29_python.cwl b/.renku/workflow/61efd03ecd5a474e86750b7d60a0fe29_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..d8ea2da4010643e2d712b972595897236af2f55b
--- /dev/null
+++ b/.renku/workflow/61efd03ecd5a474e86750b7d60a0fe29_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1991/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1991/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1991
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/7b69fad35c574cbfb28b966576580e0d_python.cwl b/.renku/workflow/7b69fad35c574cbfb28b966576580e0d_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..bfd6aa94e19ac1e78ad7275032beb77935681205
--- /dev/null
+++ b/.renku/workflow/7b69fad35c574cbfb28b966576580e0d_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1980/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1980/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1980
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/95aa274372e6493190e78f082644bc24_python.cwl b/.renku/workflow/95aa274372e6493190e78f082644bc24_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..2848aa558dd8961d372c5c5cea47d12286191413
--- /dev/null
+++ b/.renku/workflow/95aa274372e6493190e78f082644bc24_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1982/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1982/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1982
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/a29a24b4b00f4b7eaf5540b98f81966a_python.cwl b/.renku/workflow/a29a24b4b00f4b7eaf5540b98f81966a_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..b150efbbbc130a6cd49b09485c7493eb99a6bd16
--- /dev/null
+++ b/.renku/workflow/a29a24b4b00f4b7eaf5540b98f81966a_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_correctxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1972/02_extractedxml.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1972/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1972
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/a529202c4bba462f8b66a848b64eb10f_python.cwl b/.renku/workflow/a529202c4bba462f8b66a848b64eb10f_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..ff090ca30c8768c448c2e4b07cce37b58de8b567
--- /dev/null
+++ b/.renku/workflow/a529202c4bba462f8b66a848b64eb10f_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_correctxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1991/02_extractedxml.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1991/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1991
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/ae0772e5030141aea58238ddba633337_python.cwl b/.renku/workflow/ae0772e5030141aea58238ddba633337_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..03abb751e2cc7141b9a95d9031b25007a8705fbf
--- /dev/null
+++ b/.renku/workflow/ae0772e5030141aea58238ddba633337_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1983/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1983/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1983
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/b46e116548cc4088b25c047ad4b262ba_python.cwl b/.renku/workflow/b46e116548cc4088b25c047ad4b262ba_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..5a837e26475eaf523773693e8fe299dbb5f7321a
--- /dev/null
+++ b/.renku/workflow/b46e116548cc4088b25c047ad4b262ba_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1975/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1975/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1975
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/ced9d30b55bc41c7aeb714cd1d863012_python.cwl b/.renku/workflow/ced9d30b55bc41c7aeb714cd1d863012_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..67dea3795f9c6c470703e9d053f0f22f80bd26b0
--- /dev/null
+++ b/.renku/workflow/ced9d30b55bc41c7aeb714cd1d863012_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_correctxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1974/02_extractedxml.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1974/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1974
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/d4a366e49dc54eee9cc13a47e884b9e5_python.cwl b/.renku/workflow/d4a366e49dc54eee9cc13a47e884b9e5_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..149d732502db5cf746d80f61af9463bc06b12906
--- /dev/null
+++ b/.renku/workflow/d4a366e49dc54eee9cc13a47e884b9e5_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1977/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1977/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1977
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/e932e705a4434ee09512f22997fa627f_python.cwl b/.renku/workflow/e932e705a4434ee09512f22997fa627f_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..3d6ea5d6e043a9fef5ec10e45ecc58d9fac870b8
--- /dev/null
+++ b/.renku/workflow/e932e705a4434ee09512f22997fa627f_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_origxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/AB/1976/00_rawpdfs.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default: data/AB/1976/02_extractedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1976
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/data/AB/1972/04_correctedxml.tar.gz b/data/AB/1972/04_correctedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..a936494ecf16a998574d4505b08ae421d9539c1e
--- /dev/null
+++ b/data/AB/1972/04_correctedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fc2b7da468479d966114d65d094869b5cb0d362805397138b98783b4f61187c
+size 32454837
diff --git a/data/AB/1973/04_correctedxml.tar.gz b/data/AB/1973/04_correctedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..328b4066e7441948288c136d187f897490adc17f
--- /dev/null
+++ b/data/AB/1973/04_correctedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d4a6ad272fc8e6015fb2da4f16a7f4363d7d835427cf9c4777294f982424e6b
+size 20865709
diff --git a/data/AB/1974/04_correctedxml.tar.gz b/data/AB/1974/04_correctedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..4b22e2c3e141a453a88f320d14d4dce26e3db7a8
--- /dev/null
+++ b/data/AB/1974/04_correctedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51ed18e34920d0f4a6402cf9a96ecdd12dc47ccb3a2abab0976bc5911dff8ec0
+size 22540482
diff --git a/data/AB/1975/02_extractedxml.tar.gz b/data/AB/1975/02_extractedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..3ec32677fc873e503caecf4042106bb198b29aab
--- /dev/null
+++ b/data/AB/1975/02_extractedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9cb09c4ec6b75766c80221307ec56c25269ed0d6256fed85d34d6cee3a919740
+size 379227481
diff --git a/data/AB/1976/02_extractedxml.tar.gz b/data/AB/1976/02_extractedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..4b6b63886dece072eafca28b2839ac7409cc61ad
--- /dev/null
+++ b/data/AB/1976/02_extractedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc5f661baa69055ad3e624c87c61de9912b4f0c0dfc7895b85ffbc55ab6644ab
+size 321115032
diff --git a/data/AB/1977/02_extractedxml.tar.gz b/data/AB/1977/02_extractedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..d3d86d0b41745fb8b6e22af9b580ab3835c07dea
--- /dev/null
+++ b/data/AB/1977/02_extractedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:712d29e255e7df6857be63ca07b46fe767193bba618694325da6ab1ad69491e7
+size 314960855
diff --git a/data/AB/1980/02_extractedxml.tar.gz b/data/AB/1980/02_extractedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..9a49a4f352da8f831afa549a7ab7bfdfb699bba1
--- /dev/null
+++ b/data/AB/1980/02_extractedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0baf515b60277460abe248e29981e2fb35cb335b5bcd8fe6efcb8d325738f59
+size 325333041
diff --git a/data/AB/1981/02_extractedxml.tar.gz b/data/AB/1981/02_extractedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..94e8d0fa81abff2c90a60f0974f392c63fcdca3b
--- /dev/null
+++ b/data/AB/1981/02_extractedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8ff09ddaa77c85d59601eed6e8817489db27a055f353cdef9aae7a28e24db03
+size 341425000
diff --git a/data/AB/1982/02_extractedxml.tar.gz b/data/AB/1982/02_extractedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..7a73875c1cea2143d450fea4c66eb6de3b239068
--- /dev/null
+++ b/data/AB/1982/02_extractedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b23affdc93e07cd6c6348ce5816b945f2f22f5d570b34517ece1b6b6f2e6dbc4
+size 322541816
diff --git a/data/AB/1983/02_extractedxml.tar.gz b/data/AB/1983/02_extractedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..fa9744c30b883f4079d245a176f7505055b27b96
--- /dev/null
+++ b/data/AB/1983/02_extractedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ab9c3e50b2595f534af638ed1fa1ea8e86f102966f17855fd1ea154dc97bd20
+size 302597163
diff --git a/data/AB/1991/02_extractedxml.tar.gz b/data/AB/1991/02_extractedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..d62dd1a39a0ab010b35b30ab3e856ef42b65c5a3
--- /dev/null
+++ b/data/AB/1991/02_extractedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abd690cf237cfc067e717432e64e03c670fe56392ffd14e98fe70b015024fa81
+size 431087808
diff --git a/data/AB/1991/04_correctedxml.tar.gz b/data/AB/1991/04_correctedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..3c2b7184d659cc3b5d0e3abeab841188bd2f33b9
--- /dev/null
+++ b/data/AB/1991/04_correctedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed1bd53b584fca43253e09d30f9763daaa17ca599f200cc61b88608163c56a19
+size 29447328
diff --git a/src/sh/execute_per_year_isolation.sh b/src/sh/execute_per_year_isolation.sh
new file mode 100755
index 0000000000000000000000000000000000000000..cd3f676b994f05672484e300931c3c452317003a
--- /dev/null
+++ b/src/sh/execute_per_year_isolation.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Runs a python script through `renku run --isolation` once per year.
+# Positional arguments:
+# 1 - python script to run
+# 2 - folder of the database
+# 3 - name of input file, without the .tar.gz extension
+# 4 - name of output file, without the .tar.gz extension
+# 5, 6 - first and last year to process (both inclusive)
+year_start=$5
+year_end=$6
+# Expansions are quoted so paths with spaces survive and a missing year
+# reaches seq as an empty argument instead of silently vanishing.
+for year in $(seq "$year_start" "$year_end"); do
+    echo "$year"
+    "$CONDA_DIR/envs/renku/bin/renku" run --isolation python "$1" "${2}/$year/${3}.tar.gz" "${2}/$year/${4}.tar.gz"
+done