diff --git a/.gitattributes b/.gitattributes
index 976aa3ea4f49fbe4d85357c19a4d69cfbfb4913f..60b77b54748b2e660dbb0ebac8e29f9e564fbbce 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -262,3 +262,82 @@ data/politicians/MPs_after1890.csv filter=lfs diff=lfs merge=lfs -text
 data/politicians/lastnames/** filter=lfs diff=lfs merge=lfs -text
 data/train_NER/20190109_train_NER.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/train_NER/20190116_train_NER_french.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1973/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1975/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1976/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1974/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1980/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1981/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1977/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1982/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1983/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1991/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1991/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/lists/dict_overlaps.pickle filter=lfs diff=lfs merge=lfs -text
+data/lists/all_titles.csv filter=lfs diff=lfs merge=lfs -text
+data/AB/1978/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1979/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1975/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1891/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1892/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1893/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1894/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1895/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1896/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1897/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1898/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1899/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1900/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1991/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1901/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1902/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1903/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1904/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1905/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1906/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1907/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1908/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1909/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1910/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1911/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1912/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1913/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1914/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1915/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1916/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1917/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1918/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1919/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1920/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1921/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1922/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1923/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1924/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1925/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1926/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1927/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1928/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1929/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1930/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1931/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1932/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1933/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1934/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1935/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1936/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1937/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1938/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1939/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1940/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1941/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1942/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1943/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1944/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1945/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1946/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1947/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1948/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1949/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1950/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1976/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1977/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 9a187c9b9de280eadceda624e53b087796a9bed4..ac6bfbfb7f6f29aa500e4355baf8a4f94083ffad 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -24,8 +24,9 @@ image_build:
 
 dot:
   stage: build
-  image: renku/singleuser:latest
+  image: renku/renku-python:latest
   script:
+    - renku --version
     - renku log --format dot $(git ls-files --no-empty-directory --recurse-submodules) > graph.dot
   artifacts:
     paths:
diff --git a/.renku/workflow/04e7976838dc4d37bb8159375d17fd04_python.cwl b/.renku/workflow/04e7976838dc4d37bb8159375d17fd04_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..a765424ca542b2bd09791394a33ff6c182f41caa
--- /dev/null
+++ b/.renku/workflow/04e7976838dc4d37bb8159375d17fd04_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1900_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1900/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1900/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1900/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1900
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/08070d37200f4c08ae0ae5d950b77ede_python.cwl b/.renku/workflow/08070d37200f4c08ae0ae5d950b77ede_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..1ef045de18aca1b308af3634fa8bc0cab6d11073
--- /dev/null
+++ b/.renku/workflow/08070d37200f4c08ae0ae5d950b77ede_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1897_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1897/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1897/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1897/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1897
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/096cbe6860624be295bb368f984ccf0c_python.cwl b/.renku/workflow/096cbe6860624be295bb368f984ccf0c_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..8de7ba08890a43371f8023c0c37fc549b7ec8737
--- /dev/null
+++ b/.renku/workflow/096cbe6860624be295bb368f984ccf0c_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1943_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1943/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1943/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1943/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1943
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/09c91cabdeb84f79b46299ed82540e2b_python.cwl b/.renku/workflow/09c91cabdeb84f79b46299ed82540e2b_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..048bdf2962c3293551e8d9bf09e0c03b2643a3b0
--- /dev/null
+++ b/.renku/workflow/09c91cabdeb84f79b46299ed82540e2b_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1917_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1917/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1917/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1917/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1917
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/0c03aec5c51a4c49b79accd91c4537c6_python.cwl b/.renku/workflow/0c03aec5c51a4c49b79accd91c4537c6_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..a48da6d7656a302435f6a0bde2849270d9b8de5d
--- /dev/null
+++ b/.renku/workflow/0c03aec5c51a4c49b79accd91c4537c6_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1913_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1913/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1913/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1913/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1913
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/0c251d63a59149b68a93f7ac34a0e4ce_python.cwl b/.renku/workflow/0c251d63a59149b68a93f7ac34a0e4ce_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..bf72d8d72fe68c9b67965d788cdff0080132fab0
--- /dev/null
+++ b/.renku/workflow/0c251d63a59149b68a93f7ac34a0e4ce_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1901_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1901/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1901/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1901/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1901
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/0dfe13364d3842a389d0a7c0041e58d4_python.cwl b/.renku/workflow/0dfe13364d3842a389d0a7c0041e58d4_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..c3ed2ca8d7528626cc10dd6a1af9b7809b3da2f3
--- /dev/null
+++ b/.renku/workflow/0dfe13364d3842a389d0a7c0041e58d4_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1920_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1920/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1920/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1920/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1920
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/1099d291016a42b88441229dc6f00edb_python.cwl b/.renku/workflow/1099d291016a42b88441229dc6f00edb_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..1bf8706f4866fef74d90ded57fea12f3577859bb
--- /dev/null
+++ b/.renku/workflow/1099d291016a42b88441229dc6f00edb_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1939_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1939/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1939/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1939/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1939
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/10b263baa6cb4a1995a435bbf58f2dd8_python.cwl b/.renku/workflow/10b263baa6cb4a1995a435bbf58f2dd8_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..eaa7f80cd77722c8baa4736eaea3032a1b1d793f
--- /dev/null
+++ b/.renku/workflow/10b263baa6cb4a1995a435bbf58f2dd8_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1924_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1924/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1924/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1924/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1924
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/2866929d184b4145abd4ff60a5b17ac6_python.cwl b/.renku/workflow/2866929d184b4145abd4ff60a5b17ac6_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..9492d4aeccae6c5d4ee50e4a600ed2b623cf8af1
--- /dev/null
+++ b/.renku/workflow/2866929d184b4145abd4ff60a5b17ac6_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1991_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1991/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1991/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1991/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1991
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/2bf80c15eb374f1392a44cbe715d8b75_python.cwl b/.renku/workflow/2bf80c15eb374f1392a44cbe715d8b75_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..a4662996c62b60105e2e9d29afa5080d266febe5
--- /dev/null
+++ b/.renku/workflow/2bf80c15eb374f1392a44cbe715d8b75_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1948_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1948/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1948/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1948/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1948
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/2e32c26de8824d6a9ed0d03f7de081a0_python.cwl b/.renku/workflow/2e32c26de8824d6a9ed0d03f7de081a0_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..66153cc45293880ba398ee6fd17b6605198aa34d
--- /dev/null
+++ b/.renku/workflow/2e32c26de8824d6a9ed0d03f7de081a0_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1906_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1906/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1906/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1906/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1906
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/315b709bc82a4f42aecb6028053eb45c_python.cwl b/.renku/workflow/315b709bc82a4f42aecb6028053eb45c_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..8e9ef122478bc95ebfd6893d346ae272c06ca82a
--- /dev/null
+++ b/.renku/workflow/315b709bc82a4f42aecb6028053eb45c_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1923_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1923/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1923/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1923/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1923
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/38c064f35e1c43e6a9f3a2162c4db230_python.cwl b/.renku/workflow/38c064f35e1c43e6a9f3a2162c4db230_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..7293606f3d8bf3bbc920de314dd7b35ff6466687
--- /dev/null
+++ b/.renku/workflow/38c064f35e1c43e6a9f3a2162c4db230_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1932_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1932/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1932/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1932/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1932
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/3de19e9765574c0699ba7f649ecc100a_python.cwl b/.renku/workflow/3de19e9765574c0699ba7f649ecc100a_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..8f638e35f0d715d501c3d223ab520da45b4568fa
--- /dev/null
+++ b/.renku/workflow/3de19e9765574c0699ba7f649ecc100a_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1946_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1946/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1946/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1946/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1946
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/3f1cd56ab2304ed884ea4ed07f583f40_python.cwl b/.renku/workflow/3f1cd56ab2304ed884ea4ed07f583f40_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..3accab931ccae2d72b3f732f27243a41db54054f
--- /dev/null
+++ b/.renku/workflow/3f1cd56ab2304ed884ea4ed07f583f40_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1916_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1916/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1916/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1916/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1916
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/4523613056de4b73aca81c156aa1aa08_python.cwl b/.renku/workflow/4523613056de4b73aca81c156aa1aa08_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..71a941e35f1c965e6506af22647ebb0a0de3b834
--- /dev/null
+++ b/.renku/workflow/4523613056de4b73aca81c156aa1aa08_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1933_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1933/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1933/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1933/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1933
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/46541a4c28944396b841a1fef102d0f0_python.cwl b/.renku/workflow/46541a4c28944396b841a1fef102d0f0_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..5d15f85666470d0e800e92d72e5227b13b5d8660
--- /dev/null
+++ b/.renku/workflow/46541a4c28944396b841a1fef102d0f0_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1898_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1898/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1898/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1898/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1898
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/48e1d5c9b7074085886096a38e083a23_python.cwl b/.renku/workflow/48e1d5c9b7074085886096a38e083a23_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..ea1c1f50d3bfa9067eeeafe30222bee177edea05
--- /dev/null
+++ b/.renku/workflow/48e1d5c9b7074085886096a38e083a23_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1949_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1949/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1949/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1949/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1949
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/51b9ab96967e4ec5bdcbacb919085528_python.cwl b/.renku/workflow/51b9ab96967e4ec5bdcbacb919085528_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..a9393a080b45ded0dae3a2fb1f0ec46012b3dd35
--- /dev/null
+++ b/.renku/workflow/51b9ab96967e4ec5bdcbacb919085528_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1925_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1925/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1925/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1925/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1925
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/5925a49c747743bf9f825ace023e0db7_python.cwl b/.renku/workflow/5925a49c747743bf9f825ace023e0db7_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..3d8bb097578c428dd5adb4bc0cb6aabaf76c0040
--- /dev/null
+++ b/.renku/workflow/5925a49c747743bf9f825ace023e0db7_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1903_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1903/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1903/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1903/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1903
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/5b4900dc404148aca394b333a144b121_python.cwl b/.renku/workflow/5b4900dc404148aca394b333a144b121_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..91f9b8cb9669da278688cf7920329e3151104f1d
--- /dev/null
+++ b/.renku/workflow/5b4900dc404148aca394b333a144b121_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1930_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1930/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1930/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1930/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1930
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/6c375f1f47a64e3cb44f96b83a209505_python.cwl b/.renku/workflow/6c375f1f47a64e3cb44f96b83a209505_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..22334ef025ab17213e75fc8a1f6cfb0bd1ce3546
--- /dev/null
+++ b/.renku/workflow/6c375f1f47a64e3cb44f96b83a209505_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1919_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1919/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1919/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1919/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1919
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/6c52c0c15d5348e1971522d29b6d884f_python.cwl b/.renku/workflow/6c52c0c15d5348e1971522d29b6d884f_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..77fda1172b64aa11d55b26f9e3c0f19a410404ae
--- /dev/null
+++ b/.renku/workflow/6c52c0c15d5348e1971522d29b6d884f_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1912_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1912/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1912/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1912/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1912
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/6eb19472206e4e278e306175ebc0c875_python.cwl b/.renku/workflow/6eb19472206e4e278e306175ebc0c875_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..f044b45e1777fe9009aa89da2c8c1221bd73d1ef
--- /dev/null
+++ b/.renku/workflow/6eb19472206e4e278e306175ebc0c875_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1908_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1908/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1908/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1908/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1908
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/70cf9d7f4e9f4d20b7f2effe59a8a0f0_python.cwl b/.renku/workflow/70cf9d7f4e9f4d20b7f2effe59a8a0f0_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..6c4b6b12f3d7ecf6e326b88bcfa703b1c3726df9
--- /dev/null
+++ b/.renku/workflow/70cf9d7f4e9f4d20b7f2effe59a8a0f0_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1921_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1921/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1921/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1921/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1921
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/74545142cd9e4813ab933fc9b5941dfd_python.cwl b/.renku/workflow/74545142cd9e4813ab933fc9b5941dfd_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..004fa4709e83a95eb90fe39ec34097b02cf60e22
--- /dev/null
+++ b/.renku/workflow/74545142cd9e4813ab933fc9b5941dfd_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1910_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1910/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1910/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1910/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1910
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/78eeb8a58eff4b12ac8455f0abb25ebb_python.cwl b/.renku/workflow/78eeb8a58eff4b12ac8455f0abb25ebb_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..60792a05ee4ad795805932c88c7bedd5ad871088
--- /dev/null
+++ b/.renku/workflow/78eeb8a58eff4b12ac8455f0abb25ebb_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1914_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1914/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1914/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1914/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1914
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/7e6d16df0b9443f3a3f303a0efb9b416_python.cwl b/.renku/workflow/7e6d16df0b9443f3a3f303a0efb9b416_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..7c84c399da004706be641f2627de668ae44c3fe5
--- /dev/null
+++ b/.renku/workflow/7e6d16df0b9443f3a3f303a0efb9b416_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1918_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1918/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1918/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1918/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1918
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/7fd1821bb063499ab25ef38e22786b63_python.cwl b/.renku/workflow/7fd1821bb063499ab25ef38e22786b63_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..e19dcd74659f0f5ec31ae85f157467d218a6d31c
--- /dev/null
+++ b/.renku/workflow/7fd1821bb063499ab25ef38e22786b63_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1936_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1936/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1936/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1936/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1936
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/817b71eed0e5460e84625b9a742681ec_python.cwl b/.renku/workflow/817b71eed0e5460e84625b9a742681ec_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..0f4cad96f746e07e8806d6829f8295eb1c6c82d6
--- /dev/null
+++ b/.renku/workflow/817b71eed0e5460e84625b9a742681ec_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1902_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1902/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1902/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1902/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1902
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/8451b2081f8c42cfaee486564fa7f9c8_python.cwl b/.renku/workflow/8451b2081f8c42cfaee486564fa7f9c8_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..125073013a4b51ddc3298d2163861f4b9f78c632
--- /dev/null
+++ b/.renku/workflow/8451b2081f8c42cfaee486564fa7f9c8_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1915_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1915/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1915/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1915/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1915
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/898f4d9c24484169b593c5898a5f3402_python.cwl b/.renku/workflow/898f4d9c24484169b593c5898a5f3402_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..79dd4f9b80544bd79d0e25033d2b17911fdaa390
--- /dev/null
+++ b/.renku/workflow/898f4d9c24484169b593c5898a5f3402_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1944_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1944/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1944/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1944/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1944
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/8c208f893ded47448495e57ef6da4aaf_python.cwl b/.renku/workflow/8c208f893ded47448495e57ef6da4aaf_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..f9451205e46341f0c6c6e0bc5ba53432e8817ea6
--- /dev/null
+++ b/.renku/workflow/8c208f893ded47448495e57ef6da4aaf_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1891_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1891/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1891/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1891/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1891
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/8cfca353cd574434991fc5a533a4e758_python.cwl b/.renku/workflow/8cfca353cd574434991fc5a533a4e758_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..d513378f413505a3fd965273bee524e5ee696a14
--- /dev/null
+++ b/.renku/workflow/8cfca353cd574434991fc5a533a4e758_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1893_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1893/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1893/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1893/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1893
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/90bb5479330048288e67ec6ef8834382_python.cwl b/.renku/workflow/90bb5479330048288e67ec6ef8834382_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..0e0b591b1e744c05c165c74b91798c789cf95d12
--- /dev/null
+++ b/.renku/workflow/90bb5479330048288e67ec6ef8834382_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1937_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1937/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1937/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1937/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1937
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/92da3dfe8f0141d38755449c4f670a19_python.cwl b/.renku/workflow/92da3dfe8f0141d38755449c4f670a19_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..09c9eb7a73b35398cdd55fabb34edeb710bebf19
--- /dev/null
+++ b/.renku/workflow/92da3dfe8f0141d38755449c4f670a19_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1899_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1899/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1899/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1899/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1899
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/939c4e343b7b480e8c27e529b691414e_python.cwl b/.renku/workflow/939c4e343b7b480e8c27e529b691414e_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..81e6f072e91a72619a109b3facc19c7472d078f7
--- /dev/null
+++ b/.renku/workflow/939c4e343b7b480e8c27e529b691414e_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1911_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1911/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1911/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1911/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1911
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/9d998cb73d6245568b690e2239b3d7e8_python.cwl b/.renku/workflow/9d998cb73d6245568b690e2239b3d7e8_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..6ef08888e6ef8ddd235d4cb65ca0b21d85e00727
--- /dev/null
+++ b/.renku/workflow/9d998cb73d6245568b690e2239b3d7e8_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1935_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1935/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1935/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1935/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1935
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/9efc2a364bc34f18b99123f76d2b2d5a_python.cwl b/.renku/workflow/9efc2a364bc34f18b99123f76d2b2d5a_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..fa755df6d272d3aa1f783f1c6882e932ce80b070
--- /dev/null
+++ b/.renku/workflow/9efc2a364bc34f18b99123f76d2b2d5a_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1909_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1909/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1909/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1909/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1909
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/a05768eb941b4d6c9ec0a347c9fedff7_python.cwl b/.renku/workflow/a05768eb941b4d6c9ec0a347c9fedff7_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..f555a89d0292d53f89b0e88c74b315d6dea791e8
--- /dev/null
+++ b/.renku/workflow/a05768eb941b4d6c9ec0a347c9fedff7_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1929_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1929/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1929/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1929/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1929
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/a7d595d0a9a54bd59cfa20e6734f1e09_python.cwl b/.renku/workflow/a7d595d0a9a54bd59cfa20e6734f1e09_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..26b2073750fdbc9e3cea3691a74c4a34af298954
--- /dev/null
+++ b/.renku/workflow/a7d595d0a9a54bd59cfa20e6734f1e09_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1905_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1905/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1905/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1905/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1905
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/ab5b49553afb421480d1ddbfe62dedbf_python.cwl b/.renku/workflow/ab5b49553afb421480d1ddbfe62dedbf_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..15ed3a23698a9dff46ebf928c6ecf512d3c78bcf
--- /dev/null
+++ b/.renku/workflow/ab5b49553afb421480d1ddbfe62dedbf_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1895_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1895/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1895/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1895/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1895
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/abc6e8e7b6554803be67bfb725702458_python.cwl b/.renku/workflow/abc6e8e7b6554803be67bfb725702458_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..210ac3d6c8612cb10d37fce16b5d6d8f9ad90f12
--- /dev/null
+++ b/.renku/workflow/abc6e8e7b6554803be67bfb725702458_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1942_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1942/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1942/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1942/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1942
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/ae1d2b0faa424f1982fd942894804847_python.cwl b/.renku/workflow/ae1d2b0faa424f1982fd942894804847_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..f4715d7ea59359ce5cda722f74b8f90ed7d96491
--- /dev/null
+++ b/.renku/workflow/ae1d2b0faa424f1982fd942894804847_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1922_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1922/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1922/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1922/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1922
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/b1c5d45650d44d5c9f5fddf94796fba3_python.cwl b/.renku/workflow/b1c5d45650d44d5c9f5fddf94796fba3_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..14e6bc8b1da5ad8730146790c725f18a809de6e3
--- /dev/null
+++ b/.renku/workflow/b1c5d45650d44d5c9f5fddf94796fba3_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1896_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1896/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1896/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1896/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1896
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/b57850088f8c4223bedcf8929bab3929_python.cwl b/.renku/workflow/b57850088f8c4223bedcf8929bab3929_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..59e6170b07a0588ac5f9d6152f7af5c5cdcc336b
--- /dev/null
+++ b/.renku/workflow/b57850088f8c4223bedcf8929bab3929_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1938_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1938/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1938/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1938/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1938
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/b61c0463137347dd93a9bd82876caac3_python.cwl b/.renku/workflow/b61c0463137347dd93a9bd82876caac3_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..d4688b42c1cb451d5abe4d1320abdd79a95973b2
--- /dev/null
+++ b/.renku/workflow/b61c0463137347dd93a9bd82876caac3_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1894_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1894/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1894/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1894/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1894
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/c14a2befed574e3a9da8be4f4fccc540_python.cwl b/.renku/workflow/c14a2befed574e3a9da8be4f4fccc540_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..85f925f7bf63dfe633a4de6b2c9364dc025dc3dc
--- /dev/null
+++ b/.renku/workflow/c14a2befed574e3a9da8be4f4fccc540_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1928_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1928/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1928/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1928/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1928
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/c74eeb26c8a14bbaab491035c337c817_python.cwl b/.renku/workflow/c74eeb26c8a14bbaab491035c337c817_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..baf064f3d6109a9c6abf8a5ebd5fcdcf82d48837
--- /dev/null
+++ b/.renku/workflow/c74eeb26c8a14bbaab491035c337c817_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1892_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1892/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1892/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1892/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1892
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/cb1d01d9091649fabdd74777a6706c76_python.cwl b/.renku/workflow/cb1d01d9091649fabdd74777a6706c76_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..0b1d33a073004fb2da3e21ee22f2f10aa35908a6
--- /dev/null
+++ b/.renku/workflow/cb1d01d9091649fabdd74777a6706c76_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1927_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1927/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1927/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1927/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1927
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/cc392bab529f4657b1cc8b3ea1712cdf_python.cwl b/.renku/workflow/cc392bab529f4657b1cc8b3ea1712cdf_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..904c72053d8ff9e9fe01835ba3f78b58111b262b
--- /dev/null
+++ b/.renku/workflow/cc392bab529f4657b1cc8b3ea1712cdf_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1947_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1947/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1947/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1947/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1947
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/d205153ac4354d43848414c9ac45f845_python.cwl b/.renku/workflow/d205153ac4354d43848414c9ac45f845_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..f5647a44030b118901fe93dea26b96c63a3010b2
--- /dev/null
+++ b/.renku/workflow/d205153ac4354d43848414c9ac45f845_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1950_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1950/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1950/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1950/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1950
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/d6526f931252492fac987ae4391793e9_python.cwl b/.renku/workflow/d6526f931252492fac987ae4391793e9_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..9b70d7b99c9b25c2234d998cb6d7c9e619448506
--- /dev/null
+++ b/.renku/workflow/d6526f931252492fac987ae4391793e9_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1934_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1934/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1934/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1934/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1934
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/e7311ba30e2f49e89a971090b5878d29_python.cwl b/.renku/workflow/e7311ba30e2f49e89a971090b5878d29_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..01869c0e007302e6c06c12651f56c611478621b5
--- /dev/null
+++ b/.renku/workflow/e7311ba30e2f49e89a971090b5878d29_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1945_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1945/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1945/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1945/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1945
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/ec84663cdecf49aea4a493628503318f_python.cwl b/.renku/workflow/ec84663cdecf49aea4a493628503318f_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..1c53863101ae46140f5450613c87d57ccebbf079
--- /dev/null
+++ b/.renku/workflow/ec84663cdecf49aea4a493628503318f_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1940_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1940/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1940/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1940/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1940
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/f149928f975a42f29a9de37302eb7f9a_python.cwl b/.renku/workflow/f149928f975a42f29a9de37302eb7f9a_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..940c1760d53518dc7bf41491be73d6104e5014ef
--- /dev/null
+++ b/.renku/workflow/f149928f975a42f29a9de37302eb7f9a_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1904_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1904/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1904/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1904/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1904
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/f19f827a24df4408ba952e88a3409ace_python.cwl b/.renku/workflow/f19f827a24df4408ba952e88a3409ace_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..5f575b72070d769b8885c0a437962320be66562a
--- /dev/null
+++ b/.renku/workflow/f19f827a24df4408ba952e88a3409ace_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1941_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1941/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1941/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1941/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1941
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/f8478f303a9c4e248d63000ce7be6e41_python.cwl b/.renku/workflow/f8478f303a9c4e248d63000ce7be6e41_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..d4893c42a32c24c28ab2c068fb2b2c4d1d6ac556
--- /dev/null
+++ b/.renku/workflow/f8478f303a9c4e248d63000ce7be6e41_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1926_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1926/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1926/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1926/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1926
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/fb24b583f0874eefb07d3f8a2eb4ae83_python.cwl b/.renku/workflow/fb24b583f0874eefb07d3f8a2eb4ae83_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..1e198bec197f45bf6a072194119e278075b9b731
--- /dev/null
+++ b/.renku/workflow/fb24b583f0874eefb07d3f8a2eb4ae83_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1907_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1907/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1907/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1907/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1907
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/fbcdfd8bfa1a4a55889e26f0bad093b6_python.cwl b/.renku/workflow/fbcdfd8bfa1a4a55889e26f0bad093b6_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..a80aa58aa6c49645edcc83e7cc34559a5d1a9d9c
--- /dev/null
+++ b/.renku/workflow/fbcdfd8bfa1a4a55889e26f0bad093b6_python.cwl
@@ -0,0 +1,91 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/run_extract_discussions.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: File
+      path: ../../data/politicians/lastnames/1931_MPs.pickle
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:
+    default:
+      class: File
+      path: ../../data/AB/1931/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_4:
+    default:
+      class: File
+      path: ../../data/AB/1931/03_correctedmeta.tar.gz
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_5:
+    default:
+      class: File
+      path: ../../data/lists/not_names.txt
+    inputBinding:
+      position: 5
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_6:
+    default:
+      class: File
+      path: ../../data/lists/dict_overlaps.pickle
+    inputBinding:
+      position: 6
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_7:
+    default: data/AB/1931/05_annotatedxml.tar.gz
+    inputBinding:
+      position: 7
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_7)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/AB/1931
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/data/AB/1891/05_annotatedxml.tar.gz b/data/AB/1891/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..7cdc4bd6d25bdd587f6e54bf8adf71448da596e0
--- /dev/null
+++ b/data/AB/1891/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:306f4862fce3d7d597f8ee60424137dafbb80d365ba26153c7f019633b27e473
+size 2519874
diff --git a/data/AB/1892/05_annotatedxml.tar.gz b/data/AB/1892/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..c11ce6d36517e9143b93676b91b2855b96e48673
--- /dev/null
+++ b/data/AB/1892/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b536073407eb1f49a3678c7f596bbb4b689dc557dcdb87fc4779dc3d358da23a
+size 1246431
diff --git a/data/AB/1893/05_annotatedxml.tar.gz b/data/AB/1893/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..447666efd91a0c91d4c8685976995d22cc99731f
--- /dev/null
+++ b/data/AB/1893/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7339937b989024efb9d51d8d7f74935fa38992e60217d0e7c8a92efa5dc5bb9a
+size 3607307
diff --git a/data/AB/1894/05_annotatedxml.tar.gz b/data/AB/1894/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..7e83ce0986aa4703629cec939590e30c58387555
--- /dev/null
+++ b/data/AB/1894/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35016cfa4eba2af84444e88e277f9dcc8a9008c6c780cf16e7f03c159314cea0
+size 3311441
diff --git a/data/AB/1895/05_annotatedxml.tar.gz b/data/AB/1895/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..de6c3b6dbd20cca1fa011a6d37b8bab1793b73a8
--- /dev/null
+++ b/data/AB/1895/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:772893d7ad4f8d1337d63324ecc27c9528c78832d83b4058f8d9e594643f1d76
+size 5986442
diff --git a/data/AB/1896/05_annotatedxml.tar.gz b/data/AB/1896/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..7702d0f2ce4f94656902916f2bc5232316641b77
--- /dev/null
+++ b/data/AB/1896/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e616dd9480cb1f2816a9c4f75d1f4e411971bf9272b0af7b433083961cd37243
+size 1580156
diff --git a/data/AB/1897/05_annotatedxml.tar.gz b/data/AB/1897/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..810f41052ce7269811dc9e65c076ddc97564e485
--- /dev/null
+++ b/data/AB/1897/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e803314068e32692a947ec43715e49346818e79bcd56c589bb64b68bf1c21a21
+size 7323375
diff --git a/data/AB/1898/05_annotatedxml.tar.gz b/data/AB/1898/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..7adce4a3fdc24966e0a6fd3b8fa24710b1c557df
--- /dev/null
+++ b/data/AB/1898/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aed5d6ae59432fb4bf77f1ed5ccd1027d5c4349da831527974697cff4b28e966
+size 2821956
diff --git a/data/AB/1899/05_annotatedxml.tar.gz b/data/AB/1899/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..f75edc0639843f6d89bfbbf8c85359de9d3c4121
--- /dev/null
+++ b/data/AB/1899/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11515821c13be9c21a0468a54a6d6461ec4dbff95c7f89cfcb606afc66ae4f68
+size 5328516
diff --git a/data/AB/1900/05_annotatedxml.tar.gz b/data/AB/1900/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..9a95bc8d687cbd1127f5c77fbf16ad035cfce9f9
--- /dev/null
+++ b/data/AB/1900/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85ae209ee6a04f1978566611161892d0312ee5b04c6a73b274001a460a8ff963
+size 3562049
diff --git a/data/AB/1901/05_annotatedxml.tar.gz b/data/AB/1901/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..ccec199b0fa8499e9882f329111a3b11302ff796
--- /dev/null
+++ b/data/AB/1901/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5f192b022e9b6e3d9cc3b79c1e92013dca1d8eb3d991b96023692b8abee68ca
+size 3908177
diff --git a/data/AB/1902/05_annotatedxml.tar.gz b/data/AB/1902/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..ef42d4a8fc7495afdf38d8d0c9cca3c43479007b
--- /dev/null
+++ b/data/AB/1902/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5ce4302b5cbe94e485e2588fd2f5c16c4c7bf1882236800d1563566af13b7c0
+size 3921369
diff --git a/data/AB/1903/05_annotatedxml.tar.gz b/data/AB/1903/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..8e8158defce20b1f9a927cb752eb5e7e3e9cb309
--- /dev/null
+++ b/data/AB/1903/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0235987f62a9df13b74c7a4944d3fe6729ed4e21f8b94adf8962e469b519f198
+size 4710192
diff --git a/data/AB/1904/05_annotatedxml.tar.gz b/data/AB/1904/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..7b9c54f3bb527314891b1da653a45fe0a809e395
--- /dev/null
+++ b/data/AB/1904/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aeb15eb07d1640ad59495133bcd1e2df22fd96343e4ef6687fded0e39d53ca94
+size 3069167
diff --git a/data/AB/1905/05_annotatedxml.tar.gz b/data/AB/1905/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..bebbdb42be8eda477201e9b45879c52d17242af5
--- /dev/null
+++ b/data/AB/1905/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3dd91f9eecb361b78491b722b4eba350c69051a34c33407f0da2c989266a8ebd
+size 6924410
diff --git a/data/AB/1906/05_annotatedxml.tar.gz b/data/AB/1906/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..2cd4497c171bea71bb19e89dbf00289cf7b56ef5
--- /dev/null
+++ b/data/AB/1906/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00b49d84c85465b2df154006d814d9f7dbdcfd810e3ab608c33b0c1a694a04c4
+size 7481844
diff --git a/data/AB/1907/05_annotatedxml.tar.gz b/data/AB/1907/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..62172c6f5d140bd0f8d370f66458214473faee8f
--- /dev/null
+++ b/data/AB/1907/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01ce643bb2d7e6d4200652156de6c3ae81b84de898add7b47e035c1649170577
+size 6556974
diff --git a/data/AB/1908/05_annotatedxml.tar.gz b/data/AB/1908/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..965bc5900462156d38ff275308b05315d6abf525
--- /dev/null
+++ b/data/AB/1908/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96d2a834917dd344891ce1d07d40db4e483b943c49280f78428a51ba6ec6e536
+size 5408603
diff --git a/data/AB/1909/05_annotatedxml.tar.gz b/data/AB/1909/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..9136fc298ca0aaba431b8f118be72f765eab9eeb
--- /dev/null
+++ b/data/AB/1909/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0fa9792a82c39678765b17c48f270669008cb9d05734351ee5b0193992f543d3
+size 6233989
diff --git a/data/AB/1910/05_annotatedxml.tar.gz b/data/AB/1910/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..4361152ddff986b40f698cf25002984e0bdda2bc
--- /dev/null
+++ b/data/AB/1910/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc4c008d59cdd149b2e624973fb1be9f136fddc0580ffbc99d01974eecf7eccf
+size 4685054
diff --git a/data/AB/1911/05_annotatedxml.tar.gz b/data/AB/1911/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..5cb5cc59a6b4ae245dd3d324b163e066b7ebf12f
--- /dev/null
+++ b/data/AB/1911/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f82bacad7e73a543203f3c7f3eb3e9e2f77ac1ff9e0b961fc693bd8f01d078c6
+size 3480643
diff --git a/data/AB/1912/05_annotatedxml.tar.gz b/data/AB/1912/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..d2cc3141106fb8d0623575f9498b9fa64be06515
--- /dev/null
+++ b/data/AB/1912/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3213283090d4b07cda0d814984ec5b68fea913b352e1933daef149d40edf814
+size 2832852
diff --git a/data/AB/1913/05_annotatedxml.tar.gz b/data/AB/1913/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..612c3c62e0486fcecc50009827ee467cc7b38ec0
--- /dev/null
+++ b/data/AB/1913/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57e62cfb6469b9e7a3795872c4532825bbbfac9f5deaa950759c816ec1e97137
+size 7407617
diff --git a/data/AB/1914/05_annotatedxml.tar.gz b/data/AB/1914/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..04b3605007261e8d556e6062f28f0e83903d1ef3
--- /dev/null
+++ b/data/AB/1914/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc967bb32f70335e9a7b07a33e4ed4a77e7a9b9cc63bbf8814026e9be15157a2
+size 4257343
diff --git a/data/AB/1915/05_annotatedxml.tar.gz b/data/AB/1915/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..0601249d0881e3ae419f843a762ebd3a2edd894b
--- /dev/null
+++ b/data/AB/1915/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d59a312b0b1a2cbeb50db4c0d1e20f0e6edd9fa28378c4c09c458de37dc1ccf9
+size 3570240
diff --git a/data/AB/1916/05_annotatedxml.tar.gz b/data/AB/1916/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..42cf10a3098ca7ac7ee40405822d8bb35e234a4e
--- /dev/null
+++ b/data/AB/1916/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0a0cf71cf165e83bb5e3a742c17d64a4f8e4ecfcaaea6d33d7a104b63a386af
+size 3312549
diff --git a/data/AB/1917/05_annotatedxml.tar.gz b/data/AB/1917/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..81735c58cc6a4aa0307b5ba58467d7f14fba1114
--- /dev/null
+++ b/data/AB/1917/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ee9e4830b417b12b6a3a02fabaebe8a21e5a31de2c9caff504606132330366f
+size 4948575
diff --git a/data/AB/1918/05_annotatedxml.tar.gz b/data/AB/1918/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..933827d331535e8aaf1f4dce8da6d360d57d1a47
--- /dev/null
+++ b/data/AB/1918/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8dce517598dfab962aede0fb020990a2305f7c3a5c156a5c0316c539dd1aa078
+size 5417955
diff --git a/data/AB/1919/05_annotatedxml.tar.gz b/data/AB/1919/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..8358de4881c7246eaef11aa7e42825b73dc97f8c
--- /dev/null
+++ b/data/AB/1919/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d12a669c80dd32f572fc5c9e9b46111abbd84da4e41ea61c8068c5d7ce7028b
+size 10801580
diff --git a/data/AB/1920/05_annotatedxml.tar.gz b/data/AB/1920/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..6a1a9086511c9e974c621ede057795ff03a2a158
--- /dev/null
+++ b/data/AB/1920/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:495b90e9537ae0c43810228f4381158e368fabee6c548c6dd92c3e14b15d2c89
+size 10497454
diff --git a/data/AB/1921/05_annotatedxml.tar.gz b/data/AB/1921/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..83d5b294c1bfb932c0914bb4735e6764347aa81e
--- /dev/null
+++ b/data/AB/1921/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3a727a6b574c05f5012704fa89c5a880bb1f3db671d3e66a993dec36c679cce
+size 8534160
diff --git a/data/AB/1922/05_annotatedxml.tar.gz b/data/AB/1922/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..1c3438311683292ee7b5ba8f2075b3b887a29723
--- /dev/null
+++ b/data/AB/1922/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dfece004bd2d590ae1c7909e9258ae465371a3bcc6a59025f32972aad78710af
+size 9504977
diff --git a/data/AB/1923/05_annotatedxml.tar.gz b/data/AB/1923/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..2aa674717ec348d05960118e912a8baf930f5672
--- /dev/null
+++ b/data/AB/1923/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e860b8d1418bba65365eb8d8b091f74c1bf51e384f1fe24f9ff76ab092198a2
+size 7265485
diff --git a/data/AB/1924/05_annotatedxml.tar.gz b/data/AB/1924/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..8fff59f60cca93d2a895a5fe31605270d359e512
--- /dev/null
+++ b/data/AB/1924/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b212c129390beff03818479e4dafd9d78b8140415c8cdaf466c6e3d89fc2ace1
+size 8890083
diff --git a/data/AB/1925/05_annotatedxml.tar.gz b/data/AB/1925/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..e9d339720707e6cebaf1acbcd2b9b5221568d58d
--- /dev/null
+++ b/data/AB/1925/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e808836799c46076c8d74445847b00dabd128dd0b33836dc73a9c8c288bd217
+size 8766445
diff --git a/data/AB/1926/05_annotatedxml.tar.gz b/data/AB/1926/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..96aaff689b243e19bc21a78ac101fac950d57992
--- /dev/null
+++ b/data/AB/1926/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b54fa8d4417546db7b90c6bf58bcacf3790d1e9bf6a35591426bda51af1ad7d
+size 8218586
diff --git a/data/AB/1927/05_annotatedxml.tar.gz b/data/AB/1927/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..98113ef551a7225665d5a1a98290da799fd77b0e
--- /dev/null
+++ b/data/AB/1927/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f37cdc91be5796cb80f3f41614bcbdb5b97e990151edd8b23782920e0c3fffa1
+size 8772617
diff --git a/data/AB/1928/05_annotatedxml.tar.gz b/data/AB/1928/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..26da4f17e1c3485701a74acb99dfd5cf465b34aa
--- /dev/null
+++ b/data/AB/1928/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe665c15caab4fb2aa2d67a3152a75380f07712a21a1587ef0020c3f34fd66c8
+size 9078900
diff --git a/data/AB/1929/05_annotatedxml.tar.gz b/data/AB/1929/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..6d27cf8c46b9e77d06c9fb65d73ef777b6e90cee
--- /dev/null
+++ b/data/AB/1929/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f23e910e3217561f3c35699848c9b6997d0faf0fa1a278285a79c272d3de60b1
+size 8603205
diff --git a/data/AB/1930/05_annotatedxml.tar.gz b/data/AB/1930/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..534a7b4068fbf278da0ab8ce77b9b7d94e32b348
--- /dev/null
+++ b/data/AB/1930/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7eb6b347f3d97fa5c09b1f0218b2830490f6076f3fd9fcdb8a82625c64971ac
+size 9769769
diff --git a/data/AB/1931/05_annotatedxml.tar.gz b/data/AB/1931/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..8d7a39c8b39795df6e67de5d747f3ef14d0a3d29
--- /dev/null
+++ b/data/AB/1931/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73795cf1b060758e055d2ddc640e71ff2ad9c278fca7887f5a0687cd77f6425b
+size 10724290
diff --git a/data/AB/1932/05_annotatedxml.tar.gz b/data/AB/1932/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..fc784d9295b1a67d6f351a38c1882a010163f0c9
--- /dev/null
+++ b/data/AB/1932/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00f55aeff99b9dd09ff584977411acfd646a10ae6e5455145250a20513583eea
+size 12837735
diff --git a/data/AB/1933/05_annotatedxml.tar.gz b/data/AB/1933/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..b7053f273c9c3ef26ec0c82068c2d3cf973af476
--- /dev/null
+++ b/data/AB/1933/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e66f7db7de6c977dab48884781ebc8153cc5a6f304a1f9e79525c77e8473f06f
+size 9985281
diff --git a/data/AB/1934/05_annotatedxml.tar.gz b/data/AB/1934/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..8be19e43e9c871e957a8682edb44a706a91d4eee
--- /dev/null
+++ b/data/AB/1934/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f1478f8003a86d47d133efe4f28b6d80185171351d0223da9ed4f402ac114e2
+size 10718410
diff --git a/data/AB/1935/05_annotatedxml.tar.gz b/data/AB/1935/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..491e56e6da9792f2cee21b3084bb6caa3ec9ef35
--- /dev/null
+++ b/data/AB/1935/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a271bf7f78be13036db411ca7cad626f6ab7fb918c2f306b5ba44537cd9f03d
+size 7036519
diff --git a/data/AB/1936/05_annotatedxml.tar.gz b/data/AB/1936/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..2c16410a0145b7e63971e53effac4de51369ba3b
--- /dev/null
+++ b/data/AB/1936/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62a9b6f5af9b12976a44da1bfb618254d10026aa663d002478b9f2a99dcc2a65
+size 14581319
diff --git a/data/AB/1937/05_annotatedxml.tar.gz b/data/AB/1937/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..895fc07818f1f6d2b37855a023df3f35dec7300c
--- /dev/null
+++ b/data/AB/1937/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50e1c9af95f73091089accef0eac072140478ef9b0048a8fbb289d53da6b794b
+size 9097736
diff --git a/data/AB/1938/05_annotatedxml.tar.gz b/data/AB/1938/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..6e84f59d47e32876b523056d7dd66ddd3bd977a0
--- /dev/null
+++ b/data/AB/1938/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d27fb1d86569eedecc88c815791d9c59061e0a4e5c99293c3ad9ac15013f3266
+size 11329732
diff --git a/data/AB/1939/05_annotatedxml.tar.gz b/data/AB/1939/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..34e533a2718f6921b35ab66addd95d4ade22393b
--- /dev/null
+++ b/data/AB/1939/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:919a6823ba1a5cfcc8697c01fd98b9a1ba08c6ff756e0d1600b0688f9cd0c2fb
+size 8991330
diff --git a/data/AB/1940/05_annotatedxml.tar.gz b/data/AB/1940/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..df71393dc092ba762a62bf789b1aa37e1351ed1c
--- /dev/null
+++ b/data/AB/1940/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0613b40a5ba57fcb7fac848b4a7a4c3222b2135449285208bd132c7cd4e59068
+size 6631861
diff --git a/data/AB/1941/05_annotatedxml.tar.gz b/data/AB/1941/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..8b539e9e9d3cd0173bade5e21220ff194cebd8e0
--- /dev/null
+++ b/data/AB/1941/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2b25b73313a26cd1bc7cd7d7cb6d3cec3885c1e811668c6a6ecac40e3c69a75
+size 4492781
diff --git a/data/AB/1942/05_annotatedxml.tar.gz b/data/AB/1942/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..f339f23eacd98253abbc343d22993d6fd5cbb505
--- /dev/null
+++ b/data/AB/1942/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dedcc95cb7285fffef6d4fc18bc243063e44f0919c99c8d70ec948ee10b3cf1f
+size 3809239
diff --git a/data/AB/1943/05_annotatedxml.tar.gz b/data/AB/1943/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..d7d764ff481bf406be00bb85e7bb07700f594fef
--- /dev/null
+++ b/data/AB/1943/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c913666ab05827283902acd7275bc3d23a7882119ddcd184e368009450ae9e0
+size 4175653
diff --git a/data/AB/1944/05_annotatedxml.tar.gz b/data/AB/1944/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..1a0658ded8f6b4ad346f214462835dc587a163c0
--- /dev/null
+++ b/data/AB/1944/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f90e5cb54dd3abd1086a19c07372a390b39296084cd691274e2eebd4d4b14f56
+size 5774197
diff --git a/data/AB/1945/05_annotatedxml.tar.gz b/data/AB/1945/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..4c88d83aa7d034b492e476c5ede383c1e343ac89
--- /dev/null
+++ b/data/AB/1945/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17f545945b1f61eabaa6cd027d543a7b8ab5c6d5e19a0e6e100a2950470026f0
+size 7240715
diff --git a/data/AB/1946/05_annotatedxml.tar.gz b/data/AB/1946/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..23a8319b425bdb442d6f5c85034fd0ec8d1214b4
--- /dev/null
+++ b/data/AB/1946/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b1cfe8386afc84b8b9166dacc10b5057c3749a3c5479e286626b590c7f19999
+size 10228679
diff --git a/data/AB/1947/05_annotatedxml.tar.gz b/data/AB/1947/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..13774e1a469c33713e745f3e2fd9a23676bda700
--- /dev/null
+++ b/data/AB/1947/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7cb8e23dc37d1ad85fd0af80c0b2160fc4cdf348d052f89b74eae5d54961d69e
+size 6416708
diff --git a/data/AB/1948/05_annotatedxml.tar.gz b/data/AB/1948/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..c6728abad78d33d854aa92a4bb8f4b7aea75c215
--- /dev/null
+++ b/data/AB/1948/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47d3a0265bc37282acd72f6b2b024551422b52ed18a130e69fcfe511eb5129bd
+size 7829311
diff --git a/data/AB/1949/05_annotatedxml.tar.gz b/data/AB/1949/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..ce6e03bee660b9cd53fa5b557e634ad690ab5661
--- /dev/null
+++ b/data/AB/1949/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7bf4c7698d7bac7d33adcfed7a7cd6efd4f237363077d9861aae03cb385e5f7d
+size 10143052
diff --git a/data/AB/1950/05_annotatedxml.tar.gz b/data/AB/1950/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..b896946b87d179ae86495a57049cddefb8c26d63
--- /dev/null
+++ b/data/AB/1950/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa75f4aa036d7543b0ea3e9589c59e601fcfcc0ecbb8d6a06976443c4ced351d
+size 9032353
diff --git a/data/AB/1991/05_annotatedxml.tar.gz b/data/AB/1991/05_annotatedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..09c53900c186185b1fc73c3b3eaa93d3a0075c9f
--- /dev/null
+++ b/data/AB/1991/05_annotatedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acdab37456178165952f1ae7525e29e451706889a195a65fa4738a56fc506b04
+size 29777601
diff --git a/src/python/def_classes.py b/src/python/def_classes.py
index cb2f626087d76ebc4ab1baf78b52b45cd42ed4ed..96897ce758931fd5ef0acb1d1dfde39537b89801 100644
--- a/src/python/def_classes.py
+++ b/src/python/def_classes.py
@@ -732,7 +732,7 @@ class Document:
         utils_proc.call_with_out(command)
 
 
-    def get_council_date(self, name_outmeta = '03_correctedmeta'):
+    def _get_council_date(self, name_outmeta = '03_correctedmeta'):
 
         if 'name_outmeta' not in self.__dict__.keys():
             self.name_outmeta = name_outmeta
@@ -750,7 +750,7 @@ class Document:
     # - dict_overlaps: dictionary with overlaps
     # output:
     # - (first_entry, last_entry): tuple of first and last textbox id
-    def get_first_last_textbox(self, dict_overlaps_year):
+    def _get_first_last_textbox(self, dict_overlaps_year):
 
         # initialize to impossible values
         first_entry = -1
@@ -781,7 +781,7 @@ class Document:
             self.name_outxml = name_outxml
 
         if 'XML_main_corr' not in self.__dict__.keys():
-            print('no main corr')
+            #print('no main corr')
             name_tar = self.folder_database + '/' + str(self.year) + '/' + self.name_outcorrxml + '.tar.gz'
             if os.path.isfile(name_tar):
                 name_xml = './' + str(self.year) + '/' + str(self.id_doc) + suffix_xml + 'corr.xml'
@@ -799,13 +799,13 @@ class Document:
                     #pages = 'all', suffix_xml = '_data', name_outxml = self.name_outxml,
                     #name_outcorrxml = self.name_outcorrxml)
 
-        print('we have a main corr XML file')
+        #print('we have a main corr XML file')
 
         # get council and date
-        self.get_council_date()
+        self._get_council_date()
 
         # get start and end of document
-        entries = self.get_first_last_textbox(self.dict_overlaps_year)
+        entries = self._get_first_last_textbox(self.dict_overlaps_year)
 
         # update if document starts/ends as on pdf
         if entries[0] == -1:
@@ -841,9 +841,7 @@ class Document:
 
         self.name_outannotxml = name_outannotxml
         self.name_annot_corr = [name_tar, name_xml]
-#        self._xml_ext(suffix_xml, self.name_outannotxml)
         command = 'rm -rf ./' + str(self.year)
-        #print(command)
         utils_proc.call_with_out(command)
 
         print("End of file %s - %s seconds -" % (self.input_file, (time.time() - start_time)))
diff --git a/src/python/run_extract_discussions.py b/src/python/run_extract_discussions.py
index b0b56828cf9f14d7df7f83610e11ddd024f4db41..a7689b9b4e5433b1a84bdb30604672f396e1442e 100644
--- a/src/python/run_extract_discussions.py
+++ b/src/python/run_extract_discussions.py
@@ -4,21 +4,17 @@
 # Code to extract discussions from corrected XML files
 #%%
 # to work with atom
-%load_ext autoreload
-%autoreload 2
+#%load_ext autoreload
+#%autoreload 2
 
 import pickle
 import time
-import xml.etree.ElementTree as ET
 
 import sys
 sys.path.append('src/python/')
 
 import def_classes as defc
 import utils_proc
-import utils_annot
-
-import os
 
 from utils_proc import call_with_out
 
@@ -35,7 +31,7 @@ input_overlaps = "data/lists/dict_overlaps.pickle"
 output_annotatedxml = "data/AB/" + year + "/05_annotatedxml.tar.gz"
 
 #%%
-# detect arguments
+# detect arguments from sh file
 input_lastnames = sys.argv[1]
 input_correctedxml = sys.argv[2]
 input_correctedmeta = sys.argv[3]
@@ -43,35 +39,29 @@ input_notnames = sys.argv[4]
 input_overlaps = sys.argv[5]
 output_annotatedxml = sys.argv[6]
 
+
 #%%
 # extract suffixes, year, folder_database
 suffix_tar_correctedxml = input_correctedxml.split('/')[-1].split('.tar.gz')[0]
 #suffix_tar_correctedmeta = input_correctedmeta.split('/')[-1].split('.tar.gz')[0]
 year = input_correctedxml.split('/')[-2]
 folder_database = input_correctedxml.split(year)[0]
-suffix_correctedmeta = '_metacorr'
-#suffix_correctedxml = '_datacorr'
 
 #%%
-# TODO: make it work!
 # git lfs pull necessary data
-for lfsfile in [input_correctedxml, input_correctedmeta]:
+# does not work in atom
+for lfsfile in [input_correctedxml, input_correctedmeta, input_overlaps]:
     command = 'git lfs pull -I ' + lfsfile
-    #print(command)
     call_with_out(command)
-
-#%%
-# TODO: exclude overlaps --> after annotation
-
-
+    
+command = 'git lfs pull -I ' + input_lastnames.split(year)[0]
+call_with_out(command)   
+    
 #%%
-start_time_discussions = time.time()
-print('start to identify discussions of the year', year, '\n')
-
 # extract list of files
 files_to_process, _ = utils_proc.get_list(year, folder_database, suffix_tar_correctedxml)
 files_to_process.sort()
-print('files to process loaded:', files_to_process)
+print('files to process loaded')
 
 # open dataframe of last names from pickle file
 # (there is one file of lastnames per year)
@@ -107,7 +97,7 @@ for file_tarpath in files_to_process:
 
     # if document is a discussion
     if (file_doc.check_discussion()) and (id_doc not in ['20032463', '20032952', '20014332']):
-        print(id_doc + '\n')
+        #print(id_doc + '\n')
         file_doc.df_lastnames = df_lastnames
         file_doc.list_notnames = list_notnames
         file_doc.dict_overlaps_year = dict_overlaps_year
@@ -122,39 +112,30 @@ utils_proc.compress_tar(output_annotatedxml)
 
 
 #%%
-with open(input_notnames) as f:
-    list_notnames = f.readlines()
+#with open(input_notnames) as f:
+#    list_notnames = f.readlines()
 
-list_notnames = [term.rstrip() for term in list_notnames]
+#list_notnames = [term.rstrip() for term in list_notnames]
 
 # to test for one file
-file_tarpath = './1936/20031998_datacorr.xml'
+#file_tarpath = './1936/20031998_datacorr.xml'
 
-id_doc = file_tarpath.split('/')[-1][:8]
+#id_doc = file_tarpath.split('/')[-1][:8]
 
 # instantiate document object (always from original pdf)
-infile_aux = year + '/' + id_doc + '.pdf'
-file_doc = defc.Document(infile_aux, folder_database)
+#infile_aux = year + '/' + id_doc + '.pdf'
+#file_doc = defc.Document(infile_aux, folder_database)
 
 
-if (file_doc.check_discussion()) and (id_doc not in ['20032463', '20032952', '20014332']):
-    print(id_doc + '\n')
+#if (file_doc.check_discussion()) and (id_doc not in ['20032463', '20032952', '20014332']):
+#    print(id_doc + '\n')
 
-    file_doc.df_lastnames = df_lastnames
-    file_doc.list_notnames = list_notnames
-    file_doc.dict_overlaps_year = dict_overlaps_year
-    file_doc.annotate_xml()
+#    file_doc.df_lastnames = df_lastnames
+#    file_doc.list_notnames = list_notnames
+#    file_doc.dict_overlaps_year = dict_overlaps_year
+#    file_doc.annotate_xml()
 
 
 #%%
 
-file_doc = defc.Document(infile_aux, folder_database)
-file_doc.get_council_date()
-#id_doc
-
-#len(files_to_process)
-file_doc.check_discussion()
 
-str_date = '1925-12-09 08:00'
-import datetime
-datetime.datetime.strptime(str_date, '%Y-%m-%d %H:%M')
diff --git a/src/python/utils_annot.py b/src/python/utils_annot.py
index ce4a88568f880975da15996fe02c47e85d150c8d..a6e4c9cf6c3999dbffdb46d79794a6f46b0d462b 100644
--- a/src/python/utils_annot.py
+++ b/src/python/utils_annot.py
@@ -431,8 +431,7 @@ def label_speechstart(XML_new, ind_p, ind_t, text, ind_tl_colon, df_names, list_
                     # set flag
                     this_is_speech = True
                     if bln_print:
-                        print('found a name:', text_start, list_oi, str_name, str_role, '\n')
-                    print('found a name:', text_start, list_oi, ind_tl_colon, str_name, str_role, list_uniqueID, '\n')
+                        print('found a name:', text_start, list_oi, ind_tl_colon, str_name, str_role, list_uniqueID, '\n')
 
     return XML_new, this_is_speech
 
@@ -513,9 +512,10 @@ def find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str
     def get_string(term, df_names, str_name, list_uniqueID):
         # get name type
         name_type = df_names['nameType'].loc[df_names['shortName']==term].iloc[0]
-        if name_type != 'simple':
-            print(df_names[df_names['shortName']==term])
-        print(term, name_type)
+        if bln_print:
+            if name_type != 'simple':
+                print(df_names[df_names['shortName']==term])
+            print(term, name_type)
 
         # extract uniqueID and complete name for this term
         list_temp = []
@@ -528,7 +528,8 @@ def find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str
             list_temp = list(df_names.loc[(df_names['shortName']==term)].iloc[:, df_names.columns.get_loc('uniqueIndex')])
             str_completeName = term + ' (CANTON MISSING)'
 
-        print(list_temp, str_completeName)
+        if bln_print:
+            print(list_temp, str_completeName)
 
         # set or update unique ID and name
         # if no unique ID and name has been assigned so far
@@ -686,7 +687,8 @@ def find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str
                 if term in list_:
                     str_canton = term
                     canton_type = type_
-                    print('!!! is a canton', term, list_oi, str_name, str_role)
+                    if bln_print:
+                        print('!!! is a canton', term, list_oi, str_name, str_role)
                     break
 
             # if person was not uniquely identified, check for misspellings
@@ -699,7 +701,8 @@ def find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str
                     if term_approx:
                         str_canton = term_approx
                         canton_type = type_
-                        print('!!! is a canton', term, list_oi, str_name, str_role)
+                        if bln_print:
+                            print('!!! is a canton', term, list_oi, str_name, str_role)
                         break
 
             # if a canton or similar was found
@@ -728,15 +731,18 @@ def find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str
 
 
                 else:
-                    print(canton_type, str_canton, str_name, df_temp)
+                    if bln_print:
+                        print(canton_type, str_canton, str_name, df_temp)
                     list_temp = list(df_temp.loc[(df_temp['shortName']==str_name) & (df_temp[canton_type]==str_canton)].iloc[:, df_temp.columns.get_loc('uniqueIndex')])
                     str_completeName = df_temp['completeName'].loc[(df_temp['shortName']==str_name) & (df_temp[canton_type]==str_canton)].iloc[0]
 
-                print(list_temp, list_uniqueID, str_completeName)
+                if bln_print:
+                    print(list_temp, list_uniqueID, str_completeName)
 
                 if len(list_temp) > 0:
                     list_uniqueID = update_list_uniqueID(list_uniqueID, list_temp, name_type)
-                    print(str_completeName)
+                    if bln_print:
+                        print(str_completeName)
                     if 'CANTON MISSING' in str_completeName:
                         str_name = add_to_string('', str_completeName)
                     elif str_completeName.split(' ')[0] == str_name:
@@ -745,13 +751,15 @@ def find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str
                         str_name = add_to_string(str_name, str_completeName)
 
             else:
-                print('could not be identified as a canton:', term, list_oi, str_name, str_role)
+                if bln_print:
+                    print('could not be identified as a canton:', term, list_oi, str_name, str_role)
 
         # if term is first name
         # needed when people are referenced by FirstName LastName, e.g. Simon Kohler
         elif term in list_all_firstnames:
             str_firstname = term
-            print('found a first name', str_firstname)
+            if bln_print:
+                print('found a first name', str_firstname)
 
         # if term is not easily mistaken as a name (avoid false positives)
         elif term not in list_notnames:
@@ -780,7 +788,6 @@ def find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str
             # TODO check for false positives of these procedures
             if name_type == 'canton':
                 # check if person can be identified from firstname
-                print(str_firstname)
                 if str_firstname:
                     df_temp = df_names.loc[(df_names['shortName']==str_name.split(' ')[0]) & (df_names['FirstName']==str_firstname)]
                     if df_temp.shape[0] == 1:
@@ -831,7 +838,8 @@ def find_names(list_oi, list_roles, list_roles_ext, df_names, list_notnames, str
                         else:
                             str_name = add_to_string(str_name, str_completeName)
 
-                    print(str_date, df_temp.shape, df_temp_before.shape, df_temp_after.shape)
+                    if bln_print:
+                        print(str_date, df_temp.shape, df_temp_before.shape, df_temp_after.shape)
 
 
                 # TODO: function to update list unique ID and str_name
diff --git a/src/sh/extract_discussions_yearly.sh b/src/sh/extract_discussions_yearly.sh
index 290554ff2d4182ddffd659afb3e34f39f3dd9be0..910e9b191da78bd60ef7115b4f9e763c05e396b6 100755
--- a/src/sh/extract_discussions_yearly.sh
+++ b/src/sh/extract_discussions_yearly.sh
@@ -1,18 +1,18 @@
 #!/bin/bash
 
-year_start=1891
-year_end=1891
-
-input_lastnames = data/politicians/lastnames/${year}_lastnames.pickle
-input_correctedxml = data/AB/${year}/04_correctedxml.tar.gz
-input_correctedmeta = data/AB/${year}/03_correctedmeta.tar.gz
-input_notnames = data/lists/not_names.txt
-input_overlaps = data/lists/dict_overlaps.pickle
-output_annotatedxml = data/AB/${year}/05_annotatedxml.tar.gz
+year_start=$1
+year_end=$2
 
 for year in $(seq $year_start $year_end)
 do
     echo $year
-    # renku run --isolation
-    python src/python/run_extract_discussions.py input_lastnames input_correctedxml input_correctedmeta input_notnames input_overlaps output_annotatedxml
+    
+    input_lastnames=data/politicians/lastnames/${year}_MPs.pickle
+    input_correctedxml=data/AB/${year}/04_correctedxml.tar.gz
+    input_correctedmeta=data/AB/${year}/03_correctedmeta.tar.gz
+    input_notnames=data/lists/not_names.txt
+    input_overlaps=data/lists/dict_overlaps.pickle
+    output_annotatedxml=data/AB/${year}/05_annotatedxml.tar.gz
+
+    renku run --isolation python src/python/run_extract_discussions.py $input_lastnames $input_correctedxml $input_correctedmeta $input_notnames $input_overlaps $output_annotatedxml
 done