diff --git a/.gitattributes b/.gitattributes
index 7030f5110e7a8b02a8dabeae2e4254548fcd3bca..5897e0e35513b77240dc873fa6760c20d206a130 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -319,3 +319,6 @@ data/AB/1947/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1948/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1949/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1950/05_annotatedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1976/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1977/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
+data/AB/1975/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 9a187c9b9de280eadceda624e53b087796a9bed4..ac6bfbfb7f6f29aa500e4355baf8a4f94083ffad 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -24,8 +24,9 @@ image_build:
 
 dot:
   stage: build
-  image: renku/singleuser:latest
+  image: renku/renku-python:latest  # NOTE(review): ':latest' is a floating tag; consider pinning a version - TODO confirm
   script:
+    - renku --version  # show the renku version in the job log before the graph is generated
     - renku log --format dot $(git ls-files --no-empty-directory --recurse-submodules) > graph.dot
   artifacts:
     paths:
diff --git a/.renku/workflow/5864b37f16a646958e484a578e9ea288_python.cwl b/.renku/workflow/5864b37f16a646958e484a578e9ea288_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..5bd8208aa235401043c857e852b932b3404e103a
--- /dev/null
+++ b/.renku/workflow/5864b37f16a646958e484a578e9ea288_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:  # effective command line: python <input_1> <input_2> <input_3>
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:  # the Python script to execute (first positional argument)
+    default:
+      class: File
+      path: ../../src/python/run_correctxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:  # archive passed to the script as the second positional argument
+    default:
+      class: File
+      path: ../../data/AB/1976/02_extractedxml.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:  # output path, passed as a plain string (third positional argument)
+    default: data/AB/1976/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)  # the output file is looked up at the path given by input_3
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement  # needed to evaluate the $({...}) expression in the listing entry below
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'  # an empty Directory object
+    entryname: data/AB/1976  # name under which the directory is staged in the working directory
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/.renku/workflow/6530d4b63e1045cb9b5af1e1d728cb44_python.cwl b/.renku/workflow/6530d4b63e1045cb9b5af1e1d728cb44_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..0b3192edf52a462c7db29c514f196da16ef6576f
--- /dev/null
+++ b/.renku/workflow/6530d4b63e1045cb9b5af1e1d728cb44_python.cwl
@@ -0,0 +1,51 @@
+arguments: []
+baseCommand:  # effective command line: python <input_1> <input_2> <input_3>
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:  # the Python script to execute (first positional argument)
+    default:
+      class: File
+      path: ../../src/python/run_correctxml.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:  # archive passed to the script as the second positional argument
+    default:
+      class: File
+      path: ../../data/AB/1977/02_extractedxml.tar.gz
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_3:  # output path, passed as a plain string (third positional argument)
+    default: data/AB/1977/04_correctedxml.tar.gz
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_3)  # the output file is looked up at the path given by input_3
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement  # needed to evaluate the $({...}) expression in the listing entry below
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'  # an empty Directory object
+    entryname: data/AB/1977  # name under which the directory is staged in the working directory
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/Dockerfile b/Dockerfile
index eccc0209bce183e39ef458c2b5f2af992f65b8f5..e51294d61b2799d50d4b7b46b7c659fa8e3edf17 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -16,4 +16,7 @@ RUN sudo apt-get install -y vim
 # install spacy models
 RUN python -m spacy download de_core_news_sm
 RUN python -m spacy download fr_core_news_sm
-RUN python -m spacy download xx_ent_wiki_sm
\ No newline at end of file
+RUN python -m spacy download xx_ent_wiki_sm
+
+# download the NLTK 'punkt' and 'stopwords' data packages into the image
+RUN python -m nltk.downloader punkt && python -m nltk.downloader stopwords
diff --git a/data/AB/1977/04_correctedxml.tar.gz b/data/AB/1977/04_correctedxml.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..b5af7fecd87275edc8be840fd1dc4c2d6317ccd6
--- /dev/null
+++ b/data/AB/1977/04_correctedxml.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5097c7c4a8b40861290f1bcb828d93163c0ac3a9b6955c257936a6540d987c48
+size 22497388
diff --git a/src/python/run_correctxml.py b/src/python/run_correctxml.py
index ff0710cb6d532daa9e60fb5a5d9e5c25923e7507..254909fbd8462363e598c27b0e68c77adeb7c123 100644
--- a/src/python/run_correctxml.py
+++ b/src/python/run_correctxml.py
@@ -34,6 +34,13 @@ folder_database = input_file.split(year_tocomp)[0]
 t1 = time.time()
 
 name_tar_file = input_file.split('/')[-1].split('.tar.gz')[0]
+print(input_file)
+# Pull the Git LFS content of the input archive and of the 00_rawpdfs archive in the same folder
+input_file_pdf = '/'.join(input_file.split('/')[:-1]) + '/00_rawpdfs.tar.gz'
+for lfs_target in (input_file, input_file_pdf):
+    comm = 'git lfs pull -I ' + lfs_target
+    utils_proc.call_with_out(comm)
+
 files_proc, _ = utils_proc.get_list(year_tocomp, folder_database, name_tar_file)
 
 list_proc = list()
@@ -44,12 +51,14 @@ for infile in files_proc:
     if infile_aux not in list_proc:
         list_proc.append(infile_aux)
         d1 = defc.Document(infile_aux, folder_database)
-        try :
+        try:
             d1.correct_xml(flag_plots = 0, flag_parallel = 0, flag_save_figs = 0, name_outxml = name_tar_file,
                            name_outcorrxml = name_tar_out)
             #print('Corrected %s' % infile)
-        except:
+        except Exception:
+            # Name the file that failed, then re-raise so the error still propagates.
             print("File to correct %s prompted an error" % infile)
+            raise
 
 # Commands to get the compressed version of the file
 #data/AB/${year}/02_extractedxml.tar.gz
diff --git a/src/sh/execute_per_year_isolation.sh b/src/sh/execute_per_year_isolation.sh
index cd3f676b994f05672484e300931c3c452317003a..1533a900599c21ed0e51a87c1e066033d61ed04f 100755
--- a/src/sh/execute_per_year_isolation.sh
+++ b/src/sh/execute_per_year_isolation.sh
@@ -12,5 +12,6 @@ for year in $(seq $year_start $year_end)
 do
     echo $year
     $CONDA_DIR/envs/renku/bin/renku run --isolation python $1 ${2}/$year/${3}.tar.gz ${2}/$year/${4}.tar.gz
+    git push  # runs inside the loop, i.e. once after each year's renku run
 done