diff --git a/.gitattributes b/.gitattributes
index 322c7e41fa0cb7d9a137a0bbd8e7aa2ca252c288..bcb7c4d4d23b73c05031572b9e29f12153e4b397 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -254,6 +254,7 @@ data/AB/1983/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1991/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1991/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/lists/dict_overlaps.pickle filter=lfs diff=lfs merge=lfs -text
+data/lists/all_titles.csv filter=lfs diff=lfs merge=lfs -text
 data/AB/1978/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1979/02_extractedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
 data/AB/1975/04_correctedxml.tar.gz filter=lfs diff=lfs merge=lfs -text
\ No newline at end of file
diff --git a/.renku/workflow/d7f5834807064a1f8ed8026be247a3be_python.cwl b/.renku/workflow/d7f5834807064a1f8ed8026be247a3be_python.cwl
new file mode 100644
index 0000000000000000000000000000000000000000..1226793d34017c9012dd77961743a84a3740a1d3
--- /dev/null
+++ b/.renku/workflow/d7f5834807064a1f8ed8026be247a3be_python.cwl
@@ -0,0 +1,60 @@
+arguments: []
+baseCommand:
+- python
+class: CommandLineTool
+cwlVersion: v1.0
+hints: []
+inputs:
+  input_1:
+    default:
+      class: File
+      path: ../../src/python/extract_document_titles.py
+    inputBinding:
+      position: 1
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: File
+  input_2:
+    default:
+      class: Directory
+      listing: []
+      path: ../../data/AB
+    inputBinding:
+      position: 2
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: Directory
+  input_3:
+    default: 03_correctedmeta
+    inputBinding:
+      position: 3
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+  input_4:
+    default: data/lists/all_titles.csv
+    inputBinding:
+      position: 4
+      separate: true
+      shellQuote: true
+    streamable: false
+    type: string
+outputs:
+  output_0:
+    outputBinding:
+      glob: $(inputs.input_4)
+    streamable: false
+    type: File
+permanentFailCodes: []
+requirements:
+- class: InlineJavascriptRequirement
+- class: InitialWorkDirRequirement
+  listing:
+  - entry: '$({"listing": [], "class": "Directory"})'
+    entryname: data/lists
+    writable: true
+successCodes: []
+temporaryFailCodes: []
diff --git a/data/lists/all_titles.csv b/data/lists/all_titles.csv
new file mode 100644
index 0000000000000000000000000000000000000000..812c19fa3d04e8278fba69e89350e22aeb45383a
--- /dev/null
+++ b/data/lists/all_titles.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45fbfbadcb22dcb849344764815b91ba6a54e092f392f5ea6b84d04d8e90c9cd
+size 5867681
diff --git a/src/python/def_classes.py b/src/python/def_classes.py
index f8e07833543f350fe3f1a5877f63986f0a898038..cb2f626087d76ebc4ab1baf78b52b45cd42ed4ed 100644
--- a/src/python/def_classes.py
+++ b/src/python/def_classes.py
@@ -718,6 +718,20 @@ class Document:
 
         return flag_discussion
 
+
+    def get_document_title(self, list_attributes, name_outmeta = '03_correctedmeta'):
+        """Store in self.list_titles the values of list_attributes found in this document's meta XML."""
+        if 'name_outmeta' not in self.__dict__.keys():  # an already-set name_outmeta wins over the argument
+            self.name_outmeta = name_outmeta
+        # presumably unpacks self.name_meta_corr[1] from the name_outmeta archive -- TODO confirm; then parse it
+        utils_proc.tar_extractfile(self.name_meta_corr[1], self.folder_database, name_file = self.name_outmeta)
+        self.list_titles = utils_annot.get_document_title_(self.name_meta_corr[1], list_attributes)
+        # delete ./<year> again; presumably that is where tar_extractfile unpacked the file -- TODO confirm
+        command = 'rm -rf ./' + str(self.year)
+        #print(command)
+        utils_proc.call_with_out(command)
+
+
     def get_council_date(self, name_outmeta = '03_correctedmeta'):
 
         if 'name_outmeta' not in self.__dict__.keys():
diff --git a/src/python/extract_document_titles.py b/src/python/extract_document_titles.py
new file mode 100644
index 0000000000000000000000000000000000000000..3efe540f94411a00b61067d64e65d83247780bc3
--- /dev/null
+++ b/src/python/extract_document_titles.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Code to get the titles of all the documents and save them in a csv file
+# to run:
+# renku run --isolation python src/python/extract_document_titles.py data/AB/ 03_correctedmeta data/lists/all_titles.csv
+
+import sys
+import csv
+
+import utils_proc
+import def_classes as defc
+
+# years of interest: first and last year, both included in range_years
+years = [1891, 1995]   # [first, last]
+range_years = range(years[0], years[1] + 1)
+
+# specify input and output parameters (positional command line arguments)
+folder_database = sys.argv[1]  # e.g. data/AB/
+suffix_correctedmeta = sys.argv[2]  # e.g. 03_correctedmeta
+output_titles = sys.argv[3]  # e.g. data/lists/all_titles.csv
+
+# csv columns: year and document id ('number'), followed by the title attributes requested from the meta XML
+list_attributes = ['year', 'number', 'TITEL_NORMAL_DE', 'TITEL_NORMAL_FR', 'TITEL_ORIGINAL_DE', 'TITEL_ORIGINAL_FR']
+
+# initialize list of all titles and add header
+list_all_titles = []
+list_all_titles.append(list_attributes)
+
+# for each year: fetch that year's meta archive and collect one row of titles per document
+for year in range_years:
+    print(year)
+    # path of that year's meta archive (folder_database must end with '/': the parts are simply concatenated)
+    input_file = folder_database + str(year) + '/' + suffix_correctedmeta + '.tar.gz'
+    command = 'git lfs pull -I ' + input_file  # -I: only pull the LFS objects matching this path
+    utils_proc.call_with_out(command)
+
+    # get list of files to process and sort that list
+    files_to_process, _ = utils_proc.get_list(str(year), folder_database, suffix_correctedmeta)
+    files_to_process.sort()
+
+    # for each file
+    for file_tarpath in files_to_process:
+        # get id: the first 8 characters of the file name
+        id_doc = file_tarpath.split('/')[-1][:8]
+
+        # instantiate document object (always from original pdf)
+        file_aux = str(year) + '/' + id_doc + '.pdf'
+        file_doc = defc.Document(file_aux, folder_database)
+
+        # start the row with year and id (the 'year' and 'number' columns of the header)
+        list_titles = [str(year), id_doc]
+
+        # get titles (list_attributes[2:] skips 'year' and 'number') and add them to the row of that document
+        file_doc.get_document_title(list_attributes[2:])
+        list_titles.extend(file_doc.list_titles)
+
+        # append row to list of all titles
+        list_all_titles.append(list_titles)
+
+# save csv file
+with open(output_titles, 'w') as fo:
+    wr = csv.writer(fo, dialect='excel')
+    wr.writerows(list_all_titles)
diff --git a/src/python/utils_annot.py b/src/python/utils_annot.py
index 7d8ce8053992f159caae552c4d84bed90bcae5af..ce4a88568f880975da15996fe02c47e85d150c8d 100644
--- a/src/python/utils_annot.py
+++ b/src/python/utils_annot.py
@@ -50,6 +50,26 @@ def check_if_discussion(path_meta_xml_file,
 
     return True
 
+# function to get the document titles
+def get_document_title_(path_meta_xml_file, list_attributes):
+    """Return, for each name in list_attributes, that attribute of ADS_TEXTEINHEIT, or '(empty)' if absent."""
+    # parse, get root and then part of interest (ADS_TEXTEINHEIT child of the root's first child)
+    XML_tree = ET.parse(path_meta_xml_file)
+    XML_root = XML_tree.getroot()
+    XML_poi = XML_root[0].find('ADS_TEXTEINHEIT')
+    # NOTE(review): find() returns None when no such child exists; the .attrib access below would then raise
+    # get titles, in the order of list_attributes
+    list_titles = []
+    for attribute in list_attributes:
+        if attribute in XML_poi.attrib:
+            title = XML_poi.attrib[attribute]
+            list_titles.append(title)
+        else:
+            list_titles.append('(empty)')
+    # one entry per requested attribute
+    return list_titles
+
+
 # function to get date and council
 def get_council_and_date(path_meta_xml_file):