diff --git a/src/python/run_titlesComp_2steps.py b/src/python/run_titlesComp_2steps.py
new file mode 100644
index 0000000000000000000000000000000000000000..9554bd50cd629eaaa275319584710dc9ae0d828c
--- /dev/null
+++ b/src/python/run_titlesComp_2steps.py
@@ -0,0 +1,446 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Apr 20 12:20:59 2020
+
+@author: luissalamanca
+"""
+
+'''
+The first test of command to extend the titles
+'''
+#%%
+
+import os, sys
+
+os.environ['DEMOCRASCI_DATA'] = "/Users/luissalamanca/My_stuff/05_SDSCresearch/02_NLP/Code/democrasci_preprocwp1/data/"
+import LOCAL_CONSTANTS
+import CONSTANTS
+
+import pandas as pd
+import numpy as np
+import def_classes as defc
+import utils_proc
+import utils_feats
+import plot_tools as pt
+import matplotlib.pyplot as plt
+from nltk.tokenize import word_tokenize
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.manifold import TSNE
+import regex
+
+def size_matching_bbox(bbox, list_bbox):
+    # Compare a single bbox against a list of bboxes and return the index of
+    # the closest one together with its distance (sum of absolute coordinate
+    # differences). This is necessary to compensate for slight differences
+    # between the bboxes due to slightly different processing.
+    # Input: bbox is a comma-separated string of 4 numbers; list_bbox holds strings of the same form.
+    bbox = np.array(bbox.split(',')).astype(float)
+    list_bbox = np.array([o.split(',') for o in list_bbox]).astype(float)
+    dist_vec = np.sum(np.abs(list_bbox - bbox.reshape((1,4))), axis = 1)
+    return np.argmin(dist_vec), np.min(dist_vec)
+
+def remove_space_char(text):
+    all_matches = regex.findall(r'(?=([a-z] [a-z]))',text)
+    # The lookahead returns overlapping "x y" pairs of lowercase ASCII letters; r'(?=(\w \w))' is the broader variant
+    for match in all_matches:
+        text = regex.sub(match,match.replace(' ',''),text)
+    return text
+    
+# Open a csv 
+
+data_folder = '/Users/luissalamanca/Dropbox/My_stuff/05_SDSCresearch/02_NLP/Data/trained_annotation_data/title/exhaustive_label'
+folder_databaseAB = '/Users/luissalamanca/Dropbox/My_stuff/05_SDSCresearch/02_NLP/Data/AB_other/SessionOverviews_tar/'
+col2_f3_year = 1917
+
+year = 1893 # 1893
+flag_type_for_doc = 3
+
+flag_save_fig = 1
+
+name_file = '{}/{}.csv'.format(data_folder, year)
+df = pd.read_csv(name_file)
+
+names_col_df_feat = ['year','file_id','page_id','index','textl','textb','text','bbox',
+                     'font_type','font_size', 'in_char', 'end_char','num_line','length']
+# Plus the extras, such as the vocabulary and font_type as one-hot
+
+#%%
+
+'''
+
+unique_ids = np.unique(df['file_id'])
+
+#unique_ids = [unique_ids[0]]
+
+#f = open('test_titles.txt','w+')
+
+vec_df_feat = list()
+all_text_for_vocab = list()
+num_words= 1
+
+# More lines when we have 2 columns as the titles can span longer
+n_next_lines = 2
+if year >= col2_f3_year:
+    n_next_lines = 3
+    
+    
+for ids in unique_ids:
+    # Check all ids and pages, and create a parallel csv with full titles.
+    unique_pages = np.unique(df[df['file_id'] == ids]['page_id'])
+    input_file = './{}/{}.pdf'.format(year, ids)
+    doc = defc.Document(input_file, folder_database=folder_databaseAB, flag_type=flag_type_for_doc)
+    XML_main_corr = doc.load_or_compute_xml() 
+    XML_main_corr = utils_proc.convert_textlines_in_xml_tree(XML_main_corr)
+    
+    for page in unique_pages:
+        
+        # Reducing df to only those necessary, and iterate through titles
+        df_red = df[df['file_id'] == ids][df['page_id'] == page]
+        
+        xml_page = XML_main_corr[page]
+        all_textlines = xml_page.findall(".//textline")
+        all_bboxes = [o.attrib['bbox'] for o in all_textlines]
+        # NOTE: perhaps, not all the textlines have a bbox. In that case, there will
+        # a missmatch in the assignment between index all_bboxes and all_textlines
+        
+        df_titles = df_red[df_red['confidence'] == 1][df_red['category'] == 'title']
+        #ind_val = np.ravel(np.argwhere(ind_indexes <= np.max(df_red.index)))
+        for c_index_t, index_t in enumerate(df_titles.index):
+            
+            if (index_t == 553):
+                stop = 1
+                        
+            for n_next in range(n_next_lines + 1):
+                index_t_s = index_t + n_next
+                if c_index_t < (len(df_titles.index) - 1):
+                    
+                    if index_t_s == df_titles.index[c_index_t + 1]:
+                        #print(index_t_s)
+                        break
+                    
+                if index_t_s <= np.max(df_red.index):
+                    
+                    ind_textl, _ = size_matching_bbox(df_red['bbox'][index_t_s], all_bboxes)
+                    
+                    tot_len = 0
+                    for textb in range(len(all_textlines[ind_textl])):
+                        tot_len += len(all_textlines[ind_textl][textb].text)
+                    #tot_len = 1
+                    vec_len = list()
+                    count_len = 0
+                    
+                    for textb in range(len(all_textlines[ind_textl])):
+                        
+                        text_textb = all_textlines[ind_textl][textb].text
+                        vec_feat = [year, ids, page, index_t_s, ind_textl, textb,
+                                    text_textb,
+                                    df_red['bbox'][index_t_s],
+                                    all_textlines[ind_textl][textb].attrib['font'],
+                                    all_textlines[ind_textl][textb].attrib['size'],
+                                    float(count_len)/tot_len, float(count_len + len(text_textb))/tot_len,
+                                    n_next, float(len(text_textb))/tot_len]
+                        count_len += len(text_textb)
+                        vec_df_feat.append(vec_feat)
+                        all_text_for_vocab.append(' '.join(word_tokenize(text_textb)[:num_words]))
+                        
+
+                    
+                
+
+            #text_line_comp = all_textlines[ind_textl].text
+            #f.write(text_line_comp + '\n')
+
+#%%
+
+df_feat = pd.DataFrame(vec_df_feat, columns = names_col_df_feat)
+
+# Now one hot encoding for font type
+df_feat = pd.concat([df_feat,pd.get_dummies(df_feat['font_type'])], axis = 1)
+
+# Let's build the vocabulary only from the first X words
+
+vocab, ocurr = utils_feats.get_vocab(' '.join(all_text_for_vocab), min_ocurr = 5, flag_lower = 1, flag_stopw = 1, n_words = 10,
+              lang_stopw = ['german','french','italian'])
+
+#f.close()
+
+    
+
+#%%
+# Finally, function to find the first textblock that is not part of the title
+# Some set of rules:
+# 1. If we find any of the following words among the first num_words, then it
+# ends the title
+words_ends = ['prüfung', 'prufung', 'botschaft', 'bötschaft', 'betr', 'betr.', 'berichte', 'bericht',
+              'petitionen', 'schreiben', 'bot-', 'bot -', 'mitunterzeichnern', 'be-', 'be -']
+vec = CountVectorizer(vocabulary = words_ends, tokenizer = word_tokenize)
+mat_ocurr = vec.fit_transform(all_text_for_vocab).todense()
+
+df_feat_words_ends = pd.concat([df_feat, pd.DataFrame(mat_ocurr, columns = words_ends)], axis = 1)
+# 2. If not, we iterate through the textlines, taking the last textblock 
+# of the current line, and compare against the previous one (same or different line).
+# If it is different in font type, then we mark it as the start
+# 
+# IMPORTANT: for the moment, in order to make things simpler and more straightforward
+# I am just going to assume that the first line out of the titles has to be 
+# times-roman. Will see if this holds in the future
+######
+# Things to consider
+# * For later year, specially when we have two columns, we should consider more 
+#   textlines. We can just use the delimiters set in the doc class
+
+'''
+
+#%%
+all_indexes = np.array(df_feat_words_ends[df_feat_words_ends['num_line'] == 0][df_feat_words_ends['textb'] == 0].index)
+names_col_titles = ['year','file_id','page_id','index','title_text','in_textl','in_textb','in_index',
+                    'end_textl','end_textb','end_index','bbox_vec','textl_coverage','bbox_vec_conv']
+# bbox_vec: is a list with all the bboxes that form the title
+# textl_coverage is a list with the percentage of the bounding box covered
+# by the title
+
+# Conditions!
+def_terms = ['botschaft', 'bötschaft', 'prüfung', 'prufung', 'berichte', 'bericht', 'schreiben','mitunterzeichnern']
+def_terms_regex = ['[v,y][o,ò][m,n][i]?[ .\xad-]{0,10}[0-9]', '[" "]be[\xad,-][ ]{0,10}richt', '[" "]bot[-,\xad][ ]{0,10}schaft[ ]',
+                   '[ ]botschaft[ ]']
+font_notitle = 'Times-Roman'
+min_length = 10 # for textblocks to count
+min_length_bef = 6
+
+# For later years it should be smaller, as the font sizes are almost the same,
+# hence 1 before 1917 and 0 after. NOTE(review): the code below assigns the opposite values - confirm.
+diff_fontsize = 0 # Max font size for the text outside the title (has to be smaller)
+if year >= col2_f3_year:
+    diff_fontsize = 1
+
+slack_cond_def_terms = 1 # Simply to allow that, if the condition of ind_min3, i.e. 
+# a def_terms is fulfilled, and it is not too far from the condition with the min
+# index, we overwrite it
+#ind_1words = np.ravel(np.argwhere(df_feat_words_ends.columns == words_ends[0]))
+
+list_vec_titles = list()
+
+## Checking first def_terms and regex, exclusively. 
+for count_ind, index_t in enumerate(all_indexes):
+
+    flag_notend = 1
+    flag_fulltextblock = 1
+    # Corresponding indexes
+    if count_ind == (len(all_indexes) - 1):
+        ind_all_el = np.arange(index_t,np.max(df_feat_words_ends.index) + 1)
+    else:
+        ind_all_el = np.arange(index_t,all_indexes[count_ind + 1])
+    
+    df_aux = df_feat_words_ends.iloc[ind_all_el]
+
+    # Just the first textline and first block
+    in_textl = df_feat_words_ends['textl'].iloc[index_t]
+    in_textb = df_feat_words_ends['textb'].iloc[index_t]    
+    in_index = df_feat_words_ends['index'].iloc[index_t] 
+
+    if (df_aux['file_id'].iloc[0] == 110000009) and (df_aux['page_id'].iloc[0] == 1):
+        stop = 1
+        
+    # Merge all the text and check regular expressions
+    all_text_check_norm = ' '.join([o.text for ind_r, o in df_aux.iterrows()])
+    all_text_check = ' '.join([o.text.lower() for ind_r, o in df_aux.iterrows()])
+    all_lengths = np.array([len(o.text) for ind_r, o in df_aux.iterrows()]) + 1
+    all_lengths_cum = np.cumsum(all_lengths)
+    
+    ind_pot_ind = list()
+    term_match = list()
+    for term in def_terms:
+        ind_f = all_text_check.find(' ' + term)
+        if ind_f > -1:
+            text_bef_regex = ''.join(regex.findall(r'[a-zA-Z]',all_text_check[:ind_f]))
+            if len(text_bef_regex) >= min_length_bef:
+                ind_pot_ind.append(ind_f + 1) # +1 for the space before the word
+                term_match.append(term)
+    for term_re in def_terms_regex:
+        res_regex = regex.findall(term_re, all_text_check)
+        if len(res_regex):
+            ind_f = all_text_check.find(res_regex[0])
+            text_bef_regex = ''.join(regex.findall(r'[a-zA-Z0-9()]',all_text_check[:ind_f]))
+            if len(text_bef_regex) >= min_length_bef:
+                if res_regex[0][0] == ' ':
+                    ind_f += 1
+                ind_pot_ind.append(ind_f)
+                term_match.append(res_regex[0])
+
+    # Now, locate the textblock where we found the term
+    ind_pot_ind = np.array(ind_pot_ind)
+    term_match = np.array(term_match)
+    if len(ind_pot_ind):
+        flag_notend = 0
+        term_match = term_match[np.argmin(ind_pot_ind)]
+        ind_min_aux = np.min(np.ravel(np.argwhere(all_lengths_cum > np.min(ind_pot_ind))))
+        
+        if ind_min_aux == 0:
+            percent_textb = float(np.min(ind_pot_ind))/all_lengths[ind_min_aux]
+        else:
+            percent_textb = float(np.min(ind_pot_ind) - all_lengths_cum[ind_min_aux - 1])/all_lengths[ind_min_aux]
+            
+        if percent_textb < 0.1:
+            # We take the full line
+            flag_fulltextblock = 1
+            ind_min = ind_min_aux
+        else:
+            # We split the line
+            ind_min = ind_min_aux + 1
+            flag_fulltextblock = 0
+            val_coverage = df_aux['in_char'].iloc[ind_min_aux] + (percent_textb * 
+                                 (df_aux['end_char'].iloc[ind_min_aux] - df_aux['in_char'].iloc[ind_min_aux]))
+            #val_coverage = df_aux['in_char'].iloc[ind_min_aux] + (float(ind_find_textb)/len(df_aux['text'].iloc[ind_min_aux])*
+            #        (df_aux['end_char'].iloc[ind_min_aux] - df_aux['in_char'].iloc[ind_min_aux]))
+            text_title = all_text_check_norm[:np.min(ind_pot_ind)]
+    
+    # Now condition for abrupt jump in first line
+    # At the level of the textblock
+    if flag_notend:
+        ind_next_end = np.ravel(np.argwhere(df_aux['num_line'] == 0))[-1] # Only checking 1st line
+        if df_aux.shape[0] > (ind_next_end + 1):
+            coord = pt.coord_string_to_array(df_aux['bbox'].iloc[ind_next_end])
+            coord_next = pt.coord_string_to_array(df_aux['bbox'].iloc[ind_next_end + 1])
+            last_coord = coord[-2]
+            last_coord_next = coord_next[-2]
+            height_line = float(np.abs(coord[3] - coord[1]))
+            
+            if ((last_coord_next > (last_coord + np.max([last_coord, last_coord_next])/10)) and
+                (coord_next[3] < (coord[3] - height_line/2))):
+                ind_min = ind_next_end + 1
+                flag_notend = 0
+        
+    # Finally, condition for change of font size, font type, etc.
+    # At the level of the textblock
+    if flag_notend:
+
+        # Checking second condition
+        # The font has to be different and also the textblock has to be covered until
+        # the end of the line. These are the conditions
+        ind_min = -1
+        # Assuming the first line outside the title has to be times-roman
+        ind_timesR = np.ravel(np.argwhere(np.array(df_aux[font_notitle]) == 1))
+        ind_ending_textb = np.setdiff1d(ind_timesR,0)
+        # And here we check the parts in times roman, comparing to previous, and 
+        # next one
+        all_fontsizes = np.array(df_aux['font_size']).astype(float)
+        for ind_e, ind_e_textb in enumerate(ind_ending_textb):
+            if ((df_aux['font_type'].iloc[ind_e_textb] != df_aux['font_type'].iloc[ind_e_textb - 1])
+                and (df_aux['font_type'].iloc[ind_e_textb].find('Bold') == -1)
+                and (len(df_aux['text'].iloc[ind_e_textb].replace(' ','')) > min_length)
+                and ((np.max(all_fontsizes) - float(df_aux['font_size'].iloc[ind_e_textb])) >= diff_fontsize)):
+                
+                if ind_e_textb == np.max(ind_ending_textb):
+                    ind_min = ind_e_textb
+                    break
+                else:
+                    # Also similar to next one, that should be already times-roman
+
+                    # This condition is to avoid short things such as N, III, or sometimes numbers
+                    # that are recognized in bold, and to ensure that after this piece of text
+                    # we have a long times-roman text
+                    len_rest = np.array([len(o) for o in np.array(df_aux['text'].iloc[ind_ending_textb])])[(ind_e + 1):]
+                    # And to avoid short texts before
+                    text_bef = ''.join(df_aux['text'].iloc[:ind_e_textb])
+                    text_bef_regex = ''.join(regex.findall(r'[a-zA-Z]',text_bef))                    
+                    if len(np.argwhere(len_rest > min_length)) and (len(text_bef_regex) > min_length_bef):
+                        ind_min = ind_e_textb
+                        
+                    if ind_min >= 0:
+                        break
+        
+        # We are forcing to take times roman
+        if ind_min < 0 and len(ind_ending_textb):
+            ind_min = np.max(ind_ending_textb)
+            
+        if (ind_min > df_aux.shape[0]) and (ind_min != 1000):
+            ind_min = df_aux.shape[0] - 1
+                    
+        # To finally merge with some small part in the beginning of the sentence
+        if (ind_min != 1000):
+            if (df_aux['length'].iloc[ind_min - 1] < 0.1) and (df_aux['num_line'].iloc[ind_min - 1] > 0): # Less than 10% of the textline length
+                ind_min -= 1
+    
+    if (ind_min == 1000) or (ind_min == -1):
+        ind_min = df_aux.shape[0]
+        
+    end_textl = df_aux['textl'].iloc[ind_min - 1]
+    end_textb =df_aux['textb'].iloc[ind_min - 1]            
+    end_index = df_aux['index'].iloc[ind_min - 1]
+    
+    if flag_fulltextblock:
+        text_title = ' '.join(df_aux['text'].iloc[:ind_min])
+        val_coverage = df_aux['end_char'].iloc[ind_min - 1]
+    
+    # common for all cases
+    bbox_vec = list()
+    textl_coverage = list()
+    bbox_vec_conv = list()
+    for index_bb in np.arange(in_index, end_index + 1):
+        if index_bb < end_index:
+            textl_coverage.append(1)
+            bbox_vec_conv.append(df_aux[df_aux['index'] == index_bb]['bbox'].iloc[0])
+        bbox_vec.append(df_aux[df_aux['index'] == index_bb]['bbox'].iloc[0])
+    
+    textl_coverage.append(val_coverage)
+    arr_coord = pt.coord_string_to_array(bbox_vec[-1])
+    arr_coord[2] = arr_coord[0] + (arr_coord[2] - arr_coord[0]) * val_coverage
+    bbox_vec_conv.append(','.join(arr_coord.astype(str)))
+    # Stack all features
+    vec_new_el = [df_aux['year'].iloc[0], df_aux['file_id'].iloc[0], df_aux['page_id'].iloc[0], 
+                  df_aux['index'].iloc[0], text_title, in_textl, in_textb, in_index, end_textl, end_textb,
+                  end_index, bbox_vec, textl_coverage, bbox_vec_conv]
+
+    list_vec_titles.append(vec_new_el)
+ 
+ 
+df_titles_prev = df[df['confidence'] == 1][df['category'] == 'title']
+df_titles_def = pd.DataFrame(list_vec_titles, columns = names_col_titles)
+
+#%%
+# Saving figures
+flag_text = 1
+resolution = 200
+if flag_save_fig:
+    path_output_img = os.path.join(data_folder, '{}_previews'.format(year))
+    if not os.path.exists(path_output_img): 
+        os.makedirs(path_output_img)
+    
+    unique_ids = np.unique(df_titles_def['file_id'])
+    for ids in unique_ids:
+        # For each document, render its pages to images and save one preview figure per page with the title bboxes.
+        unique_pages = np.unique(df_titles_def[df_titles_def['file_id'] == ids]['page_id'])
+        input_file = './{}/{}.pdf'.format(year, ids)
+        doc = defc.Document(input_file, folder_database=folder_databaseAB, flag_type=flag_type_for_doc)
+        XML_main = doc.xml
+        doc.pdf2imgobj(resolution = resolution)
+        for page in unique_pages:
+            # Reducing df to only those necessary, and iterate through titles
+            df_titles_def_red = df_titles_def[(df_titles_def['file_id'] == ids) & (df_titles_def['page_id'] == page)]
+            
+            # Obtain vector of bboxes
+            list_bbox = np.concatenate([row['bbox_vec_conv'] for o, row in df_titles_def_red.iterrows()])
+            list_bbox = np.array([o.split(',') for o in list_bbox]).astype(float)
+            img = np.array(doc.imgobj[page])
+            dim_bbox_page = np.array(XML_main[page].attrib['bbox'].split(',')).astype(np.float64)
+            
+            # Getting the image plotted
+            #img_arr = pt.highlight_text(img, list_bbox, dim_bbox_page, color_vec = 'red', filled = False, thick_line = 3)
+            list_bbox_t, _ = pt.adapt_coordtoimg(img, np.transpose(list_bbox), dim_bbox_page)
+            img_arr = pt.plot_bboxes_rainbow(img, list_bbox_t, n_colors = list_bbox_t.shape[1])
+            fig, axes = plt.subplots(1, 1, figsize=(16, 16))
+            axes.axis('off')
+            axes.imshow(img_arr)
+            
+            if flag_text:
+                for ind, row in df_titles_def_red.iterrows():
+                    text_t = row['title_text']
+                    bbox_v = pt.coord_string_to_array(row['bbox_vec'][0])
+                    bbox_t = np.ravel(pt.adapt_coordtoimg(img, bbox_v, dim_bbox_page)[0])
+                    axes.text(bbox_t[1] + 10, bbox_t[0] - 10, text_t, fontsize = 10, color = 'red')
+
+            format_fig = 'png'
+            name_fig = '{}/{}_page{}.{}'.format(path_output_img,doc.name_wo_ext,page,format_fig)
+            fig.savefig(name_fig, format = format_fig, dpi = resolution)
+            plt.close(fig)
\ No newline at end of file
diff --git a/src/python/run_titles_completion.py b/src/python/run_titles_completion.py
index 34c46f9b26a80666664197be54f5097ae92e812a..fa9037b8b8c0c4e1df54a2471cca42be4ee5020f 100644
--- a/src/python/run_titles_completion.py
+++ b/src/python/run_titles_completion.py
@@ -41,7 +41,8 @@ def size_matching_bbox(bbox, list_bbox):
     return np.argmin(dist_vec), np.min(dist_vec)
 
 def remove_space_char(text):
-    all_matches = regex.findall(r'(?=(\w \w))',text_regex)
+    all_matches = regex.findall(r'(?=([a-z] [a-z]))',text)
+    #all_matches = regex.findall(r'(?=(\w \w))',text)
     for match in all_matches:
         text = regex.sub(match,match.replace(' ',''),text)
     return text
@@ -52,7 +53,7 @@ data_folder = '/Users/luissalamanca/Dropbox/My_stuff/05_SDSCresearch/02_NLP/Data
 folder_databaseAB = '/Users/luissalamanca/Dropbox/My_stuff/05_SDSCresearch/02_NLP/Data/AB_other/SessionOverviews_tar/'
 col2_f3_year = 1917
 
-year = 1922 # 1893
+year = 1940 # 1893
 flag_type_for_doc = 3
 
 flag_save_fig = 1
@@ -67,6 +68,7 @@ names_col_df_feat = ['year','file_id','page_id','index','textl','textb','text','
 #%%
 
 '''
+
 unique_ids = np.unique(df['file_id'])
 
 #unique_ids = [unique_ids[0]]
@@ -164,7 +166,7 @@ vocab, ocurr = utils_feats.get_vocab(' '.join(all_text_for_vocab), min_ocurr = 5
 #f.close()
 
     
-'''
+
 #%%
 # Finally, function to find the first textblock that is not part of the title
 # Some set of rules:
@@ -188,6 +190,8 @@ df_feat_words_ends = pd.concat([df_feat, pd.DataFrame(mat_ocurr, columns = words
 # * For later year, specially when we have two columns, we should consider more 
 #   textlines. We can just use the delimiters set in the doc class
 
+'''
+
 #%%
 all_indexes = np.array(df_feat_words_ends[df_feat_words_ends['num_line'] == 0][df_feat_words_ends['textb'] == 0].index)
 names_col_titles = ['year','file_id','page_id','index','title_text','in_textl','in_textb','in_index',
@@ -198,10 +202,10 @@ names_col_titles = ['year','file_id','page_id','index','title_text','in_textl','
 
 # Conditions!
 def_terms = ['botschaft', 'bötschaft', 'prüfung', 'prufung', 'berichte', 'bericht', 'schreiben','mitunterzeichnern']
-def_terms_regex = ['vom [1-9]', '[" "]be[" ",-,\xad]', '[" "]bot[" ",-,\xad]']
+def_terms_regex = ['vom *[0-9]', '[" "]be[" ",-,\xad]', '[" "]?^?bot[$," ",-,\xad]?$?','bot']
 font_notitle = 'Times-Roman'
 min_length = 10 # for textblocks to count
-min_length_bef = 14
+min_length_bef = 6
 diff_fontsize = 1 # Max font size for the text outside the title (has to be smaller)
 slack_cond_def_terms = 1 # Simply to allow that, if the condition of ind_min3, i.e. 
 # a def_terms if fulfilled, and it is not too far from the condition with the min 
@@ -220,7 +224,7 @@ for count_ind, index_t in enumerate(all_indexes):
     
     df_aux = df_feat_words_ends.iloc[ind_all_el]
 
-    if (df_aux['file_id'].iloc[0] == 110000459) and (df_aux['page_id'].iloc[0] == 20):
+    if (df_aux['file_id'].iloc[0] == 110000771) and (df_aux['page_id'].iloc[0] == 9):
         stop = 1
 
     # Just the first textline and first block
@@ -241,7 +245,9 @@ for count_ind, index_t in enumerate(all_indexes):
         if (n_line == 0) and len(ind_sum_words_def):
             for ind_s_def in ind_sum_words_def:
                 text_bef = ''.join(df_aux['text'].iloc[:int(ind_s_def)])
-                if len(text_bef) > min_length_bef:
+                # Regex to count only ASCII letters
+                text_bef_regex = ''.join(regex.findall(r'[a-zA-Z]',text_bef))
+                if len(text_bef_regex) >= min_length_bef:
                     flag_valid = 1
                     break
             if flag_valid:
@@ -264,7 +270,8 @@ for count_ind, index_t in enumerate(all_indexes):
         # Checking that we are not taking a line too early
         if (n_line == 0) and (ind_regex > -1):
             text_bef = ''.join(df_aux['text'].iloc[:ind_regex]).replace(' ','')
-            if len(text_bef) < min_length_bef:
+            text_bef_regex = ''.join(regex.findall(r'[a-zA-Z]',text_bef))
+            if len(text_bef_regex) < min_length_bef:
                 ind_regex = -1
         
         # 2nd hard: line much shorter than next. This is to indicate and abrupt jump.
@@ -314,26 +321,30 @@ for count_ind, index_t in enumerate(all_indexes):
             ind_min3 = 1000
             ind_find = -1
             for ind_r in ind_num_line:
-                text_block = remove_space_char(df_aux['text'].iloc[ind_r].lower())
+                #text_block = remove_space_char(df_aux['text'].iloc[ind_r].lower())
+                text_block = df_aux['text'].iloc[ind_r].lower()
                 for term_def in def_terms:
                     if  term_def in word_tokenize(text_block)[num_words:]:
                         ind_find = df_aux['text'].iloc[ind_r].lower().find(term_def)
                         text_title = text_title3 + ' ' + df_aux['text'].iloc[ind_r][:ind_find]
                         # To avoid too short titles
-                        if len(text_title.replace(' ','')) > min_length_bef:
+                        if len(text_title.replace(' ','')) > min_length:
                             flag_fulltextblock = 0
                             break
                 if not ind_find > -1:
                     for ireg in def_terms_regex:
-                        text_regex = ' '.join(word_tokenize(text_block)[num_words:])
-                        res_regex = regex.findall(ireg, text_regex)
-                        if res_regex:
-                            ind_find = df_aux['text'].iloc[ind_r].lower().find(res_regex[0])
-                            text_title = text_title3 + ' ' + df_aux['text'].iloc[ind_r][:ind_find]
-                            # To avoid too short titles
-                            if len(text_title.replace(' ','')) > min_length_bef:
-                                flag_fulltextblock = 0
-                                break
+                        tokenize_tb = word_tokenize(text_block)
+                        if len(tokenize_tb) > num_words:
+                            index_start = text_block.find(tokenize_tb[num_words])
+                            #text_regex = ' '.join(word_tokenize(text_block)[num_words])
+                            res_regex = regex.findall(ireg, text_block[index_start:])
+                            if res_regex:
+                                ind_find = df_aux['text'].iloc[ind_r].lower().find(res_regex[0])
+                                text_title = text_title3 + ' ' + df_aux['text'].iloc[ind_r][:ind_find]
+                                # To avoid too short titles
+                                if len(text_title.replace(' ','')) > min_length:
+                                    flag_fulltextblock = 0
+                                    break
                         
                 if not flag_fulltextblock:
                     val_coverage = df_aux['in_char'].iloc[ind_r] + (float(ind_find)/len(df_aux['text'].iloc[ind_r])*
@@ -396,7 +407,7 @@ for count_ind, index_t in enumerate(all_indexes):
             else:
                 ind_imin = np.argmin([ind_min1, ind_min2, ind_min3])
                 if ind_imin == 2:
-                    ind_imin = ind_min3
+                    ind_min = ind_min3
                 else:
                     ind_min = np.min([ind_min1, ind_min2])
                     if (ind_min + slack_cond_def_terms) >= ind_min3:
@@ -411,12 +422,13 @@ for count_ind, index_t in enumerate(all_indexes):
             if (ind_min != 1000):
                 if (df_aux['length'].iloc[ind_min - 1] < 0.1) and (df_aux['num_line'].iloc[ind_min - 1] > 0): # Less than 10% of the textline length
                     ind_min -= 1
-                
+                    
             # Checking that we are not taking a line too early. Therefore, we just 
             # require for some minimum length of text for the case of the first line
             if (n_line == 0) and (ind_min > -1):
                 text_bef = ''.join(df_aux['text'].iloc[:ind_min])
-                if len(text_bef) < min_length_bef:
+                text_bef_regex = ''.join(regex.findall(r'[a-zA-Z]',text_bef))
+                if len(text_bef_regex) < min_length_bef:
                     ind_min = 1000
 
                 
@@ -459,7 +471,7 @@ df_titles_def = pd.DataFrame(list_vec_titles, columns = names_col_titles)
 #%%
 # Saving figures
 flag_text = 1
-resolution = 100
+resolution = 200
 if flag_save_fig:
     path_output_img = os.path.join(data_folder, '{}_previews'.format(year))
     if not os.path.exists(path_output_img): 
diff --git a/src/python/test_correct.py b/src/python/test_correct.py
index 1da770adda994d958719c88de9396e265ecbe454..23d0aa22fae351c4585896ac484eb3d6a7a59eda 100644
--- a/src/python/test_correct.py
+++ b/src/python/test_correct.py
@@ -1,19 +1,22 @@
 import os
 os.environ['DEMOCRASCI_DATA'] = "/Users/luissalamanca/My_stuff/05_SDSCresearch/02_NLP/Code/democrasci_preprocwp1/data/"
 
+import numpy as np
+import matplotlib.pyplot as plt
+import plot_tools as pt
 
 import def_classes as defc
 import utils_proc
 
 year = 1982 
-year = 1922
+year = 1950
 folder_database = '../../data/AB_other/SessionOverviews_tar/'
 folder_database = '/Users/luissalamanca/Dropbox/My_stuff/05_SDSCresearch/02_NLP/Data/AB_other/SessionOverviews_tar'
 
 #%%
 
 iddoc = '110001467'
-iddoc = '110000455'
+iddoc = '110000959'
 input_file = "./{}/{}.pdf".format(year, iddoc)
 
 doc = defc.Document(input_file, folder_database, flag_type = 3)
@@ -42,3 +45,45 @@ for l_d in list_docs:
     doc.correct_xml(flag_plots = 1, flag_parallel = 0, flag_save_figs = 1,
                         pages = 'all', suffix_xml = '_data', name_outxml = '02_extractedxml',
                         name_outcorrxml = '04_correctedxml', flag_save = 1)
+
+#%%
+# Visualize how the pages look when extracted directly from the original xml,
+# without any ordering
+
+import xml.etree.ElementTree as ET
+
+year = 1940
+iddoc = '110000779'
+
+input_file = "./{}/{}.pdf".format(year, iddoc)
+
+doc = defc.Document(input_file, folder_database, flag_type = 3)
+doc.pdf2imgobj()
+
+name_outxml = '02_extractedxml'
+
+name_xml = './' + str(year) + '/' + str(iddoc) + '_data.xml'
+if name_xml in utils_proc.get_contained_file_names(year, folder_database, name_outxml):
+    h_xml = utils_proc.get_handlerfile(name_xml, doc.folder_database, name_outxml)
+    XML_tree = ET.parse(h_xml)
+    XML_main = XML_tree.getroot()
+    
+#%%
+
+for page in range(len(doc.imgobj)):
+#for page in range(2):
+
+    # Obtain vector of bboxes
+    list_bbox = XML_main[page].findall(".//textline")
+    list_bbox = np.array([o.attrib['bbox'].split(',') for o in list_bbox]).astype(float)
+    img = np.array(doc.imgobj[page])
+    dim_bbox_page = np.array(XML_main[page].attrib['bbox'].split(',')).astype(np.float64)
+    
+    # Getting the image plotted
+    #img_arr = pt.highlight_text(img, list_bbox, dim_bbox_page, color_vec = 'red', filled = False, thick_line = 3)
+    list_bbox_t, _ = pt.adapt_coordtoimg(img, np.transpose(list_bbox), dim_bbox_page)
+    img_arr = pt.plot_bboxes_rainbow(img, list_bbox_t, n_colors = list_bbox_t.shape[1])
+    fig, axes = plt.subplots(1, 1, figsize=(16, 16))
+    axes.axis('off')
+    axes.imshow(img_arr)
+            
diff --git a/src/python/test_titles.txt b/src/python/test_titles.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a4c824d13cb5d2821a8fbda05197af9d9eec725b
--- /dev/null
+++ b/src/python/test_titles.txt
@@ -0,0 +1,72 @@
+[font face="Times-Bold" size="10.500"]1.  Wahlaktenpriifung.  [/font][font face="Times-Roman" size="8.500"]Prüfung  der Wahlakten  neuer  Mitglieder.  [/font]
+[font face="Times-Bold" size="10.500"]2 Bureaux-Neubestellung.  [/font][font face="Times-Roman" size="8.500"]Wahl  des  Bureau  des  Nationalrates  und  desjenigen  des  Ständerates.  [/font]
+[font face="Times-Bold" size="10.500"]3.  Wahl  der  Büdget-Commissionen  [/font][font face="Times-Roman" size="8.500"]des  Nationalrates  und  des  Ständerates  für  das  Budget  von  1894.  [/font]
+[font face="Times-Bold" size="10.500"]4.  Geschäftsbericht  und  Staatsrechnung  für  das  Jahr  1892.  [/font]
+[font face="Times-Roman" size="9.500"]5.  [/font][font face="Times-Bold" size="10.500"]^Vertretung  der  Schweiz  im  Auslande.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf vom  19. Mai  1893 (Bundesblatt  III. 69),  betr  [/font]
+[font face="Times-Bold" size="10.500"]7 [/font][font face="Times-Roman" size="9.500"](65).  [/font][font face="Times-Italic" size="8.000"]8 [/font][font face="Times-Bold" size="10.500"]Schulwandkarte  der  Schweiz.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  20.  März  1893  (Bundesblatt  I. 1019)  betr  Er­ [/font]
+[font face="Times-Bold" size="10.500"]9 [/font][font face="Times-Roman" size="9.500"](7).  [/font][font face="Times-Bold" size="10.500"]S Polytechnikum.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  27. Januar  1893  (Bundesblatt  I.  353),  betr.  Erhöhung  des  Jahres[/font]
+[font face="Times-Bold" size="10.500"].0(8).  [/font][font face="Times-BoldItalic" size="8.500"]S  [/font][font face="Times-Bold" size="10.500"]Nationalbibliothek.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  [/font][font face="Times-Bold" size="10.500"]8. [/font][font face="Times-Roman" size="9.500"]März  [/font][font face="Times-Bold" size="10.500"]1893  [/font][font face="Times-Roman" size="9.500"](Bundesblatt  I. [/font][font face="Times-Bold" size="10.500"]1000),  [/font][font face="Times-Roman" size="9.500"]betr.  die  Gründuno einer  [/font]
+[font face="Times-Bold" size="10.500"]11.  S Cholera-TJebereinkunft.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  16.  Mai  1893  (Bundesblatt  III.  159),  betr.  Ratification  der  [/font]
+[font face="Times-Bold" size="10.500"]12.  s Gesundheitsamt  beim  Departement  des  Innern.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  19.  Mai  1893  (Bundesblatt  [/font]
+[font face="Times-Roman" size="9.500"]IB [/font][font face="Times-Bold" size="10.500"](9).  [/font][font face="Times-Italic" size="8.000"]fi [/font][font face="Times-Bold" size="10.500"]Parlamentsgebäude.  [/font][font face="Times-Roman" size="9.500"]Botschaft  vom  [/font][font face="Times-Bold" size="10.500"]2.  [/font][font face="Times-Roman" size="9.500"]Juni  [/font][font face="Times-Bold" size="10.500"]1892  [/font][font face="Times-Roman" size="9.500"](Bundesblatt  III.  [/font][font face="Times-Bold" size="10.500"]572),  [/font][font face="Times-Roman" size="9.500"]betr.  Erwerbung  der  Casinoliegenschaft  in  Bern  [/font]
+[font face="Times-Bold" size="10.500"]14(11).  S Archivgebäude.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  [/font][font face="Times-Bold" size="10.500"]8.  [/font][font face="Times-Roman" size="9.500"]Dezember  [/font][font face="Times-Bold" size="10.500"]1892  [/font][font face="Times-Roman" size="9.500"](Bandesblatt  V.  [/font][font face="Times-Bold" size="10.500"]764),  [/font][font face="Times-Roman" size="9.500"]betr.  Bewilligung  des  [/font]
+[font face="Times-Bold" size="10.500"]15(12).  [/font][font face="Times-BoldItalic" size="10.500"]n [/font][font face="Times-Bold" size="10.500"]Schallenberg strasse.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  [/font][font face="Times-Bold" size="10.500"]9. [/font][font face="Times-Roman" size="9.500"]September  [/font][font face="Times-Bold" size="10.500"]1892  [/font][font face="Times-Roman" size="9.500"](Bundesblatt  IV. [/font][font face="Times-Bold" size="10.500"]441),  [/font][font face="Times-Roman" size="9.500"]betr. Bewilligung  [/font]
+[font face="Times-Bold" size="10.000"]16.  [/font][font face="Times-Italic" size="8.000"]n [/font][font face="Times-Bold" size="10.500"]Schangnau—Wiggen-Strasse.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  [/font][font face="Times-Bold" size="10.500"]19. [/font][font face="Times-Roman" size="9.500"]Mai  [/font][font face="Times-Bold" size="10.500"]1893  [/font][font face="Times-Roman" size="9.500"](Bundesblatt  III. [/font][font face="Times-Bold" size="10.500"]11),  [/font][font face="Times-Roman" size="9.500"]betr.  Bewilligung  [/font]
+[font face="Times-Bold" size="10.000"]17.  [/font][font face="Times-BoldItalic" size="8.500"]S  [/font][font face="Times-Bold" size="10.500"]Sisselnbach-Correction.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  [/font][font face="Times-Bold" size="10.500"]18,  [/font][font face="Times-Roman" size="9.500"]April [/font][font face="Times-Bold" size="10.500"]1893  [/font][font face="Times-Roman" size="9.500"](Bundesblatt II. [/font][font face="Times-Bold" size="10.500"]739),  [/font][font face="Times-Roman" size="9.500"]betr. Zusicherung  einer  [/font]
+[font face="Times-Bold" size="10.000"]18.  [/font][font face="Times-BoldItalic" size="10.500"]n [/font][font face="Times-Bold" size="10.500"]Rheinregulierung.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom [/font][font face="Times-Bold" size="10.500"]26.  [/font][font face="Times-Roman" size="9.500"]Mai [/font][font face="Times-Bold" size="10.500"]1893  [/font][font face="Times-Roman" size="9.500"](Bandesblatt III. [/font][font face="Times-Bold" size="10.500"]101),  [/font][font face="Times-Roman" size="9.500"]betr. Ratification  des  Staats­ [/font]
+[font face="Times-Bold" size="10.000"]19[/font][font face="Times-Bold" size="10.500"](5).  [/font][font face="Times-BoldItalic" size="10.500"]n [/font][font face="Times-Bold" size="10.500"]Initiativbegehren  betr.  das  Verbot  des  Schlachtens  ohne  vorherige  Betäubung.  [/font][font face="Times-Roman" size="9.500"]Wortlaut  des  Begehrens:  [/font]
+[font face="Times-Bold" size="10.500"]762).  [/font][font face="Times-Roman" size="9.500"]— Zu  vergi,  auch  Bundesblatt  IV.  [/font][font face="Times-Bold" size="10.500"]339  [/font][font face="Times-Roman" size="9.500"]und  [/font][font face="Times-Bold" size="10.500"]477.  [/font]
+[font face="Times-Bold" size="10.000"]20 [/font][font face="Times-Bold" size="10.500"](14).  [/font][font face="Times-Italic" size="8.000"]n [/font][font face="Times-Bold" size="10.000"]Politische  Rechte.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Gesetzes-Entwurf vom  [/font][font face="Times-Bold" size="10.500"]2.  [/font][font face="Times-Roman" size="9.500"]Juni  [/font][font face="Times-Bold" size="10.500"]1882  [/font][font face="Times-Roman" size="9.500"](Bundesblatt III. [/font][font face="Times-Bold" size="10.500"]1),  [/font][font face="Times-Roman" size="9.500"]betretend  die  politischen  Rechte  [/font]
+[font face="Times-Bold" size="10.000"]21 [/font][font face="Times-Roman" size="9.500"](15).  [/font][font face="Times-BoldItalic" size="10.500"]s [/font][font face="Times-Bold" size="10.000"]Schweizerische Eisenbahnrente. [/font][font face="Times-Roman" size="9.500"]Bericht  des Bundesrates  vom  23. October  1891 (Bundesblatt  IV. 620)  über die  Frage der  [/font]
+[font face="Times-Bold" size="10.000"]22[/font][font face="Times-Roman" size="9.500"](16).  [/font][font face="Times-BoldItalic" size="10.500"]n [/font][font face="Times-Bold" size="10.000"]Tessiner Interventionskosten.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  15. Dezember 1891  (Bundesblatt  V.  785),  betr.  [/font]
+[font face="Times-Roman" size="9.500"]23(18). [/font][font face="Times-Bold" size="10.000"]s Recurs Käslin.  [/font][font face="Times-Roman" size="9.500"]Recurs  der Familie  Käslin  «zum Freienhof»  in Stansstad  gegen  den Bundesratsbeschluss  vom 6.  August 1891  [/font]
+[font face="Times-Bold" size="10.000"]24 a [/font][font face="Times-Roman" size="9.500"](20).  [/font][font face="Times-BoldItalic" size="10.500"]n [/font][font face="Times-Bold" size="10.000"]Maifeier 1892.  [/font][font face="Times-Roman" size="9.500"]Petitionen  der  [/font][font face="Times-Roman" size="8.500"]Schweiz. [/font][font face="Times-Roman" size="9.500"]Volksversammlungen  vom  1.  Mai  1892,  sowie  einer Volksversammlung in  Siebnen  [/font]
+[font face="Times-Bold" size="10.000"]24 b.  n Maifeier  1893.  [/font][font face="Times-Roman" size="9.500"]Petitionen  der  schweizerischen  Volksversammlungen  vom  1.  Mai  1893  betreffend  Verkürzung  der  Arbeitszeit,  [/font]
+[font face="Times-Bold" size="10.000"]25 [/font][font face="Times-Roman" size="9.500"](59).  [/font][font face="Times-BoldItalic" size="8.500"]S  [/font][font face="Times-Bold" size="10.000"]Recurs  Krummenacher.  [/font][font face="Times-Roman" size="9.500"]Recurs  des  Ludwig  Krummenacher  «zum  Hirschen»  in  Sarnen  gegen'  den  Bundesratsbeschluss  [/font]
+[font face="Times-Bold" size="10.000"]26 [/font][font face="Times-Roman" size="9.500"](66).  [/font][font face="Times-BoldItalic" size="10.500"]n [/font][font face="Times-Bold" size="10.000"]Beschwerde  Lœw.  [/font][font face="Times-Roman" size="9.500"]Bericht  des  Bundesrates  yom  19.  Mai  1893  (Bundesblatt  III.  17),  betr.  die  Beschwerde  des  Soldaten  [/font]
+[font face="Times-Bold" size="10.000"]27 [/font][font face="Times-Roman" size="9.500"](67).  [/font][font face="Times-BoldItalic" size="8.500"]S  [/font][font face="Times-Bold" size="10.000"]Recurs  Bérard.  [/font][font face="Times-Roman" size="9.500"]Recurs  des  Pacifique  Bérard  in  Genf  gegen  zwei  Bundesratsbeschlüsse  vom  2. September  und 22.  November  [/font]
+[font face="Times-Bold" size="10.000"]28 [/font][font face="Times-Roman" size="9.500"](30).  [/font][font face="Times-BoldItalic" size="10.500"]s [/font][font face="Times-Bold" size="10.000"]Besoldungsgesetz  für  die  Beamten  des  Militärdepartements.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  21.  No­ [/font]
+[font face="Times-Bold" size="10.000"]29 a [/font][font face="Times-Roman" size="9.500"](31).  [/font][font face="Times-BoldItalic" size="10.500"]s [/font][font face="Times-Bold" size="10.000"]Eingabe  von  Pferdezüchtern  der  romanischen  Schweiz  [/font][font face="Times-Roman" size="9.500"]betr.  Ankauf  von  Artillerie und  Cavalleriepferden  im  [/font]
+[font face="Times-Bold" size="10.500"]b [/font][font face="Times-Roman" size="9.500"](32).  [/font][font face="Times-Italic" size="8.000"]8 [/font][font face="Times-Bold" size="10.500"]Einmietung  von  Artillerie-Zugpferden.  [/font][font face="Times-Roman" size="9.500"]Eingabe  des  Vereins  ostschweizerischer  Pferdezüchter  und  Pferdeliebhaber  [/font]
+[font face="Times-Bold" size="10.000"]80 [/font][font face="Times-Roman" size="9.500"](33).  [/font][font face="Times-BoldItalic" size="10.500"]s [/font][font face="Times-Bold" size="10.000"]Landsturm.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  12. Mai  1893  (Bundesblatt  II.  1014),  betr.  die  Bewaffnung,  die  Aus­ [/font]
+[font face="Times-Bold" size="10.000"]31 [/font][font face="Times-Roman" size="9.500"](68).  [/font][font face="Times-BoldItalic" size="10.500"]s [/font][font face="Times-Bold" size="10.000"]Militärische  Telegraphen und  Telephonlinien.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  27.  März  1893  (Bundes­ [/font]
+[font face="Times-Bold" size="10.000"]32.  [/font][font face="Times-Italic" size="8.000"]tl [/font][font face="Times-Bold" size="10.500"]Kriegsmaterialanschaffungen,  [/font][font face="Times-Roman" size="9.500"]Büdget  für  1894.  Botschaft  und  Beschlusses-Entwurf  vom  29.  Mai  1893  (B.-Bl.  [/font][font face="Times-Bold" size="10.000"]III.  [/font][font face="Times-Roman" size="9.500"]191),  [/font]
+[font face="Times-Bold" size="10.000"]33.  [/font][font face="Times-BoldItalic" size="10.500"]s [/font][font face="Times-Bold" size="10.000"]Entschädigung  für  Recru tenausrüstung  pro  1894.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  30.  Mai  1893  (Bundes­ [/font]
+[font face="Times-Bold" size="10.000"]34.  [/font][font face="Times-BoldItalic" size="8.500"]S  [/font][font face="Times-Bold" size="10.000"]Militärgeleiseanlage  bei  Göschenen.  [/font][font face="Times-Roman" size="9.500"]Botschaft  nnd  Beschlusses-Entwurf  vom  23.  Mai  1893  (Bundesblatt  III.  21),  betr.  [/font]
+[font face="Times-Bold" size="10.000"]35.  S Notportionen  und  Notrationen.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  29. Mai  1893 (Bundesblatt III. 105),  betr. Verab­ [/font]
+[font face="Times-Bold" size="10.000"]36.  [/font][font face="Times-BoldItalic" size="8.500"]S  [/font][font face="Times-Bold" size="10.000"]Verpflegungs und  Magazinbureau.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  30.  Mai  1893  (Bundesblatt  III.  115),  betr.  [/font]
+[font face="Times-Bold" size="10.000"]37.  [/font][font face="Times-BoldItalic" size="8.500"]S  [/font][font face="Times-Bold" size="10.000"]Recurs Römer  und Carrard.  [/font][font face="Times-Roman" size="9.500"]Bericht  des Bundesrates vom  30. Mai  1893 (Bundesblatt  III. 126) über  den Recurs  der HH. Römer  [/font]
+[font face="Times-Bold" size="10.000"]38.  [/font][font face="Times-BoldItalic" size="10.500"]s [/font][font face="Times-Bold" size="10.000"]Artillerie-Versuchsstation  in  Thun.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  30. Mai  1893  (Bundesblatt  III.  119),  betr.  [/font]
+[font face="Times-Bold" size="10.000"]39.  [/font][font face="Times-Roman" size="9.500"]S [/font][font face="Times-Bold" size="10.000"]Nachtragscredite  (I.  [/font][font face="Times-Roman" size="9.500"]Serie).  Botschaft  und  Beschlusses-Entwurf  vom  2.  Juni  1893  (Bundesblatt  [/font][font face="Times-Bold" size="10.000"]III.  [/font][font face="Times-Roman" size="9.500"]212),  betr.  Bewilligung  [/font]
+[font face="Times-Bold" size="10.000"]40 [/font][font face="Times-Roman" size="9.500"](37).  [/font][font face="Times-BoldItalic" size="10.500"]n [/font][font face="Times-Bold" size="10.000"]Zollgesetz,  Revision.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und Gesetzes-Entwurf  vom 30. Mai  1892  (Bundesblatt III.  410),  betr. Revision  des Bundes­ [/font]
+[font face="Times-Bold" size="10.000"]41.  [/font][font face="Times-BoldItalic" size="10.500"]n [/font][font face="Times-Bold" size="10.000"]Zuckerrückzoll.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  26. Mai  1893  (Bundesblatt III. 87),  betreffend die  fernere  Gewährung  [/font]
+[font face="Times-Bold" size="10.000"]42 [/font][font face="Times-Roman" size="9.500"](38).  [/font][font face="Times-BoldItalic" size="10.500"]n [/font][font face="Times-Bold" size="10.000"]Arbeit  in  den  Fabriken.  [/font][font face="Times-Roman" size="9.500"]Bericht  des  Bundesrates  vom  3.  Juni  1891  (Bundesblatt  III.  194),  betr.  vier  Beschlüsse  der  [/font]
+[font face="Times-Bold" size="10.000"]43 [/font][font face="Times-Roman" size="9.500"](39).  [/font][font face="Times-BoldItalic" size="10.500"]s [/font][font face="Times-Bold" size="10.000"]Zündhölzchenmonopol.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  20.  November  1891  (Bundesblatt  V.  413) ,  betr.  Ein­ [/font]
+[font face="Times-Bold" size="10.000"]44[/font][font face="Times-Roman" size="9.500"](40).  [/font][font face="Times-BoldItalic" size="8.500"]S  [/font][font face="Times-Bold" size="10.000"]Gewerbe-Gesetzgebung.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom  25.  November  1892  (Bundesblatt  V.  366),  betr.  Ein­ [/font]
+[font face="Times-Bold" size="10.000"]45 [/font][font face="Times-Roman" size="9.500"](41).  [/font][font face="Times-BoldItalic" size="8.500"]n [/font][font face="Times-Bold" size="10.000"]Förderung  der  Landwirtschaft  durch  den  Bund.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Gesetzes-Entwurf  vom  28.  November  1892  [/font]
+[font face="Times-Bold" size="10.000"]46  [/font][font face="Times-Roman" size="9.500"](42).  [/font][font face="Times-Bold" size="10.000"]Eisenbahngeschäfte.  [/font][font face="Times-Roman" size="9.500"](Priorität  beim  Ständerat.)  [/font]
+[font face="Times-Bold" size="10.000"]47 [/font][font face="Times-Roman" size="9.500"](43).  [/font][font face="Times-BoldItalic" size="10.500"]s [/font][font face="Times-Bold" size="10.000"]Stundenzonenzeit.  [/font][font face="Times-Roman" size="9.500"]Bericht  des  Bundesrates  vom  [/font][font face="Times-Bold" size="10.000"]17.  [/font][font face="Times-Roman" size="9.500"]Juni 1892  (Bundesblatt III. 1034),  betr.  die  Einführung  der Stunden­ [/font]
+[font face="Times-Bold" size="10.000"]48 [/font][font face="Times-Roman" size="9.500"](45).  [/font][font face="Times-Italic" size="8.000"]n [/font][font face="Times-Bold" size="10.000"]Motionen  Comtesse  und  Curti.  [/font][font face="Times-Roman" size="9.500"]Bericht  des  Bundesrates  vom  7.  Juni  1892  (Bundesblatt  III.  473)  zu  den  Motionen  [/font]
+[font face="Times-Bold" size="10.000"]49[/font][font face="Times-Roman" size="9.500"](48).  [/font][font face="Times-Italic" size="8.000"]n [/font][font face="Times-Bold" size="10.000"]Postregalgesetz.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Gesetzes-Entwurf  vom  14. Januar 1893  (Bundesblatt  I.  77)  zu  einem  neuen  Bundesgesetze  [/font]
+[font face="Times-Bold" size="10.000"]50 [/font][font face="Times-Roman" size="9.500"](49).  [/font][font face="Times-BoldItalic" size="10.500"]s [/font][font face="Times-Bold" size="10.000"]Telephonverbindung  zwischen Tessin  und  der  innerschweiz.  [/font][font face="Times-Roman" size="9.500"]Bericht  des  Bundesrates  vom  [/font][font face="Times-Bold" size="10.000"]24.  [/font][font face="Times-Roman" size="9.500"]Januar  1893  [/font]
+[font face="Times-Bold" size="10.000"]51.  [/font][font face="Times-BoldItalic" size="10.500"]n [/font][font face="Times-Bold" size="10.000"]Telephongebühren.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Gesetzes-Entwurf  vom  15.  November  1892  (Bundesblatt  V.  313),  betr.  Ermässigung  der  [/font]
+[font face="Times-Bold" size="10.000"]52 [/font][font face="Times-Roman" size="9.500"](50)  [/font][font face="Times-Bold" size="10.000"]Motion  von Hrn.  Nationalrat  Brunner  [/font][font face="Times-Roman" size="9.500"]und  Mitunterzeichnern,  vom  15.  Juni  1892.  [/font]
+[font face="Times-Bold" size="10.500"]2, ÄT ffiÄ  WsStìSju ». .«[/font][font face="Times-Bold" size="6.820"]s[/font][font face="Times-Bold" size="10.500"].»  [/font]
+[font face="Times-Bold" size="10.000"]56.  [/font][font face="Times-BoldItalic" size="10.500"]n [/font][font face="Times-Bold" size="10.000"]Zollrückvergütung  auf  Mais.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und Beschlusses-Entwurf  vom  2. Juni  1893  (Bundesblatt  [/font][font face="Times-Bold" size="10.000"]III.  [/font][font face="Times-Roman" size="9.500"]258),  betreffend  die  [/font]
+[font face="Times-Bold" size="10.000"]55 [/font][font face="Times-Roman" size="9.500"](63)  [/font][font face="Times-Bold" size="10.000"]Motion  von  Hrn.  Nationalrat  Brenner  [/font][font face="Times-Roman" size="9.500"]und  Mitunterzeichnern,  vom  15.  März  1893.  [/font]
+[font face="Times-Bold" size="10.000"]53[/font][font face="Times-Roman" size="9.500"](51).  [/font][font face="Times-Bold" size="10.000"]Motion  von  Hrn.  Nationalrat  Ador  [/font][font face="Times-Roman" size="9.500"]und  Mitunterzeichnern,  vom  15.  Juni 1892.  [/font]
+[font face="Times-Bold" size="10.000"]54[/font][font face="Times-Roman" size="9.500"](53).  [/font][font face="Times-Bold" size="10.000"]Motion  von  Hrn.  Nationalrat  Curti  [/font][font face="Times-Roman" size="9.500"]und  Mitunterzeichnern,  vom  20.  Juni  1892.  [/font]
+[font face="Times-Roman" size="9.500"]57.  [/font][font face="Times-Bold" size="10.000"]Motion  von  Hrn.  Nationalrat  Aeby  [/font][font face="Times-Roman" size="9.500"]und  Mitunterzeichnern,  vom  5.  Juni  1893.  [/font]
+[font face="Times-Roman" size="9.500"]58.  [/font][font face="Times-Bold" size="10.000"]Begnadigungsgesuch  Boffa.  [/font][font face="Times-Roman" size="9.500"]Bericht  des  Bundesrates  vom  21.  Juni  1893  (Bundesblatt  III.  631),  betr.  das  Begnadigungsge­ [/font]
+[font face="Times-Roman" size="9.500"]59.  [/font][font face="Times-Italic" size="8.000"]n [/font][font face="Times-Bold" size="10.000"]Petition  der  Zuckerfabrik  „Helvetia" in  Monthey,  [/font][font face="Times-Roman" size="9.500"]vom  25. Mai  1893,  um  Ermässigung  des  Eingangszolles  für  Boh[/font]
+[font face="Times-Bold" size="10.000"]60.  Motion  von  Hrn.  Nationalrat  Hochstrasser  [/font][font face="Times-Roman" size="9.500"]und  Mitunterzeichner,  vom  10.  Juni  1893.  [/font]
+[font face="Times-Bold" size="10.000"]61.  Interpellation  von  Hrn.  Nationalrat  Jeanhenry  [/font][font face="Times-Roman" size="9.500"]und  Mitunterzeichnern,  vom  16.  Juni  1893.  [/font]
+[font face="Times-Bold" size="10.000"]62.  n Petition des  [/font][font face="Times-Bold" size="11.000"]Schweiz.  [/font][font face="Times-Bold" size="10.000"]Typographenbundes.  [/font][font face="Times-Roman" size="9.500"]Bericht  des  Bundesrates  vom  19.  Juni  1893  (Bundesblatt  III.  544)  betr  [/font]
+[font face="Times-Bold" size="10.000"]63.  [/font][font face="Times-BoldItalic" size="10.500"]s [/font][font face="Times-Bold" size="10.000"]Getreide und  Fouragemagazine  in  Bern.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom 19.  Juni  1893  (Bundesblatt III. 539)  [/font]
+[font face="Times-Bold" size="10.000"]64.  [/font][font face="Times-BoldItalic" size="10.500"]s [/font][font face="Times-Bold" size="10.000"]Postund  Telegraphengebäude  in  Zürich.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  Beschlusses-Entwurf  vom 19.  Juni 1893 (Bundesblatt III  534)  [/font]
+[font face="Times-Bold" size="10.000"]65.  Motion  von  Hrn.  Ständerat  Héridier,  [/font][font face="Times-Roman" size="9.500"]vom  21. Juni  1893.  [/font]
+[font face="Times-Bold" size="10.000"]66.  [/font][font face="Times-BoldItalic" size="10.500"]n [/font][font face="Times-Bold" size="10.000"]Vereinbarung  mit  Frankreich  betr.  die  Handelsreisenden.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und  [/font][font face="Times-Roman" size="9.000"]Beschlusses[/font][font face="Times-Roman" size="9.500"]-Entwurf  vom 19. Juni  1893  [/font]
+[font face="Times-Bold" size="10.000"]67.  Bundesgericht,  Neubestellung.  [/font]
+[font face="Times-Bold" size="10.000"]68.  8 Nachtragscredite  für  das  Bundesgericht  pro  1893.  [/font][font face="Times-Roman" size="9.500"]Botschaft  und [/font][font face="Times-Roman" size="7.500"]Beschlusses-[/font][font face="Times-Roman" size="9.500"]Entwurf  vom 23.  Juni 1893  (Bundesblatt  [/font]
+[font face="Times-Bold" size="10.000"]69.  s Reorganisation  und  Besoldung  der  Beamten  des  Eisenbahndepartements.  [/font]
+[font face="Times-Bold" size="10.000"]70  [/font][font face="Times-Bold" size="10.500"]Motion  der  HH.  Nationalräte  Fonjallaz,  Ceresole  [/font][font face="Times-Roman" size="9.500"]und  Mitunterzeichner,  vom  [/font][font face="Times-Bold" size="10.500"]26.  [/font][font face="Times-Roman" size="9.500"]Juni  [/font][font face="Times-Bold" size="10.500"]1893.  [/font]
diff --git a/src/python/utils_pandas.py b/src/python/utils_pandas.py
index 2b7f1b727bc1e3125da392531d669eb8bf5fe0f3..d7a9fd5c047e9af702ae107d0928d0483dd17613 100644
--- a/src/python/utils_pandas.py
+++ b/src/python/utils_pandas.py
@@ -47,6 +47,7 @@ def split_in_data_and_id(data_frame, id_col_names):
 def rows_unique(df, on):
     unique_elems = df[on].drop_duplicates()
     all_elems_unique = (len(unique_elems) == len(df))
+    print('Length original df {} and after dropping {}'.format(len(df), len(unique_elems)))
     return all_elems_unique
 
 def one_hot_dataframe_to_y(df, class_column_names):