diff --git a/src/python/def_classes.py b/src/python/def_classes.py index de0c2c8bdf8bbe482c5fdd726fd7de4a724d849e..ba30e380767cdca52ddcbcf21f4aecf2ea829a8b 100644 --- a/src/python/def_classes.py +++ b/src/python/def_classes.py @@ -545,10 +545,39 @@ class Document: if flag_error: print(str(ind_page) + ': non existing page!') else: - im_met = plot_tools.plot_labelled_boxes(imarray,label_textlines, list_allcoords_textlines) + im_met, groups, colors = plot_tools.plot_labelled_boxes(imarray,label_textlines, list_allcoords_textlines) + + in_coord = 0 + coords = in_coord + np.array([0, 0, 10, 10]) + inc_page = 20 + flag_notinto = 1 + for ind_g, i_g in enumerate(groups): + if ind_g >= int(len(groups)/2) and flag_notinto: + flag_notinto = 0 + coords[0] = in_coord + coords[1] += int(im_met.shape[1]/1.5) + coords[2] = in_coord + 10 + coords[3] += int(im_met.shape[1]/1.5) + im_met = plot_tools.lines_box(im_met, coords, colors[ind_g], thick_line = 6) + coords[0] += inc_page + coords[2] += inc_page + self._plot_save(im_met, 'Textboxes labelled', 'TextbLabel', ind_page, self.path_file, - flag_plot, flag_save_figs) + flag_plot, flag_save_figs) + coords = in_coord + np.array([0, 0, 10, 10]) + flag_notinto = 1 + for ind_g, i_g in enumerate(groups): + if ind_g >= int(len(groups)/2) and flag_notinto: + flag_notinto = 0 + coords[0] = in_coord + coords[1] += int(im_met.shape[1]/1.5) + coords[2] = in_coord + 10 + coords[3] += int(im_met.shape[1]/1.5) + plt.text(coords[1] + 10, coords[2], i_g, fontsize = 10, va = 'bottom', ha = 'left') + coords[0] += inc_page + coords[2] += inc_page + def plot_textl_ordered(self, range_pages = range(1), suffix_xml = '_data', flag_plot = 1, flag_save_figs = 0, name_outxml = '02_extractedxml', diff --git a/src/python/plot_tools.py b/src/python/plot_tools.py index a2079b6ac74551a9597e0e26408b817ed4045670..da5b6c68155f4b8b3e5c324ddf7ce7fc6e861cb4 100644 --- a/src/python/plot_tools.py +++ b/src/python/plot_tools.py @@ -13,6 +13,7 @@ import numpy as np import os import pickle import re +from PIL import Image, ImageDraw, ImageFont import matplotlib.pyplot as plt @@ -24,7 +25,7 @@ def lines_box(img, coord, color_vec, thick_line): # corners in topleftbottomright, and rows and colums if isinstance(color_vec,str): color_vec = np.uint8(np.array(Color(color_vec).rgb)*255) - + #print(type(color_vec[0])) img[(coord[0]):(coord[0]+thick_line),coord[1]:coord[3],:] = color_vec img[(coord[2]-thick_line):(coord[2]),coord[1]:coord[3],:] = color_vec img[coord[0]:coord[2],(coord[1]):(coord[1]+thick_line),:] = color_vec @@ -165,22 +166,18 @@ def plot_labelled_boxes(img,label_textlines, list_allcoords_textlines, thick_l = for ind_t in label_textlines['text_inheader']: img_boxes = lines_box(img_boxes, list_allcoords_textlines[:,ind_t].astype(np.uint32), color_vec = 'yellow', thick_line = thick_l) + color_nid = np.uint8(np.array([127, 0, 180])) if 'notidentified' in label_textlines.keys(): for ind_t in label_textlines['notidentified']: img_boxes = lines_box(img_boxes, list_allcoords_textlines[:,ind_t].astype(np.uint32), - color_vec = [0.5, 0, 0.7], thick_line = thick_l) + color_vec = color_nid, thick_line = thick_l) groups = ['Footnote', 'Header', 'Header of single col', 'Page number', '1st column', '2nd column', 'Text in header', 'Not Identified'] - colors = ['red', 'blue', 'black', 'green', 'magenta', 'cyan', 'yellow', [0.5, 0, 0.7]] - ''' - coords = [30, 30, 40, 40] - inc_page = img_boxes.shape[1]/len(groups) - for ind_g, i_g in enumerate(groups): - legend_toppage(i_g, colors[ind_g], xin, yin) - coords[0] += inc_page - coords[1] += inc_page - ''' - return img_boxes + colors = ['red', 'blue', 'black', 'green', 'magenta', 'cyan', 'yellow', color_nid] + + + + return img_boxes, groups, colors def plot_margins(img, side_margins, topb_margins, gap_line = 2, thick_l = 6): @@ -266,9 +263,3 @@ def plot_save_parallel(folder_pickles): name_fig = full_filename[:-3] + format_fig fig.savefig(name_fig, format = format_fig, dpi = 200) plt.close(fig) - -def legend_toppage(text_l, color_l, coord, img): - lines_box(img, coord, color_l, thick_line = 6) - #plt.Circle((x, y), 3, color = color_l) - plt.text(coord[0] + 4, coord[1], text_l, fontsize = 7, va = 'bottom', ha = 'left') - pass \ No newline at end of file