Skip to content
Snippets Groups Projects
Commit 6387e137 authored by Luis Salamanca
Browse files

More errors when parsing from corrected XML

parent 8d260488
No related branches found
No related tags found
No related merge requests found
Pipeline #1727 passed
This diff is collapsed.
......@@ -402,7 +402,7 @@ class Document:
if 'imgobj' not in self.__dict__.keys():
self.pdf2imgobj()
if 'XML_main' not in self.__dict__.keys():
if 'XML_main_corr' not in self.__dict__.keys():
name_tar = self.folder_database + '/' + str(self.year) + '/' + name_outcorrxml + '.tar.gz'
if os.path.isfile(name_tar):
name_xml = './' + str(self.year) + '/' + str(self.id_doc) + suffix_xml + 'corr.xml'
......@@ -411,11 +411,17 @@ class Document:
#print('Run this')
h_xml = utils_proc.get_handlerfile(name_xml, self.folder_database, name_outcorrxml)
XML_tree = ET.parse(h_xml)
self.XML_main = XML_tree.getroot()
self.XML_main_corr = XML_tree.getroot()
else:
self.pdf2xml(pages = 'all', suffix_xml = suffix_xml, flag_save = 0)
print('You need to have the tar file to use flag_compute = 0!')
flag_error = 1
return 0, 0, 0, 0, 0, 0, flag_error
#self.pdf2xml(pages = 'all', suffix_xml = suffix_xml, flag_save = 0)
else:
self.pdf2xml(pages = 'all', suffix_xml = suffix_xml, flag_save = 0)
print('You need to have the tar file to use flag_compute = 0!')
flag_error = 1
return 0, 0, 0, 0, 0, 0, flag_error
#self.pdf2xml(pages = 'all', suffix_xml = suffix_xml, flag_save = 0)
ind_abs = np.array([ind_page]).astype(int).reshape((-1,))
else:
#print('Run this')
......@@ -425,12 +431,12 @@ class Document:
#print(ind_abs, type(ind_abs))
#print(self.XML_main, len(self.imgobj))
if ind_page > (len(self.XML_main) - 1):
if ind_page > (len(self.XML_main_corr) - 1):
flag_error = 1
return 0, 0, 0, 0, 0, 0, flag_error
XML_root = ET.Element('pages')
XML_root.append(self.XML_main[ind_abs[0]])
XML_root.append(self.XML_main_corr[ind_abs[0]])
imarray = np.array(self.imgobj[ind_page])
bbox_page = np.array(XML_root[0].attrib['bbox'].split(',')).astype(np.float64)
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment