Skip to content
Snippets Groups Projects
Commit bb284002 authored by Miguel Angel Reina Ortega's avatar Miguel Angel Reina Ortega
Browse files

Some cleanup for handling of grid tables

parent 708d9fb8
Branches
No related tags found
No related merge requests found
...@@ -7,6 +7,8 @@ ...@@ -7,6 +7,8 @@
# directory structure. # directory structure.
# #
from __future__ import annotations from __future__ import annotations
import logging
from enum import Enum, auto from enum import Enum, auto
import argparse, re, os, shutil, hashlib, base64 import argparse, re, os, shutil, hashlib, base64
from dataclasses import dataclass from dataclasses import dataclass
...@@ -485,6 +487,19 @@ def parse_pandoc_table_with_spans(pandoc_table): ...@@ -485,6 +487,19 @@ def parse_pandoc_table_with_spans(pandoc_table):
self.list_flag = False self.list_flag = False
self.auxiliar_index = None self.auxiliar_index = None
def set_alignment(self):
    """Set ``self.alignment`` from the header separator line.

    Scans the header delimiter positions in order and selects the first
    header column whose delimiter position is at or beyond
    ``self.position``; that column's default alignment is stored in
    ``self.alignment``.

    Reads ``default_alignments`` and ``header_delimiter_positions`` from
    the enclosing scope (they are not parameters of this method).

    Raises:
        ValueError: if ``self.position`` lies beyond every header
            delimiter position, i.e. the cell cannot be mapped to any
            header column.
    """
    # zip() stops at the shorter list, so a position is never looked up
    # for a column that has no default alignment (and vice versa).
    for delimiter_position, alignment in zip(header_delimiter_positions, default_alignments):
        if self.position <= delimiter_position:
            self.alignment = alignment
            return
    raise ValueError("Invalid table formatting")
class Row(): class Row():
""" Represents a row in the markdown file. """ """ Represents a row in the markdown file. """
cells:list[Cell] = [] cells:list[Cell] = []
...@@ -492,6 +507,12 @@ def parse_pandoc_table_with_spans(pandoc_table): ...@@ -492,6 +507,12 @@ def parse_pandoc_table_with_spans(pandoc_table):
def __init__(self, length: int = 1) -> None: def __init__(self, length: int = 1) -> None:
self.cells = [Cell() for _ in range(length)] self.cells = [Cell() for _ in range(length)]
# Subscript read access: ``row[item]`` returns ``row.cells[item]``, so callers
# do not need to spell out ``.cells``. ``item`` is passed to the list unchanged.
def __getitem__(self, item):
return self.cells[item]
# Subscript write access: ``row[key] = value`` stores ``value`` in
# ``row.cells[key]``, replacing whatever was at that index.
def __setitem__(self, key, value):
self.cells[key] = value
# Detect separator lines by pattern (it does not take into account partial separators # Detect separator lines by pattern (it does not take into account partial separators
def is_separator(line): def is_separator(line):
return _matchGridTableSeparator.match(line) return _matchGridTableSeparator.match(line)
...@@ -573,7 +594,7 @@ def parse_pandoc_table_with_spans(pandoc_table): ...@@ -573,7 +594,7 @@ def parse_pandoc_table_with_spans(pandoc_table):
has_header = True has_header = True
header_separator_index = index header_separator_index = index
header_rows = [] header_rows = []
parts = re.split(r"\s*\+\s*", lines[index].strip("+")) parts = re.split(r"\+", lines[index].strip("+"))
default_alignments = [] default_alignments = []
#Calculate default alignments and positions of delimiters #Calculate default alignments and positions of delimiters
for part_index in range(len(parts)): for part_index in range(len(parts)):
...@@ -592,9 +613,6 @@ def parse_pandoc_table_with_spans(pandoc_table): ...@@ -592,9 +613,6 @@ def parse_pandoc_table_with_spans(pandoc_table):
for row in range(len(separator_indices) - 1): for row in range(len(separator_indices) - 1):
table_row = [] table_row = []
auxiliar_rows = [] auxiliar_rows = []
auxiliar_row = []
use_auxiliar_row = []
list_flags = []
has_merged_cells = False has_merged_cells = False
in_data_row = False in_data_row = False
start, end = separator_indices[row], separator_indices[row + 1] start, end = separator_indices[row], separator_indices[row + 1]
...@@ -623,45 +641,31 @@ def parse_pandoc_table_with_spans(pandoc_table): ...@@ -623,45 +641,31 @@ def parse_pandoc_table_with_spans(pandoc_table):
table_row = Row(number_of_columns_row) table_row = Row(number_of_columns_row)
for i in range(number_of_columns_row): for i in range(number_of_columns_row):
delimiter_index += len(parts[i]) + 1 delimiter_index += len(parts[i]) + 1
table_row.cells[i].alignment = default_alignments[i] if i == 0 else "align=\"center\"" table_row[i].alignment = default_alignments[i] if i == 0 else "align=\"center\""
table_row.cells[i].position = delimiter_index # Position of cell delimiter + table_row[i].position = delimiter_index # Position of cell delimiter +
#Set alignment as defined by header separator line #Set alignment as defined by header separator line
while header_delimiter_index in range(len(default_alignments)) and table_row.cells[i].position > header_delimiter_positions[header_delimiter_index]: table_row[i].set_alignment()
header_delimiter_index += 1
if header_delimiter_index in range(len(default_alignments)):
if table_row.cells[i].position < header_delimiter_positions[header_delimiter_index]:
table_row.cells[i].alignment = default_alignments[header_delimiter_index]
elif table_row.cells[i].position == header_delimiter_positions[header_delimiter_index]:
table_row.cells[i].alignment = default_alignments[i]
header_delimiter_index += 1
else:
raise ValueError("Invalid table formatting")
#auxiliar_row = Row(number_of_columns)
#for i in range(number_of_columns):
#auxiliar_row.append(default_cell)
#use_auxiliar_row.append(False)
#auxiliar_rows.append({'auxiliar_row':auxiliar_row, 'use_auxiliar':use_auxiliar_row, 'list_flags':list_flags})
elif in_data_row: elif in_data_row:
# Regular data row or partial separator # Regular data row or partial separator
if _matchGridTableBodySeparator.match(line): # Partial separator if _matchGridTableBodySeparator.match(line): # Partial separator
has_merged_cells = True has_merged_cells = True
cells = re.split(r"[\|\+]", line.strip("|").strip("+")) # (?<!\\)[\|\+]
#Add auxiliar line, set delimiters for each cell #Add auxiliar line, set delimiters for each cell
auxiliar_rows.append(Row(number_of_columns)) auxiliar_rows.append(Row(number_of_columns))
aux_delimiter_index = 0 aux_delimiter_index = 0
for i in range(number_of_columns_row): for auxiliar_cell_index in range(number_of_columns):
aux_delimiter_index += len(parts[i]) + 1 aux_delimiter_index += len(cells[auxiliar_cell_index]) + 1
auxiliar_rows[-1].cells[i].position = aux_delimiter_index # Position of cell delimiter + auxiliar_rows[-1][auxiliar_cell_index].position = aux_delimiter_index # Position of cell delimiter +
# Index with this loop's own variable: `i` is left over from an earlier loop,
# so using it would set the alignment of one unrelated cell over and over.
auxiliar_rows[-1][auxiliar_cell_index].set_alignment()
cells = re.split(r"\s*[\|\+]\s*", line.strip("|").strip("+")) # (?<!\\)[\|\+]
if len(cells) <= number_of_columns: # Colspan: Positions of | with respect to + need to be determined if len(cells) <= number_of_columns: # Colspan: Positions of | with respect to + need to be determined
for i in range(len(cells)): for i in range(len(cells)):
if _matchGridTableBodySeparatorLine.match(cells[i]): # A new row is to be added if _matchGridTableBodySeparatorLine.match(cells[i]): # A new row is to be added
#auxiliar_rows[-1]['use_auxiliar_row'][i] = True #auxiliar_rows[-1]['use_auxiliar_row'][i] = True
auxiliar_rows[-1].cells[i].list_flag = False auxiliar_rows[-1][i].list_flag = False
table_row.cells[i].auxiliar_index = len(auxiliar_rows)-1 table_row[i].auxiliar_index = len(auxiliar_rows)-1
#if cells[i].startswith(":") and not cells[i].endswith(":"): #if cells[i].startswith(":") and not cells[i].endswith(":"):
# auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"left\"" # auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"left\""
#elif not cells[i].startswith(":") and cells[i].endswith(":"): #elif not cells[i].startswith(":") and cells[i].endswith(":"):
...@@ -670,37 +674,20 @@ def parse_pandoc_table_with_spans(pandoc_table): ...@@ -670,37 +674,20 @@ def parse_pandoc_table_with_spans(pandoc_table):
# auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"center\"" # auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"center\""
else: else:
# Handle content of the cell # Handle content of the cell
if table_row.cells[i].auxiliar_index is not None: # and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]: if table_row[i].auxiliar_index is not None: # and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
auxiliar_rows[table_row.cells[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index][i], cells[i]) auxiliar_rows[table_row[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row[i].auxiliar_index][i], cells[i])
if not auxiliar_rows[table_row.cells[i].auxiliar_index][i].colspan_adjusted: if not auxiliar_rows[table_row[i].auxiliar_index][i].colspan_adjusted:
auxiliar_rows[table_row.cells[i].auxiliar_index][i].colspan_adjusted = True auxiliar_rows[table_row[i].auxiliar_index][i].colspan_adjusted = True
# TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator # TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
auxiliar_rows[table_row.cells[i].auxiliar_index][i] = adjust_colspan(auxiliar_rows[table_row.cells[i].auxiliar_index], i, len(cells), line, number_of_columns, delimiter_positions) auxiliar_rows[table_row[i].auxiliar_index][i] = adjust_colspan(auxiliar_rows[table_row[i].auxiliar_index], i, len(cells), line, number_of_columns, delimiter_positions)
else: else:
table_row.cells[i] = handling_content(table_row.cells[i], cells[i]) table_row[i] = handling_content(table_row[i], cells[i])
# Cell which is not separator # Cell which is not separator
table_row.cells[i].rowspan += 1 table_row[i].rowspan += 1
if not table_row.cells[i].colspan_adjusted: if not table_row.cells[i].colspan_adjusted:
table_row.cells[i].colspan_adjusted = True table_row[i].colspan_adjusted = True
#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator #TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
table_row.cells[i] = adjust_colspan(table_row.cells, i, len(cells), line, number_of_columns, delimiter_positions) table_row[i] = adjust_colspan(table_row, i, len(cells), line, number_of_columns, delimiter_positions)
#elif len(cells) == number_of_columns: # Simple row with partial separator, # A new row is to be added
# for i in range(len(cells)):
# if _matchGridTableBodySeparatorLine.match(cells[i]): # Update cell in new row
# use_auxiliar_row[i] = True
# list_flags[i] = False
# if cells[i].startswith(":") and not cells[i].endswith(":"):
# auxiliar_row[i]['alignment'] = "align=\"left\""
# elif not cells[i].startswith(":") and cells[i].endswith(":"):
# auxiliar_row[i]['alignment'] = "align=\"right\""
# else:
# auxiliar_row[i]['alignment'] = "align=\"center\""
# else:
# #Handle content of the cell
# list_flags[i], table_row[i] = handling_content(table_row[i], cells[i],list_flags[i])
# # Cell which is not separator
# table_row[i]['rowspan'] += 1
# # Adjusting of colspan not needed, no colspan as number of cells is equal to number of columns
else: else:
raise ValueError("More cells than columns found") raise ValueError("More cells than columns found")
else: # Data row else: # Data row
...@@ -708,30 +695,29 @@ def parse_pandoc_table_with_spans(pandoc_table): ...@@ -708,30 +695,29 @@ def parse_pandoc_table_with_spans(pandoc_table):
if len(cells) < number_of_columns: # Colspan: Positions of | with respect to + need to be determined if len(cells) < number_of_columns: # Colspan: Positions of | with respect to + need to be determined
for i in range(len(cells)): for i in range(len(cells)):
# Handle content of the cell # Handle content of the cell
if table_row.cells[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]: if table_row[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i], cells[i]) auxiliar_rows[table_row.cells[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row[i].auxiliar_index][i], cells[i])
if not auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i].colspan_adjusted: if not auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i].colspan_adjusted:
auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i].colspan_adjusted = True auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i].colspan_adjusted = True
#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator #TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = adjust_colspan(auxiliar_rows[table_row.cells[i].auxiliar_index].cells, i, len(cells), line, number_of_columns, delimiter_positions) auxiliar_rows[table_row[i].auxiliar_index][i] = adjust_colspan(auxiliar_rows[table_row[i].auxiliar_index].cells, i, len(cells), line, number_of_columns, delimiter_positions)
else: else:
table_row.cells[i] = handling_content(table_row.cells[i], cells[i]) table_row[i] = handling_content(table_row[i], cells[i])
if not table_row.cells[i].colspan_adjusted: if not table_row.cells[i].colspan_adjusted:
table_row.cells[i].colspan_adjusted = True table_row[i].colspan_adjusted = True
table_row.cells[i] = adjust_colspan(table_row.cells, i, len(cells), line, number_of_columns, delimiter_positions) table_row[i] = adjust_colspan(table_row.cells, i, len(cells), line, number_of_columns, delimiter_positions)
elif len(cells) == number_of_columns: # Simple row elif len(cells) == number_of_columns: # Simple row
for i in range(len(cells)): for i in range(len(cells)):
if table_row.cells[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]: if table_row[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i], cells[i]) auxiliar_rows[table_row[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row[i].auxiliar_index][i], cells[i])
else: else:
# Handle content of the cell # Handle content of the cell
table_row.cells[i] = handling_content(table_row.cells[i], cells[i]) table_row[i] = handling_content(table_row[i], cells[i])
else: else:
raise ValueError("More cells than columns found") raise ValueError("More cells than columns found")
else: else:
raise ValueError("No separator line found for row starting") raise ValueError("No separator line found for row starting")
if has_header and start >= header_separator_index: # table_row and auxiliar_row are part of data_rows if has_header and start >= header_separator_index: # table_row and auxiliar_row are part of data_rows
data_rows.append(table_row.cells) data_rows.append(table_row.cells)
if has_merged_cells: if has_merged_cells:
...@@ -759,7 +745,7 @@ def parse_pandoc_table_with_spans(pandoc_table): ...@@ -759,7 +745,7 @@ def parse_pandoc_table_with_spans(pandoc_table):
for cell in row: for cell in row:
if cell.content is not None: if cell.content is not None:
# Replacing "<" by &lt; # Replacing "<" by &lt;
cell.content = cell.content.replace("<", "&lt;") #cell.content = cell.content.replace("<", "&lt;")
#Bold #Bold
for bold_characters in ["**", "__"]: for bold_characters in ["**", "__"]:
...@@ -828,8 +814,12 @@ def generate_html_table_with_spans(pandoc_table): ...@@ -828,8 +814,12 @@ def generate_html_table_with_spans(pandoc_table):
:param pandoc_table: String of the Pandoc-style grid table. :param pandoc_table: String of the Pandoc-style grid table.
:return: HTML string. :return: HTML string.
""" """
try:
grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table) grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)
except:
logging.ERROR("Grid table could not be generated")
return "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS"
else:
html = "<table>\n" html = "<table>\n"
has_header = False has_header = False
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment