Some cleanup for handling of grid tables

bb284002 · Miguel Angel Reina Ortega · 708d9fb8 · bb284002
Commit bb284002 authored 7 months ago by Miguel Angel Reina Ortega
--- a/toMkdocs/toMkdocs.py
+++ b/toMkdocs/toMkdocs.py
@@ -7,6 +7,8 @@
 #	directory structure.
 #
 from __future__ import annotations
+import logging
 from enum import Enum, auto
 import argparse, re, os, shutil, hashlib, base64
 from dataclasses import dataclass
@@ -485,6 +487,19 @@ def parse_pandoc_table_with_spans(pandoc_table):
 			self.list_flag = False
 			self.auxiliar_index = None
+		def set_alignment(self):
+			"""	Set this cell's alignment from the header separator line.
+
+				Finds the first header delimiter located at or after
+				``self.position`` and copies the default alignment stored at the
+				same index into ``self.alignment``.
+
+				``default_alignments`` and ``header_delimiter_positions`` are not
+				parameters; they are free names looked up outside this method
+				when it is called (``default_alignments`` is assigned in
+				parse_pandoc_table_with_spans).
+
+				Raises ValueError("Invalid table formatting") when
+				``self.position`` lies beyond every header delimiter examined.
+			"""
+			for header_delimiter_index in range(len(default_alignments)):
+				# A delimiter exactly at the cell position and one further to the
+				# right both select the alignment at this index, so no separate
+				# "<" / "==" handling is needed.
+				if self.position <= header_delimiter_positions[header_delimiter_index]:
+					self.alignment = default_alignments[header_delimiter_index]
+					return
+			raise ValueError("Invalid table formatting")
 	class Row():
 		"""	Represents a row in the markdown file. """
 		cells:list[Cell] = []
@@ -492,6 +507,12 @@ def parse_pandoc_table_with_spans(pandoc_table):
 		def __init__(self, length: int = 1) -> None:
 			# Build the row from `length` freshly constructed Cell objects, so each
 			# column gets its own Cell instance.
 			self.cells = [Cell() for _ in range(length)]
+		def __getitem__(self, item):
+			# Delegate indexing to the underlying cell list so that row[i] can be
+			# written instead of row.cells[i].
+			return self.cells[item]
+		def __setitem__(self, key, value):
+			# Delegate item assignment to the underlying cell list so that
+			# row[i] = cell replaces the entry at that index of row.cells.
+			self.cells[key] = value
 	# Detect separator lines by pattern (it does not take into account partial separators)
 	def is_separator(line):
 		# Returns whatever _matchGridTableSeparator.match(line) returns.
 		# NOTE(review): _matchGridTableSeparator is defined outside this view;
 		# presumably a compiled regular expression, in which case the result is a
 		# match object for a separator line and None otherwise - confirm.
 		return _matchGridTableSeparator.match(line)
@@ -573,7 +594,7 @@ def parse_pandoc_table_with_spans(pandoc_table):
 			has_header = True
 			header_separator_index = index
 			header_rows = []
-			parts = re.split(r"\s*\+\s*", lines[index].strip("+"))
+			parts = re.split(r"\+", lines[index].strip("+"))
 			default_alignments = []
 			#Calculate default alignments and positions of delimiters
 			for part_index in range(len(parts)):
@@ -592,9 +613,6 @@ def parse_pandoc_table_with_spans(pandoc_table):
 	for row in range(len(separator_indices) - 1):
 		table_row = []
 		auxiliar_rows = []
-		auxiliar_row = []
-		use_auxiliar_row = []
-		list_flags = []
 		has_merged_cells = False
 		in_data_row = False
 		start, end = separator_indices[row], separator_indices[row + 1]
@@ -623,45 +641,31 @@ def parse_pandoc_table_with_spans(pandoc_table):
 					table_row = Row(number_of_columns_row)
 					for i in range(number_of_columns_row):
 						delimiter_index += len(parts[i]) + 1
-						table_row.cells[i].alignment = default_alignments[i] if i == 0 else "align=\"center\""
+						table_row[i].alignment = default_alignments[i] if i == 0 else "align=\"center\""
-						table_row.cells[i].position = delimiter_index # Position of cell delimiter +
+						table_row[i].position = delimiter_index # Position of cell delimiter +
 						#Set alignment as defined by header separator line
-						while header_delimiter_index in range(len(default_alignments)) and table_row.cells[i].position > header_delimiter_positions[header_delimiter_index]:
+						table_row[i].set_alignment()
-							header_delimiter_index += 1
-						if header_delimiter_index in range(len(default_alignments)):
-							if table_row.cells[i].position < header_delimiter_positions[header_delimiter_index]:
-								table_row.cells[i].alignment = default_alignments[header_delimiter_index]
-							elif table_row.cells[i].position == header_delimiter_positions[header_delimiter_index]:
-								table_row.cells[i].alignment = default_alignments[i]
-								header_delimiter_index += 1
-						else:
-							raise ValueError("Invalid table formatting")
-					#auxiliar_row = Row(number_of_columns)
-					#for i in range(number_of_columns):
-						#auxiliar_row.append(default_cell)
-						#use_auxiliar_row.append(False)
-						#auxiliar_rows.append({'auxiliar_row':auxiliar_row, 'use_auxiliar':use_auxiliar_row, 'list_flags':list_flags})
 				elif in_data_row:
 					# Regular data row or partial separator
 					if _matchGridTableBodySeparator.match(line): # Partial separator
 						has_merged_cells = True
+						cells = re.split(r"[\|\+]", line.strip("|").strip("+"))  # (?<!\\)[\|\+]
 						#Add auxiliar line, set delimiters for each cell
 						auxiliar_rows.append(Row(number_of_columns))
 						aux_delimiter_index = 0
-						for i in range(number_of_columns_row):
+						for auxiliar_cell_index in range(number_of_columns):
-							aux_delimiter_index += len(parts[i]) + 1
+							aux_delimiter_index += len(cells[auxiliar_cell_index]) + 1
-							auxiliar_rows[-1].cells[i].position = aux_delimiter_index  # Position of cell delimiter +
+							auxiliar_rows[-1][auxiliar_cell_index].position = aux_delimiter_index  # Position of cell delimiter +
+							# Set the alignment of every auxiliary cell as soon as its position is
+							# known, mirroring the header-row loop; indexing with `i` here would
+							# reuse a stale index left over from an earlier loop.
+							auxiliar_rows[-1][auxiliar_cell_index].set_alignment()
-						cells = re.split(r"\s*[\|\+]\s*", line.strip("|").strip("+")) # (?<!\\)[\|\+]
 						if len(cells) <= number_of_columns: # Colspan: Positions of | with respect to + need to be determined
 							for i in range(len(cells)):
 								if _matchGridTableBodySeparatorLine.match(cells[i]):  # A new row is to be added
 									#auxiliar_rows[-1]['use_auxiliar_row'][i] = True
-									auxiliar_rows[-1].cells[i].list_flag = False
+									auxiliar_rows[-1][i].list_flag = False
-									table_row.cells[i].auxiliar_index = len(auxiliar_rows)-1
+									table_row[i].auxiliar_index = len(auxiliar_rows)-1
 									#if cells[i].startswith(":") and not cells[i].endswith(":"):
 									#	auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"left\""
 									#elif not cells[i].startswith(":") and  cells[i].endswith(":"):
@@ -670,37 +674,20 @@ def parse_pandoc_table_with_spans(pandoc_table):
 									#	auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"center\""
 								else:
 									# Handle content of the cell
-									if table_row.cells[i].auxiliar_index is not None: # and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
+									if table_row[i].auxiliar_index is not None: # and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
-										auxiliar_rows[table_row.cells[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index][i], cells[i])
+										auxiliar_rows[table_row[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row[i].auxiliar_index][i], cells[i])
-										if not auxiliar_rows[table_row.cells[i].auxiliar_index][i].colspan_adjusted:
+										if not auxiliar_rows[table_row[i].auxiliar_index][i].colspan_adjusted:
-											auxiliar_rows[table_row.cells[i].auxiliar_index][i].colspan_adjusted = True
+											auxiliar_rows[table_row[i].auxiliar_index][i].colspan_adjusted = True
 											# TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
-											auxiliar_rows[table_row.cells[i].auxiliar_index][i] = adjust_colspan(auxiliar_rows[table_row.cells[i].auxiliar_index], i, len(cells), line, number_of_columns, delimiter_positions)
+											auxiliar_rows[table_row[i].auxiliar_index][i] = adjust_colspan(auxiliar_rows[table_row[i].auxiliar_index], i, len(cells), line, number_of_columns, delimiter_positions)
 									else:
-										table_row.cells[i] = handling_content(table_row.cells[i], cells[i])
+										table_row[i] = handling_content(table_row[i], cells[i])
 										# Cell which is not separator
-										table_row.cells[i].rowspan += 1
+										table_row[i].rowspan += 1
 										if not table_row.cells[i].colspan_adjusted:
-											table_row.cells[i].colspan_adjusted = True
+											table_row[i].colspan_adjusted = True
 											#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
-											table_row.cells[i] = adjust_colspan(table_row.cells, i, len(cells), line, number_of_columns, delimiter_positions)
+											table_row[i] = adjust_colspan(table_row, i, len(cells), line, number_of_columns, delimiter_positions)
-						#elif len(cells) == number_of_columns: # Simple row with partial separator, # A new row is to be added
-						#	for i in range(len(cells)):
-						#		if _matchGridTableBodySeparatorLine.match(cells[i]):  # Update cell in new row
-						#			use_auxiliar_row[i] = True
-						#			list_flags[i] = False
-						#			if cells[i].startswith(":") and not cells[i].endswith(":"):
-						#				auxiliar_row[i]['alignment'] = "align=\"left\""
-						#			elif not cells[i].startswith(":") and  cells[i].endswith(":"):
-						#				auxiliar_row[i]['alignment'] = "align=\"right\""
-						#			else:
-						#				auxiliar_row[i]['alignment'] = "align=\"center\""
-						#		else:
-						#			#Handle content of the cell
-						#			list_flags[i], table_row[i] = handling_content(table_row[i], cells[i],list_flags[i])
-						#			# Cell which is not separator
-						#			table_row[i]['rowspan'] += 1
-						#			# Adjusting of colspan not needed, no colspan as number of cells is equal to number of columns
 						else:
 							raise ValueError("More cells than columns found")
 					else: # Data row
@@ -708,30 +695,29 @@ def parse_pandoc_table_with_spans(pandoc_table):
 						if len(cells) < number_of_columns: # Colspan: Positions of | with respect to + need to be determined
 							for i in range(len(cells)):
 								# Handle content of the cell
-								if table_row.cells[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
+								if table_row[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
-									auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i], cells[i])
+									auxiliar_rows[table_row.cells[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row[i].auxiliar_index][i], cells[i])
 									if not auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i].colspan_adjusted:
 										auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i].colspan_adjusted = True
 										#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
-										auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = adjust_colspan(auxiliar_rows[table_row.cells[i].auxiliar_index].cells, i, len(cells), line, number_of_columns, delimiter_positions)
+										auxiliar_rows[table_row[i].auxiliar_index][i] = adjust_colspan(auxiliar_rows[table_row[i].auxiliar_index].cells, i, len(cells), line, number_of_columns, delimiter_positions)
 								else:
-									table_row.cells[i] = handling_content(table_row.cells[i], cells[i])
+									table_row[i] = handling_content(table_row[i], cells[i])
 									if not table_row.cells[i].colspan_adjusted:
-										table_row.cells[i].colspan_adjusted = True
+										table_row[i].colspan_adjusted = True
-										table_row.cells[i] = adjust_colspan(table_row.cells, i, len(cells), line, number_of_columns, delimiter_positions)
+										table_row[i] = adjust_colspan(table_row.cells, i, len(cells), line, number_of_columns, delimiter_positions)
 						elif len(cells) == number_of_columns: # Simple row
 							for i in range(len(cells)):
-								if table_row.cells[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
+								if table_row[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
-									auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i], cells[i])
+									auxiliar_rows[table_row[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row[i].auxiliar_index][i], cells[i])
 								else:
 									# Handle content of the cell
-									table_row.cells[i] = handling_content(table_row.cells[i], cells[i])
+									table_row[i] = handling_content(table_row[i], cells[i])
 						else:
 							raise ValueError("More cells than columns found")
 				else:
 					raise ValueError("No separator line found for row starting")
 			if has_header and start >= header_separator_index: # table_row and auxiliar_row are part of data_rows
 				data_rows.append(table_row.cells)
 				if has_merged_cells:
@@ -759,7 +745,7 @@ def parse_pandoc_table_with_spans(pandoc_table):
 			for cell in row:
 				if cell.content is not None:
 					# Replacing "<" by &lt;
-					cell.content = cell.content.replace("<", "&lt;")
+					#cell.content = cell.content.replace("<", "&lt;")
 					#Bold
 					for bold_characters in ["**", "__"]:
@@ -828,8 +814,12 @@ def generate_html_table_with_spans(pandoc_table):
 	:param pandoc_table: String of the Pandoc-style grid table.
 	:return: HTML string.
 	"""
+	try:
 		grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)
+	except Exception:
+		# logging.ERROR is the numeric level constant, not a function: calling it
+		# raised TypeError inside this handler, so the fallback text below was
+		# never returned. logging.exception() logs at ERROR level and appends the
+		# traceback of the parsing failure. Catching Exception (rather than a bare
+		# except) lets KeyboardInterrupt and SystemExit propagate.
+		logging.exception("Grid table could not be generated")
+		return "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS"
+	else:
 		html = "<table>\n"
 		has_header = False