From d15b1ca019f39184e45281e9253284d160a01944 Mon Sep 17 00:00:00 2001
From: Miguel Angel Reina Ortega <miguelangel.reinaortega@etsi.org>
Date: Fri, 6 Dec 2024 08:32:28 +0100
Subject: [PATCH] Change algorithm to convert grid tables + compacting code

---
 toMkdocs/toMkdocs.py | 179 +++++++++++++++++++++----------------------
 1 file changed, 88 insertions(+), 91 deletions(-)

diff --git a/toMkdocs/toMkdocs.py b/toMkdocs/toMkdocs.py
index c5cb22c..ae0be2a 100644
--- a/toMkdocs/toMkdocs.py
+++ b/toMkdocs/toMkdocs.py
@@ -484,7 +484,6 @@ def parse_pandoc_table_with_spans(pandoc_table):
 			self.alignment = "align=\"center\""
 			self.position = None
 			self.list_flag = False
-			self.auxiliar_index = None
 
 		def set_alignment(self):
 			header_delimiter_index = 0
@@ -512,10 +511,22 @@ def parse_pandoc_table_with_spans(pandoc_table):
 		def __setitem__(self, key, value):
 			self.cells[key] = value
 
+	class RowTracker():
+		"""	Per-column counters, each holding the index in `rows` of the row that column is currently filling. """
+		def __init__(self, items):
+			self.rowTracker = [0 for _ in range(items)]  # one counter per column, every column starts at row 0
+
+		def __getitem__(self, item):
+			return self.rowTracker[item]
+
+		def __setitem__(self, key, value):
+			self.rowTracker[key] = value
+
 	# Detect separator lines by pattern (it does not take into account partial separators
 	def is_separator(line):
 		return _matchGridTableSeparator.match(line)
 
+	# Set content on the cell - concatenating multilines, flagging lists
 	def handling_content(cell, content):
 		if cell.content is None:
 			cell.rowspan += 1
@@ -524,9 +535,9 @@ def parse_pandoc_table_with_spans(pandoc_table):
 				cell.list_flag = True
 				#print(content)
 				cell.content = content.strip() + "\n"  # Add newline to know when the list element ends
-			elif cell.list_flag and cells[i].strip() != "":  # any other content when handling list is concatenated to the last list element
+			elif cell.list_flag and content.strip() != "":  # any other content when handling list is concatenated to the last list element
 				cell.content += content.strip() + "\n"
-			elif cells[i].strip == "":  # separation between list and other paragraph
+			elif content.strip == "":  # separation between list and other paragraph
 				cell.list_flag = False
 				cell.content += "\n" #if not cell['content'].endswith("\n") else ""
 			else:
@@ -538,10 +549,10 @@ def parse_pandoc_table_with_spans(pandoc_table):
 					#cell['content'] = cell['content'].strip("\n")
 				cell.list_flag = True
 				cell.content += content.strip() + "\n"  # Add newline to know when the list element ends
-			elif cell.list_flag and cells[i].strip() != "":  # any other content when handling list is concatenated to the last list element
+			elif cell.list_flag and content.strip() != "":  # any other content when handling list is concatenated to the last list element
 				cell.content = cell.content.strip("\n")
 				cell.content += " " + content.strip() + "\n"
-			elif cells[i].strip() == "":  # separation between list and other paragraph
+			elif content.strip() == "":  # separation between list and other paragraph
 				cell.list_flag = False
 				#content = re.sub(r'\\\s*$', "\n", content.strip())
 				cell.content += "\n" if not cell.content.endswith("\n") else ""
@@ -551,6 +562,7 @@ def parse_pandoc_table_with_spans(pandoc_table):
 		#print(cell['content'])
 		return cell
 
+	# Adjust colspan of a cell
 	def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns, delimiter_positions):
 		for j in range(column_index, number_of_parts):
 			delimiter_start = None
@@ -614,9 +626,8 @@ def parse_pandoc_table_with_spans(pandoc_table):
 
 	data_rows = []
 	for row in range(len(separator_indices) - 1):
-		table_row = []
-		auxiliar_rows = []
-		has_merged_cells = False
+		rows = []
+		rows_tracker = []
 		in_data_row = False
 		start, end = separator_indices[row], separator_indices[row + 1]
 		row_lines = lines[start:end]  # Lines between separators including separator line start as it gives information about the number of columns of the row
@@ -624,10 +635,8 @@ def parse_pandoc_table_with_spans(pandoc_table):
 			# Combine multiline content into single strings for each cell
 			for line in row_lines:
 				if is_separator(line) and not in_data_row:
-					number_of_columns_row = line.count("+") - 1
 					in_data_row = True
 					parts = re.split(r"\s*\+\s*", line.strip("+"))
-					# Add as many cells as columns with span attributes
 					delimiter_index = 0
 					# Determine the alignment of the cell - In order to replicate Pandoc's behaviour (do not support of alignment colons on separator lines (just header separator)
 					# we need to assign the default alignment as defined in the header separator line
@@ -640,113 +649,101 @@ def parse_pandoc_table_with_spans(pandoc_table):
 					#		alignments.append("align=\"right\"")
 					#	else:
 					#		alignments.append("align=\"center\"")
-					header_delimiter_index = 0
-					table_row = Row(number_of_columns)
+					rows.append(Row(number_of_columns))
+					#rows_tracker = [RowTracker() for _ in range(number_of_columns)]
+					rows_tracker = RowTracker(number_of_columns)
 					i = 0
-					j = 0
-					while i in range(number_of_columns) and j in range(len(parts)):
-						delimiter_index += len(parts[j]) + 1
-						#table_row[i].alignment = default_alignments[i] if i == 0 else "align=\"center\""
-						table_row[i].position = delimiter_index # Position of cell delimiter +
-						#Set alignment as defined by header separator line
-						table_row[i].set_alignment()
-						while delimiter_index > delimiter_positions[i]:
+					for j in range(len(parts)):
+						if i in range(number_of_columns):
+							delimiter_index += len(parts[j]) + 1
+							# Set position
+							rows[-1][i].position = delimiter_index # Position of cell delimiter +
+							# Set alignment as defined by header separator line
+							rows[-1][i].set_alignment()
+							while delimiter_index > delimiter_positions[i]:
+								i += 1
 							i += 1
-						i += 1
-						j += 1
+
 				elif in_data_row:
 					# Regular data row or partial separator
 					if _matchGridTableBodySeparator.match(line): # Partial separator
-						has_merged_cells = True
-						cells = re.split(r"[\|\+]", line.strip("|").strip("+"))  # (?<!\\)[\|\+]
-						#Add auxiliar line, set delimiters for each cell
-						auxiliar_rows.append(Row(number_of_columns))
+						cells_content = re.split(r"[\|\+]", line.strip("|").strip("+"))  # (?<!\\)[\|\+]
+						#Add another row, set delimiters for each cell
+						rows.append(Row(number_of_columns))
 						aux_delimiter_index = 0
-						for auxiliar_cell_index in range(number_of_columns):
-							aux_delimiter_index += len(cells[auxiliar_cell_index]) + 1
-							auxiliar_rows[-1][auxiliar_cell_index].position = aux_delimiter_index  # Position of cell delimiter +
-							auxiliar_rows[-1][auxiliar_cell_index].set_alignment()
-
-						if len(cells) <= number_of_columns: # Colspan: Positions of | with respect to + need to be determined
-							table_row_index = 0
-							for i in range(len(cells)):
-								if _matchGridTableBodySeparatorLine.match(cells[i]):  # A new row is to be added
+						auxiliar_cell_index = 0
+						for i in range(len(cells_content)):
+							if auxiliar_cell_index in range(number_of_columns):
+								aux_delimiter_index += len(cells_content[i]) + 1
+								rows[-1][auxiliar_cell_index].position = aux_delimiter_index  # Position of cell delimiter +
+								rows[-1][auxiliar_cell_index].set_alignment()
+								while aux_delimiter_index > delimiter_positions[auxiliar_cell_index]:
+									auxiliar_cell_index += 1
+								auxiliar_cell_index += 1
+
+						if len(cells_content) <= number_of_columns: # Colspan: Positions of | with respect to + need to be determined
+							column_index = 0
+							for i in range(len(cells_content)):
+								if _matchGridTableBodySeparatorLine.match(cells_content[i]):  # A new row is to be added
+									rows_tracker[column_index] += 1
+									rows[rows_tracker[column_index]][column_index].list_flag = False
 									#auxiliar_rows[-1]['use_auxiliar_row'][i] = True
-									auxiliar_rows[-1][i].list_flag = False
-									table_row[i].auxiliar_index = len(auxiliar_rows)-1
 									#if cells[i].startswith(":") and not cells[i].endswith(":"):
 									#	auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"left\""
 									#elif not cells[i].startswith(":") and  cells[i].endswith(":"):
 									#	auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"right\""
 									#else:
 									#	auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"center\""
+									column_forward = 0
+									for del_index in range(column_index, len(delimiter_positions)):
+										if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[del_index]:
+											column_forward += 1
+											rows_tracker[column_index + column_forward - 1] += 1 if column_forward > 1 else 0
+									column_index += column_forward
+									continue
 								else:
 									# Handle content of the cell
-									if table_row[i].auxiliar_index is not None: # and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
-										auxiliar_rows[table_row[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row[i].auxiliar_index][i], cells[i])
-										if not auxiliar_rows[table_row[i].auxiliar_index][i].colspan_adjusted:
-											auxiliar_rows[table_row[i].auxiliar_index][i].colspan_adjusted = True
-											# TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
-											auxiliar_rows[table_row[i].auxiliar_index][i] = adjust_colspan(auxiliar_rows[table_row[i].auxiliar_index], i, number_of_columns, line, number_of_columns, delimiter_positions)
-											table_row_index += auxiliar_rows[table_row[table_row_index].auxiliar_index][i].colspan - 1
-									else:
-										table_row[table_row_index] = handling_content(table_row[table_row_index], cells[i])
-										# Cell which is not separator
-										table_row[table_row_index].rowspan += 1
-										if not table_row.cells[table_row_index].colspan_adjusted:
-											table_row[table_row_index].colspan_adjusted = True
-											#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
-											table_row[table_row_index] = adjust_colspan(table_row, table_row_index, number_of_columns, line, number_of_columns, delimiter_positions)
-											#table_row_index += table_row[i].colspan - 1 #Move forward index i
-								if table_row[table_row_index].position == delimiter_positions[i]:
-									table_row_index += table_row[table_row_index].colspan if table_row[table_row_index].colspan != 0 else 1
+									rows[rows_tracker[column_index]][column_index] = handling_content(rows[rows_tracker[column_index]][column_index], cells_content[i])
+									rows[rows_tracker[column_index]][column_index].rowspan += 1
+									if not rows[rows_tracker[column_index]][column_index].colspan_adjusted:
+										rows[rows_tracker[column_index]][column_index].colspan_adjusted = True
+										# TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
+										rows[rows_tracker[column_index]][column_index] = adjust_colspan(rows[rows_tracker[column_index]], column_index, number_of_columns, line, number_of_columns, delimiter_positions)
+
+									if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index]:
+										column_index += rows[rows_tracker[column_index]][column_index].colspan if rows[rows_tracker[column_index]][column_index].colspan != 0 else 1
+									continue
+
 						else:
 							raise ValueError("More cells than columns found")
 					else: # Data row
-						cells = re.split(r"\s*\|\s*", line.strip("|"))
-						if len(cells) < number_of_columns: # Colspan: Positions of | with respect to + need to be determined
-							table_row_index = 0
-							for i in range(len(cells)):
+						cells_content = re.split(r"\s*\|\s*", line.strip("|"))
+						column_index = 0
+						if len(cells_content) < number_of_columns: # Colspan: Positions of | with respect to + need to be determined
+							for i in range(len(cells_content)):
 								# Handle content of the cell
-								if table_row[table_row_index].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
-									auxiliar_rows[table_row.cells[table_row_index].auxiliar_index][i] = handling_content(auxiliar_rows[table_row[table_row_index].auxiliar_index][i], cells[i])
-									if not auxiliar_rows[table_row[table_row_index].auxiliar_index].cells[i].colspan_adjusted:
-										auxiliar_rows[table_row[table_row_index].auxiliar_index].cells[i].colspan_adjusted = True
-										#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
-										auxiliar_rows[table_row[table_row_index].auxiliar_index][i] = adjust_colspan(auxiliar_rows[table_row[table_row_index].auxiliar_index].cells, i, number_of_columns, line, number_of_columns, delimiter_positions)
-										table_row_index += auxiliar_rows[table_row[table_row_index].auxiliar_index][i].colspan - 1  # Move forward index i
-								else:
-									table_row[table_row_index] = handling_content(table_row[table_row_index], cells[i])
-									if not table_row.cells[table_row_index].colspan_adjusted:
-										table_row[table_row_index].colspan_adjusted = True
-										table_row[table_row_index] = adjust_colspan(table_row.cells, table_row_index, number_of_columns, line, number_of_columns, delimiter_positions)
-										table_row_index += table_row[table_row_index].colspan - 1  # Move forward index i
-
-								table_row_index += 1
-						elif len(cells) == number_of_columns: # Simple row
-							for i in range(len(cells)):
-								if table_row[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
-									auxiliar_rows[table_row[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row[i].auxiliar_index][i], cells[i])
-								else:
-									# Handle content of the cell
-									table_row[i] = handling_content(table_row[i], cells[i])
+								rows[rows_tracker[column_index]][column_index] = handling_content(rows[rows_tracker[column_index]][column_index], cells_content[i])
+								if not rows[rows_tracker[column_index]][column_index].colspan_adjusted:
+									rows[rows_tracker[column_index]][column_index].colspan_adjusted = True
+									#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
+									rows[rows_tracker[column_index]][column_index] = adjust_colspan(rows[rows_tracker[column_index]], column_index, number_of_columns, line, number_of_columns, delimiter_positions)
+								if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index]:
+									column_index += rows[rows_tracker[column_index]][column_index].colspan  # Move forward index i
+
+						elif len(cells_content) == number_of_columns: # Simple row
+							for i in range(len(cells_content)):
+								rows[rows_tracker[i]][i] = handling_content(rows[rows_tracker[i]][i], cells_content[i])
 						else:
 							raise ValueError("More cells than columns found")
 				else:
 					raise ValueError("No separator line found for row starting")
 
 			if has_header and start >= header_separator_index: # table_row and auxiliar_row are part of data_rows
-				data_rows.append(table_row.cells)
-				if has_merged_cells:
-					for row in auxiliar_rows:
-						#for i in range(len(row.cells)):
-						#	print(row.cells[i].content)
-						data_rows.append(row.cells)
+				for body_row in rows:
+					data_rows.append(body_row.cells)
 			elif has_header and start < header_separator_index: # table_row and auxiliar_row are part of header_rows
-				header_rows.append(table_row.cells)
-				if has_merged_cells:
-					for row in auxiliar_rows:
-						header_rows.append(row.cells)
+				for header_row in rows:
+					header_rows.append(header_row.cells)
 
 	#print(header_rows)
 	#print(data_rows)
-- 
GitLab