Skip to content
Snippets Groups Projects
Commit 6c8a9ddc authored by Miguel Angel Reina Ortega's avatar Miguel Angel Reina Ortega
Browse files

Some improvements for grid tables conversion

parent 9d0a1d23
No related branches found
No related tags found
1 merge request: !1 "Restructuring and cleaning scripts for Mkdocs"
......@@ -27,12 +27,13 @@ class GridCell:
self.auxiliarIndex:int = 0
def calculateAndSetAlignment(self, headerDelimiterPositions:list[int], defaultAlignments:list[str]) -> None:
def calculateAndSetAlignment(self) -> None:
""" Set the alignment of the cell based on the position of the delimiter.
"""
if self.position is None:
raise ValueError('Cell position must be set before calculating alignment.')
if hasHeader:
headerDelimiterIndex = 0
while headerDelimiterIndex < len(defaultAlignments) and self.position > headerDelimiterPositions[headerDelimiterIndex]:
headerDelimiterIndex += 1
......@@ -44,6 +45,18 @@ class GridCell:
headerDelimiterIndex += 1
else:
raise ValueError('Invalid table formatting')
else:
body_delimiter_index = 0
while body_delimiter_index in range(len(defaultAlignments)) and self.position > delimiterPositions[body_delimiter_index]:
body_delimiter_index += 1
if body_delimiter_index in range(len(defaultAlignments)):
if self.position < delimiterPositions[body_delimiter_index]:
self.alignment = defaultAlignments[body_delimiter_index]
elif self.position == delimiterPositions[body_delimiter_index]:
self.alignment = defaultAlignments[body_delimiter_index]
body_delimiter_index += 1
else:
raise ValueError("Invalid table formatting")
def __str__(self):
......@@ -78,6 +91,48 @@ class GridRow():
def __repr__(self):
return self.__str__()
def check_delimiter_alignment(line: str, delimiters: "str | list[int]" = "|+",
                              delimiter_positions: "list[int] | None" = None) -> bool:
    """
    Check if delimiters in a row align with expected positions.

    Three kinds of lines are distinguished:

    * full separator lines (contain ``+`` but no ``|``): only ``+`` is checked
      and the line must start with ``+``;
    * data lines (contain ``|`` but no ``+``): only ``|`` is checked and the
      line must start with ``|``;
    * anything else (partial separators mixing ``+`` and ``|``): every
      character in ``delimiters`` is checked and the line must start with
      ``+`` or ``|``.

    In every case the delimiter at index 0 is ignored, the last expected
    position must be occupied by a delimiter, and every delimiter found must
    sit on one of the expected positions.

    Args:
        line: The line of text to check.
        delimiters: String containing valid delimiter characters
            (default: "|+"). For compatibility with the existing call sites,
            which pass the expected positions as the second positional
            argument, a non-string value given here is interpreted as
            ``delimiter_positions`` and the default delimiter set is used.
        delimiter_positions: Expected delimiter positions (column indices of
            the ``+`` characters). When omitted, the module-level
            ``delimiterPositions`` is used if it exists.

    Returns:
        bool: True if delimiters align correctly, False otherwise (including
        when the line or the expected positions are empty).
    """
    # Callers invoke this as check_delimiter_alignment(line, positions); a
    # non-string second argument is therefore the positions list, not a set
    # of delimiter characters.
    if not isinstance(delimiters, str):
        if delimiter_positions is None:
            delimiter_positions = delimiters
        delimiters = "|+"

    # Fall back to the parser's module-level state when no positions were
    # handed in explicitly.
    if delimiter_positions is None:
        delimiter_positions = globals().get("delimiterPositions")

    if not line or not delimiter_positions:
        return False

    print(f"\nChecking line: '{line}'")
    print(f"Expected delimiter positions: {delimiter_positions}")

    has_plus = '+' in line
    has_pipe = '|' in line

    if has_plus and not has_pipe:
        # For full separator lines (only +)
        accepted_chars = '+'
        allowed_starts = ('+',)
    elif has_pipe and not has_plus:
        # For data lines (only |)
        accepted_chars = '|'
        allowed_starts = ('|',)
    else:
        # For partial separators (mix of + and |)
        accepted_chars = delimiters
        allowed_starts = ('+', '|')

    # Index 0 is the opening delimiter and is never part of the expected list.
    found_positions = [i for i, char in enumerate(line) if i != 0 and char in accepted_chars]

    if accepted_chars == '+' and allowed_starts == ('+',):
        print(f"Full separator line - Found + at positions: {found_positions}")
    elif accepted_chars == '|' and allowed_starts == ('|',):
        print(f"Data line - Found | at positions: {found_positions}")
    else:
        print(f"Partial separator - Found delimiters at positions: {found_positions}")
        print(f"Characters at those positions: {[line[pos] for pos in found_positions]}")

    # These two conditions do not depend on the individual position, so they
    # are checked once up front; this also rejects lines with no closing
    # delimiter instead of accepting them vacuously.
    if not line.startswith(allowed_starts):
        return False
    if delimiter_positions[-1] not in found_positions:
        return False

    expected = set(delimiter_positions)
    return all(pos in expected for pos in found_positions)
class GridRowsTracker():
""" Represents the document object. """
......@@ -112,6 +167,14 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
:param gridTable: String of the Pandoc-style grid table.
:return: List of lists representing the table with metadata for spans.
"""
global hasHeader, defaultAlignments, headerDelimiterPositions, delimiterPositions, nextListElementMark
# Initialize globals
hasHeader = False
defaultAlignments:list[str] = []
headerDelimiterPositions:list[int] = []
delimiterPositions:list[int] = []
nextListElementMark = '@'
# Split the input into lines
lines:list[str] = [line.strip() for line in gridTable.strip().split('\n')]
......@@ -131,14 +194,13 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
cell.colspan += 1
if _c.startswith('- '): # List in a cell
cell.listFlag = True
cell.content = _c + '\n' # Add newline to know when the list element ends
_c = re.sub(r'\\\s*$', '\n', _c)
cell.content = _c + nextListElementMark # Add list element end mark to know when the list element ends
elif cell.listFlag and len(_c) > 0: # any other content when handling list is concatenated to the last list element
cell.content = _c + '\n'
_c = re.sub(r'\\\s*$', '\n', _c)
cell.content += _c + nextListElementMark #add the list element end mark
elif not _c: # separation between list and other paragraph
cell.listFlag = False
cell.content = '\n' #if not cell['content'].endswith("\n") else ""
cell.content += '\n' if not cell['content'].endswith('\n') else ""
else:
cell.content = re.sub(r'\\\s*$', '\n', _c)
else: # Cell has content
......@@ -147,11 +209,16 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
cell.content += '\n'
#cell['content'] = cell['content'].strip("\n")
cell.listFlag = True
cell.content += _c + '\n' # Add newline to know when the list element ends
elif cell.listFlag and _c: # any other content when handling list is concatenated to the last list element
cell.content = cell.content.strip('\n') + ' ' + _c + '\n'
_c = re.sub(r'\\\s*$', '\n', _c)
cell.content += _c + nextListElementMark # Add list element end mark to know when the list element ends
elif cell.listFlag and len(_c) > 0: # any other content when handling list is concatenated to the last list element
cell.content = cell.content.strip(nextListElementMark) #remove list element end mark
_c = re.sub(r'\\\s*$', '\n', _c)
cell.content += " " + _c + nextListElementMark #add list element end mark
elif len(_c) == 0: # separation between list and other paragraph
cell.listFlag = False
if cell.list_flag:
cell.list_flag = False
cell.content += '\n\n' #end list by \n
#content = re.sub(r'\\\s*$', "\n", content.strip())
cell.content += '\n' if not cell.content.endswith('\n') else ''
else:
......@@ -202,11 +269,8 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
# Determine delimiter positions and alignments
hasHeader = False
headerDelimiterPositions:list[int] = []
headerRows:GridTableRowList = []
dataRows:GridTableRowList = []
defaultAlignments:list[str] = []
for index in separatorIndices:
if matchGridTableHeaderSeparator.match(lines[index]):
......@@ -217,7 +281,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
for partIndex in range(len(parts)):
if parts[partIndex].startswith(':') and not parts[partIndex].endswith(':'): # Left alignment
defaultAlignments.append('align="left"')
elif not parts[partIndex].startswith(":") and parts[partIndex].endswith(":"): # Right alignment
elif not parts[partIndex].startswith(':') and parts[partIndex].endswith(':'): # Right alignment
defaultAlignments.append('align="right"')
else:
defaultAlignments.append('align="center"') # Center alignment
......@@ -226,6 +290,18 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
delPositions = [lines[index].find(delimiter, delimiterPositionsStart + 1) for delimiter in '+' if delimiter in lines[index][delimiterPositionsStart + 1:]]
headerDelimiterPositions.append(min(delPositions) if delPositions else -1)
if not hasHeader:
#Set default alignments from the first separator
parts = re.split(r'\+', lines[0].strip('+'))
default_alignments = []
# Calculate default alignments and positions of delimiters
for part_index in range(len(parts)):
if parts[part_index].startswith(':') and not parts[part_index].endswith(':'):
default_alignments.append('align="left"')
elif not parts[part_index].startswith(':') and parts[part_index].endswith(':'):
default_alignments.append('align="right"')
else:
default_alignments.append('align="center"')
for rowNumber in range(len(separatorIndices) - 1):
rows:list[GridRow] = []
......@@ -238,6 +314,10 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
for line in rowLines:
if isSeparator(line) and not inDataRow:
inDataRow = True
# Add delimiter alignment check for separator lines
if not check_delimiter_alignment(line, delimiterPositions):
raise ValueError(f"Misaligned delimiters in separator row: {line}")
parts = re.split(r'\s*\+\s*', line.strip('+'))
delimiterIndex = 0
......@@ -254,7 +334,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
cell.position = delimiterIndex # Position of cell delimiter +
# Set alignment as defined by header separator line
cell.calculateAndSetAlignment(headerDelimiterPositions, defaultAlignments)
cell.calculateAndSetAlignment()
while delimiterIndex > delimiterPositions[columnIndex]:
columnIndex += 1
......@@ -263,7 +343,11 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
elif inDataRow:
# Regular data row or partial separator
if matchGridTableBodySeparator.match(line): # Partial separator
cellsContent = re.split(r"[\|\+]", line.strip("|").strip("+")) # (?<!\\)[\|\+]
# Add delimiter alignment check for partial separators
if not check_delimiter_alignment(line, delimiterPositions):
raise ValueError(f"Misaligned delimiters in partial separator: {line}")
cellsContent = re.split(r"[\|\+]", line.strip('|').strip('+')) # (?<!\\)[\|\+]
#Add another row, set delimiters for each cell
rows.append(GridRow(numberOfColumns))
auxDelimiterIndex = 0
......@@ -274,7 +358,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
auxDelimiterIndex += len(content) + 1
cell = rows[-1][auxiliarCellIndex]
cell.position = auxDelimiterIndex # Position of cell delimiter +
cell.calculateAndSetAlignment(headerDelimiterPositions, defaultAlignments)
cell.calculateAndSetAlignment()
while auxDelimiterIndex > delimiterPositions[auxiliarCellIndex]:
auxiliarCellIndex += 1
auxiliarCellIndex += 1
......@@ -318,7 +402,13 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
raise ValueError("More cells than columns found")
else: # Data row
cellsContent = re.split(r'\s*\|\s*', line.strip('|'))
cellsContent = line.strip()
cellsContent = re.split(r"\|", line.strip('|'))
# Add delimiter alignment check
if not check_delimiter_alignment(line, delimiterPositions):
raise ValueError(f"Misaligned delimiters in row: {line}")
columnCellIndex = 0
if len(cellsContent) < numberOfColumns: # Colspan: Positions of | with respect to + need to be determined
for columnIndex, content in enumerate(cellsContent):
......@@ -347,6 +437,10 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
elif hasHeader and start < headerSeparatorIndex: # table_row and auxiliar_row are part of header_rows
for row in rows: # header rows
headerRows.append(row.cells)
else:
#only body
for row in rows:
dataRows.append(row.cells)
# Check if there are any data rows
if not dataRows and not headerRows:
......@@ -432,13 +526,27 @@ def generateHtmlTableWithSpans(gridTable:str) -> str:
Returns:
The HTML table in string format.
"""
debug_output = []
def debug_print(msg):
debug_output.append(str(msg)) # Convert message to string
try:
# Redirect print statements to our debug collector
global print
original_print = print
print = debug_print
gridHeader, gridBody = parseGridTableWithSpans(gridTable)
# Restore original print
print = original_print
except Exception as e:
import traceback
traceback.print_exc()
return f'HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS. {e}'
debug_print("Grid table could not be generated")
debug_text = "<br>".join(debug_output) # Now all items are strings
return f'HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE.<br><pre>{debug_text}</pre>'
# Generate table HTML...
html = '<table>\n'
hasHeader = False
......@@ -457,13 +565,13 @@ def generateHtmlTableWithSpans(gridTable:str) -> str:
continue
else:
# Prepare content, in case there's a list
if cell.content is not None and (matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>", cell.content)): # Update cell in new row
if cell.content is not None and (matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+((?:(?!@).)+)@", cell.content)): # Update cell in new row
list = "<ul>"
# Build list the matches
for match in matches:
list += "<li>" + match[1] + "</li>"
list += "</ul>"
cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+", list, cell.content)
cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+(?:(?!@).)+@)+", list, cell.content)
# Enforce left alignment if cell contains a list
cell.alignment = "align=\"left\""
......@@ -482,13 +590,13 @@ def generateHtmlTableWithSpans(gridTable:str) -> str:
continue
else:
#Prepare content, in case there's a list
if cell.content is not None and (matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>", cell.content)): # Update cell in new row
if cell.content is not None and (matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+((?:(?!@).)+)@", cell.content)): # Update cell in new row
list = "<ul>"
# Build list the matches
for match in matches:
list += "<li>" + match[1] + "</li>"
list += "</ul>"
cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+",list, cell.content)
cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+(?:(?!@).)+@)+",list, cell.content)
# Enforce left alignment if cell contains a list
cell.alignment = "align=\"left\""
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment