diff --git a/toMkdocs/gridTableTools.py b/toMkdocs/gridTableTools.py index 170c3a5dd8d02452b1b5e6f3bd5fdd847c9cdc47..95fde45e8869b56f17b39cd8d853b8ff58251d69 100644 --- a/toMkdocs/gridTableTools.py +++ b/toMkdocs/gridTableTools.py @@ -27,24 +27,37 @@ class GridCell: self.auxiliarIndex:int = 0 - def calculateAndSetAlignment(self, headerDelimiterPositions:list[int], defaultAlignments:list[str]) -> None: + def calculateAndSetAlignment(self) -> None: """ Set the alignment of the cell based on the position of the delimiter. """ if self.position is None: raise ValueError('Cell position must be set before calculating alignment.') - headerDelimiterIndex = 0 - while headerDelimiterIndex < len(defaultAlignments) and self.position > headerDelimiterPositions[headerDelimiterIndex]: - headerDelimiterIndex += 1 - if headerDelimiterIndex < len(defaultAlignments): - if self.position < headerDelimiterPositions[headerDelimiterIndex]: - self.alignment = defaultAlignments[headerDelimiterIndex] - elif self.position == headerDelimiterPositions[headerDelimiterIndex]: - self.alignment = defaultAlignments[headerDelimiterIndex] + if hasHeader: + headerDelimiterIndex = 0 + while headerDelimiterIndex < len(defaultAlignments) and self.position > headerDelimiterPositions[headerDelimiterIndex]: headerDelimiterIndex += 1 + if headerDelimiterIndex < len(defaultAlignments): + if self.position < headerDelimiterPositions[headerDelimiterIndex]: + self.alignment = defaultAlignments[headerDelimiterIndex] + elif self.position == headerDelimiterPositions[headerDelimiterIndex]: + self.alignment = defaultAlignments[headerDelimiterIndex] + headerDelimiterIndex += 1 + else: + raise ValueError('Invalid table formatting') else: - raise ValueError('Invalid table formatting') - + body_delimiter_index = 0 + while body_delimiter_index in range(len(defaultAlignments)) and self.position > delimiterPositions[body_delimiter_index]: + body_delimiter_index += 1 + if body_delimiter_index in range(len(defaultAlignments)): + if self.position < delimiterPositions[body_delimiter_index]: + self.alignment = defaultAlignments[body_delimiter_index] + elif self.position == delimiterPositions[body_delimiter_index]: + self.alignment = defaultAlignments[body_delimiter_index] + body_delimiter_index += 1 + else: + raise ValueError("Invalid table formatting") + def __str__(self): return f'(Content: {self.content}, Rowspan: {self.rowspan}, Colspan: {self.colspan}, Alignment: {self.alignment}, Position: {self.position}, ListFlag: {self.listFlag}, AuxiliarIndex: {self.auxiliarIndex})' @@ -78,6 +91,48 @@ class GridRow(): def __repr__(self): return self.__str__() + def check_delimiter_alignment(line: str, delimiters: str = "|+") -> bool: + """ + Check if delimiters in a row align with expected positions. + + Args: + line: The line of text to check + delimiter_positions: List of expected positions (based on + characters) + delimiters: String containing valid delimiter characters (default: "|+") + + Returns: + bool: True if delimiters align correctly, False otherwise + """ + if not line or not delimiterPositions: + return False + + print(f"\nChecking line: '{line}'") + print(f"Expected delimiter positions: {delimiterPositions}") + + # For full separator lines (only +) + if '+' in line and '|' not in line: + currentPositions = [i for i, char in enumerate(line) if (char == '+' and i != 0)] + print(f"Full separator line - Found + at positions: {currentPositions}") + return all(delimiterPositions[-1] in currentPositions and + line.startswith("+") and + pos in delimiterPositions for pos in currentPositions) + + # For data lines (only |) + if '|' in line and '+' not in line: + currentPositions = [i for i, char in enumerate(line) if (char == '|' and i != 0)] + print(f"Data line - Found | at positions: {current_positions}") + return all(delimiterPositions[-1] in currentPositions and + line.startswith("|") and + pos in delimiterPositions for pos in currentPositions) + + # For partial separators (mix of + and |) + currentPositions = [i for i, char in enumerate(line) if (char in delimiters and i != 0)] + print(f"Partial separator - Found delimiters at positions: {currentPositions}") + print(f"Characters at those positions: {[line[pos] for pos in currentPositions]}") + return all(delimiterPositions[-1] in currentPositions and + (line.startswith("+") or line.startswith("|")) and + pos in delimiterPositions for pos in currentPositions) + class GridRowsTracker(): """ Represents the document object. """ @@ -112,7 +167,15 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR :param pandoc_table: String of the Pandoc-style grid table. :return: List of lists representing the table with metadata for spans. """ - + global hasHeader, defaultAlignments, headerDelimiterPositions, delimiterPositions, nextListElementMark + + # Initialize globals + hasHeader = False + defaultAlignments:list[str] = [] + headerDelimiterPositions:list[int] = [] + delimiterPositions:list[int] = [] + nextListElementMark = '@' + # Split the input into lines lines:list[str] = [line.strip() for line in gridTable.strip().split('\n')] @@ -131,14 +194,13 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR cell.colspan += 1 if _c.startswith('- '): # List in a cell cell.listFlag = True - cell.content = _c + '\n' # Add newline to know when the list element ends - + _c = re.sub(r'\\\s*$', '\n', _c) + cell.content = _c + nextListElementMark # Add list element end mark to know when the list element ends elif cell.listFlag and len(_c) > 0: # any other content when handling list is concatenated to the last list element - cell.content = _c + '\n' - + _c = re.sub(r'\\\s*$', '\n', _c) + cell.content += _c + nextListElementMark #add the list element end mark elif not _c: # separation between list and other paragraph - cell.listFlag = False - cell.content = '\n' #if not cell['content'].endswith("\n") else "" + cell.content += '\n' if not cell['content'].endswith('\n') else "" else: cell.content = re.sub(r'\\\s*$', '\n', _c) else: # Cell has content @@ -147,11 +209,16 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR cell.content += '\n' #cell['content'] = cell['content'].strip("\n") cell.listFlag = True - cell.content += _c + '\n' # Add newline to know when the list element ends - elif cell.listFlag and _c: # any other content when handling list is concatenated to the last list element - cell.content = cell.content.strip('\n') + ' ' + _c + '\n' + _c = re.sub(r'\\\s*$', '\n', _c) + cell.content += _c + nextListElementMark # Add list element end mark to know when the list element ends + elif cell.listFlag and len(_c) > 0: # any other content when handling list is concatenated to the last list element + cell.content = cell.content.strip(nextListElementMark) #remove list element end mark + _c = re.sub(r'\\\s*$', '\n', _c) + cell.content += " " + _c + nextListElementMark #add list element end mark elif len(_c) == 0: # separation between list and other paragraph - cell.listFlag = False + if cell.list_flag: + cell.list_flag = False + cell.content += '\n\n' #end list by \n #content = re.sub(r'\\\s*$', "\n", content.strip()) cell.content += '\n' if not cell.content.endswith('\n') else '' else: @@ -202,11 +269,8 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR # Determine delimter positions and alignments - hasHeader = False - headerDelimiterPositions:list[int] = [] headerRows:GridTableRowList = [] dataRows:GridTableRowList = [] - defaultAlignments:list[str] = [] for index in separatorIndices: if matchGridTableHeaderSeparator.match(lines[index]): @@ -217,7 +281,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR for partIndex in range(len(parts)): if parts[partIndex].startswith(':') and not parts[partIndex].endswith(':'): # Left alignment defaultAlignments.append('align="left"') - elif not parts[partIndex].startswith(":") and parts[partIndex].endswith(":"): # Right alignment + elif not parts[partIndex].startswith(':') and parts[partIndex].endswith(':'): # Right alignment defaultAlignments.append('align="right"') else: defaultAlignments.append('align="center"') # Center alignment @@ -226,6 +290,18 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR delPositions = [lines[index].find(delimiter, delimiterPositionsStart + 1) for delimiter in '+' if delimiter in lines[index][delimiterPositionsStart + 1:]] headerDelimiterPositions.append(min(delPositions) if delPositions else -1) + if not hasHeader: + #Set default alignments from the first separator + parts = re.split(r'\+', lines[0].strip('+')) + default_alignments = [] + # Calculate default alignments and positions of delimiters + for part_index in range(len(parts)): + if parts[part_index].startswith(':') and not parts[part_index].endswith(':'): + default_alignments.append('align="left"') + elif not parts[part_index].startswith(':') and parts[part_index].endswith(':'): + default_alignments.append('align="right"') + else: + default_alignments.append('align="center"') for rowNumber in range(len(separatorIndices) - 1): rows:list[GridRow] = [] @@ -238,6 +314,10 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR for line in rowLines: if isSeparator(line) and not inDataRow: inDataRow = True + # Add delimiter alignment check for separator lines + if not check_delimiter_alignment(line, delimiterPositions): + raise ValueError(f"Misaligned delimiters in separator row: {line}") + parts = re.split(r'\s*\+\s*', line.strip('+')) delimiterIndex = 0 @@ -254,7 +334,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR cell.position = delimiterIndex # Position of cell delimiter + # Set alignment as defined by header separator line - cell.calculateAndSetAlignment(headerDelimiterPositions, defaultAlignments) + cell.calculateAndSetAlignment() while delimiterIndex > delimiterPositions[columnIndex]: columnIndex += 1 @@ -263,7 +343,11 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR elif inDataRow: # Regular data row or partial separator if matchGridTableBodySeparator.match(line): # Partial separator - cellsContent = re.split(r"[\|\+]", line.strip("|").strip("+")) # (?<!\\)[\|\+] + # Add delimiter alignment check for partial separators + if not check_delimiter_alignment(line, delimiterPositions): + raise ValueError(f"Misaligned delimiters in partial separator: {line}") + + cellsContent = re.split(r"[\|\+]", line.strip('|').strip('+')) # (?<!\\)[\|\+] #Add another row, set delimiters for each cell rows.append(GridRow(numberOfColumns)) auxDelimiterIndex = 0 @@ -274,7 +358,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR auxDelimiterIndex += len(content) + 1 cell = rows[-1][auxiliarCellIndex] cell.position = auxDelimiterIndex # Position of cell delimiter + - cell.calculateAndSetAlignment(headerDelimiterPositions, defaultAlignments) + cell.calculateAndSetAlignment() while auxDelimiterIndex > delimiterPositions[auxiliarCellIndex]: auxiliarCellIndex += 1 auxiliarCellIndex += 1 @@ -318,7 +402,13 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR raise ValueError("More cells than columns found") else: # Data row - cellsContent = re.split(r'\s*\|\s*', line.strip('|')) + cellsContent = line.strip() + cellsContent = re.split(r"\|", line.strip('|')) + + # Add delimiter alignment check + if not check_delimiter_alignment(line, delimiterPositions): + raise ValueError(f"Misaligned delimiters in row: {line}") + columnCellIndex = 0 if len(cellsContent) < numberOfColumns: # Colspan: Positions of | with respect to + need to be determined for columnIndex, content in enumerate(cellsContent): @@ -347,6 +437,10 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR elif hasHeader and start < headerSeparatorIndex: # table_row and auxiliar_row are part of header_rows for row in rows: # header rows headerRows.append(row.cells) + else: + #only body + for row in rows: + dataRows.append(row.cells) # Check if there are any data rows if not dataRows and not headerRows: @@ -432,13 +526,27 @@ def generateHtmlTableWithSpans(gridTable:str) -> str: Returns: The HTML table in string format. """ + debug_output = [] + def debug_print(msg): + debug_output.append(str(msg)) # Convert message to string + try: + # Redirect print statements to our debug collector + global print + original_print = print + print = debug_print + gridHeader, gridBody = parseGridTableWithSpans(gridTable) - except Exception as e: - import traceback - traceback.print_exc() - return f'HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS. {e}' + + # Restore original print + print = original_print + except Exception as e: + debug_print("Grid table could not be generated") + debug_text = "<br>".join(debug_output) # Now all items are strings + return f'HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE.<br><pre>{debug_text}</pre>' + + # Generate table HTML... html = '<table>\n' hasHeader = False @@ -457,13 +565,13 @@ def generateHtmlTableWithSpans(gridTable:str) -> str: continue else: # Prepare content, in case there's a list - if cell.content is not None and (matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>", cell.content)): # Update cell in new row + if cell.content is not None and (matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+((?:(?!@).)+)@", cell.content)): # Update cell in new row list = "<ul>" # Build list the matches for match in matches: list += "<li>" + match[1] + "</li>" list += "</ul>" - cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+", list, cell.content) + cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+(?:(?!@).)+@)+", list, cell.content) # Enforce left alignment if cell contains a list cell.alignment = "align=\"left\"" @@ -482,13 +590,13 @@ def generateHtmlTableWithSpans(gridTable:str) -> str: continue else: #Prepare content, in case there's a list - if cell.content is not None and (matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>", cell.content)): # Update cell in new row + if cell.content is not None and (matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+((?:(?!@).)+)@", cell.content)): # Update cell in new row list = "<ul>" # Build list the matches for match in matches: list += "<li>" + match[1] + "</li>" list += "</ul>" - cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+",list, cell.content) + cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+(?:(?!@).)+@)+",list, cell.content) # Enforce left alignment if cell contains a list cell.alignment = "align=\"left\""