Add MathJax support to the MkDocs configuration and convert Pandoc grid
tables to HTML tables in toMkdocs.py.

Review notes on this revision of the patch:
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy. Context-line whitespace, blank context lines and the indentation
  inside the "<tr>"/"<td>" string literals are best guesses; re-check them
  against the target files before applying.
- The post-image blob id on the toMkdocs.py "index" line is the one from the
  original patch and no longer matches the revised content.
- gridTable is a string buffer: lines are collected with "+=" (the original
  called .append() on a str) and the buffer is restarted for every table.
- The generated HTML is wrapped in a Line before it is appended to the clause.
- The opening border line is only collected, no longer emitted as TABLEHEADER.
- The span parser tolerates tables without content rows and rows of uneven
  width, and keeps combined rowspan/colspan blocks rectangular.
- NOTE(review): the pipe-table detection that precedes the new branch is not
  part of this patch; it presumably matches "| a | b |" content rows of a grid
  table first. Confirm, and guard it with "not inGridTable" if so.
- NOTE(review): a grid table that ends at the last line of the file is never
  flushed by the visible code; a flush after the loop is probably needed.

diff --git a/toMkdocs/mkdocs.yml b/toMkdocs/mkdocs.yml
index c1c3ac830acc31c58e920a332beb60220d7b38dd..dc5cf7752586908c27fa0c2b642e495810845c16 100644
--- a/toMkdocs/mkdocs.yml
+++ b/toMkdocs/mkdocs.yml
@@ -60,6 +60,8 @@ markdown_extensions:
       pygments_lang_class: true
   - pymdownx.inlinehilite
   - pymdownx.snippets
+  - pymdownx.arithmatex:
+      generic: true
   - pymdownx.superfences:
       custom_fences:
         - name: mermaid
@@ -69,6 +71,10 @@ markdown_extensions:
       alternate_style: true
   - tables
 
+extra_javascript:
+  - javascripts/mathjax.js
+  - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js
+
 ##############################################################################
 
 extra:
diff --git a/toMkdocs/toMkdocs.py b/toMkdocs/toMkdocs.py
index 49778dee08a1293f371d8c44f9b403ad0c6992a2..f0dea488ed42a383a79fa6a1823b89cd2c8e3898 100644
--- a/toMkdocs/toMkdocs.py
+++ b/toMkdocs/toMkdocs.py
@@ -11,6 +11,7 @@ from enum import Enum, auto
 import argparse, re, os, shutil, hashlib, base64
 from dataclasses import dataclass
 from rich import print
+from html import escape
 
 verbose = False
 veryVerbose = False
@@ -418,6 +419,8 @@ _matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE)
 _matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECASE)
 _matchTable = re.compile(r'^\s*\|.*\|\s$', re.IGNORECASE)
 _matchTableSeparator = re.compile(r'^\s*\|([-: ]+\|)+\s*$', re.IGNORECASE)
+_matchGridTable = re.compile(r'^\s*\+-.*\+\s$', re.IGNORECASE)
+_matchGridTableSeparator = re.compile(r'^\s*\+([-:= ]+\+)+\s*$', re.IGNORECASE)
 _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
 _markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
 _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
@@ -447,6 +450,117 @@ def shortHash(value:str, length:int) -> str:
         ).digest()
     ).decode()[:length]
 
+
+def parse_pandoc_table_with_spans(pandoc_table):
+    """
+    Parse a Pandoc-style grid table into a structure for HTML conversion with rowspan and colspan.
+
+    Only the first content line between two border lines is used. Spans are inferred from repeated
+    text: equal neighbours to the right form a colspan, equal cells below form a rowspan.
+
+    :param pandoc_table: String of the Pandoc-style grid table.
+    :return: List of rows; each row is a list of dicts with the keys "content" (HTML-escaped text),
+        "rowspan" and "colspan". Empty list when the table has no content rows.
+    """
+    # Split the input into lines
+    lines = [line.strip() for line in pandoc_table.strip().split("\n")]
+
+    # Identify the separators (`+`) and extract rows
+    separator_indices = [i for i, line in enumerate(lines) if line.startswith("+")]
+    data_rows = []
+    for start, end in zip(separator_indices, separator_indices[1:]):
+        row_content = lines[start + 1:end]  # Lines between separators
+        if row_content:
+            data_rows.append(row_content[0])  # Only take the first line of content
+
+    # Parse the rows into cells
+    parsed_table = [
+        [escape(cell.strip()) for cell in re.split(r"\s*\|\s*", row.strip("|"))]
+        for row in data_rows
+    ]
+    if not parsed_table:
+        # A table made of border lines only has no row to size the tracker from
+        return []
+
+    # Detect rowspan and colspan
+    table_with_spans = []
+    # Rows still covered by a rowspan, per column; sized for the widest row
+    column_rowspan_tracker = [0] * max(len(row) for row in parsed_table)
+
+    for row_index, row in enumerate(parsed_table):
+        table_row = []
+        col_index = 0
+
+        while col_index < len(row):
+            if column_rowspan_tracker[col_index] > 0:
+                # Skip columns covered by rowspan
+                column_rowspan_tracker[col_index] -= 1
+                col_index += 1
+                continue
+
+            cell_content = row[col_index]
+
+            # Check for colspan (consecutive identical cells in the current row that are
+            # not already covered by a rowspan from above)
+            colspan = 1
+            for next_col_index in range(col_index + 1, len(row)):
+                covered = column_rowspan_tracker[next_col_index] > 0
+                if covered or row[next_col_index] != cell_content:
+                    break
+                colspan += 1
+            spanned_columns = range(col_index, col_index + colspan)
+
+            # Check for rowspan (subsequent rows repeating the content in every spanned column)
+            rowspan = 1
+            for next_row in parsed_table[row_index + 1:]:
+                repeats = all(
+                    c < len(next_row) and next_row[c] == cell_content
+                    for c in spanned_columns
+                )
+                if not repeats:
+                    break
+                rowspan += 1
+
+            # Update rowspan tracker for every spanned column so the block stays rectangular
+            for c in spanned_columns:
+                column_rowspan_tracker[c] = rowspan - 1
+
+            # Add cell with span attributes
+            table_row.append({
+                "content": cell_content,
+                "rowspan": rowspan,
+                "colspan": colspan,
+            })
+
+            # Skip processed columns
+            col_index += colspan
+
+        table_with_spans.append(table_row)
+
+    return table_with_spans
+
+
+def generate_html_table_with_spans(pandoc_table):
+    """
+    Generate an HTML table from a Pandoc-style grid table with row and column spans.
+
+    :param pandoc_table: String of the Pandoc-style grid table.
+    :return: HTML string.
+    """
+    grid = parse_pandoc_table_with_spans(pandoc_table)
+
+    html = ["<table>\n"]
+    for row in grid:
+        html.append("  <tr>\n")
+        for cell in row:
+            rowspan = f" rowspan=\"{cell['rowspan']}\"" if cell["rowspan"] > 1 else ""
+            colspan = f" colspan=\"{cell['colspan']}\"" if cell["colspan"] > 1 else ""
+            html.append(f"    <td{rowspan}{colspan}>{cell['content']}</td>\n")
+        html.append("  </tr>\n")
+    html.append("</table>")
+    return "".join(html)
+
+
 def analyseMarkdown(filename:str) -> Document:
     """
         Analyse the markdown file and split it into clauses.
@@ -473,6 +587,9 @@ def analyseMarkdown(filename:str) -> Document:
     inCodefence = False
     inTable = False
     tableHasSeparator = False
+    inGridTable = False
+    gridTableHasSeparator = False
+    gridTable = ""
 
     for line in inLines:
         # Detect and handle codefences
@@ -512,6 +629,30 @@ def analyseMarkdown(filename:str) -> Document:
                 outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
             # continue with other matches
 
+        # Detect grid tables and convert them to html table
+        if _matchGridTable.match(line) and not inGridTable:
+            inGridTable = True
+            # Start a fresh buffer; the raw border line is only collected, not emitted
+            gridTable = line
+            continue
+        if inGridTable:
+            if _matchGridTableSeparator.match(line) and not gridTableHasSeparator:
+                gridTableHasSeparator = True
+                gridTable += line
+                continue
+            elif _matchGridTable.match(line) or _matchTable.match(line):
+                # Border line or content row: keep collecting
+                gridTable += line
+                continue
+            else:
+                inGridTable = False
+                gridTableHasSeparator = False
+                htmltable = generate_html_table_with_spans(gridTable)
+                # NOTE(review): TABLEROW is a guess for the line type of generated HTML - confirm against the writer
+                outClauses[-1].append(Line(htmltable + "\n", LineType.TABLEROW))
+                gridTable = ""
+            # continue with other matches
+
         # Detect notes
         # Notes are lines that start with a '>'.
         if _matchNote.match(line):