diff --git a/generateChangemarks/.gitlab-ci.yml b/generateChangemarks/.gitlab-ci.yml index d99c090915fdeaa11cd54a03d7774c997257a929..5e48e0e4d33853cd6906a2970e6c17f7586d37b2 100644 --- a/generateChangemarks/.gitlab-ci.yml +++ b/generateChangemarks/.gitlab-ci.yml @@ -197,12 +197,12 @@ pages: curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fstylesheets%2Fextra%2Ecss/raw?ref=master" >> extra.css - mkdir -p docs/stylesheets && mv extra.css docs/stylesheets/ - | - curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fmkdocs%2Eyml/raw?ref=master" >> mkdocs.yml + curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fmkdocs%2Eyml/raw?ref=gridtables" >> mkdocs.yml - | curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FindexDownload%2Emd/raw?ref=master" >> indexDownload.md - mkdir -p docs/download && mv indexDownload.md docs/download/index.md - | - curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FtoMkdocs%2Epy/raw?ref=master" >> toMkdocs.py + curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FtoMkdocs%2Epy/raw?ref=gridtables" >> toMkdocs.py - | export SPEC_NAME=$(ls | grep -E "(TS|TR|WI).*\.md" | cut -d'.' 
# Grid table patterns. The first three are also used by analyseMarkdown().
_matchGridTable = re.compile(r'^\s*\+-.*\+\s$', re.IGNORECASE)
_matchGridTableBodySeparator = re.compile(r'.*\+([-:]+\+)+.*$', re.IGNORECASE)
_matchGridTableHeaderSeparator = re.compile(r'.*\+([=:]+\+)+.*$', re.IGNORECASE)
# A separator that spans the full table width, e.g. "+----+====+".
_matchGridTableSeparator = re.compile(r'\s*\+([-:=]+\+)+\s*$', re.IGNORECASE)
# A single cell of a partial separator line, e.g. "-----".
_matchGridTableSeparatorLine = re.compile(r'[-:]+$', re.IGNORECASE)
# A balanced pair of markdown bold markers.
_matchGridTableBold = re.compile(r'\*\*(.*?)\*\*')


def _new_grid_cell(position=0):
    """Return an empty cell record; spans of 0 mark a cell that is not rendered."""
    return {
        "content": "NOCONTENT",
        "rowspan": 0,
        "colspan": 0,
        "colspan_adjusted": False,
        "position": position,  # Position of the cell's right-hand delimiter
    }


def _find_grid_delimiter(line, start, delimiters):
    """Return the first index >= start of any character in delimiters, or -1."""
    found = [pos for pos in (line.find(d, start) for d in delimiters) if pos != -1]
    return min(found) if found else -1


def _add_grid_cell_text(cell, text):
    """Add one source line of text to a cell, activating the cell on first use."""
    text = text.strip()
    if cell["content"] == "NOCONTENT":
        cell["rowspan"] += 1
        cell["colspan"] += 1
        cell["content"] = text
    elif text:
        # Join continuation lines with a blank so that words are not fused.
        cell["content"] = f"{cell['content']} {text}" if cell["content"] else text


def _adjust_grid_colspan(cell, first, cell_count, table_row, line, delimiters, delimiter_positions):
    """Widen cell's colspan while its right delimiter lies beyond the grid column."""
    for j in range(first, cell_count):
        search_from = table_row[j - 1]["position"] if j != 0 else 0
        position = _find_grid_delimiter(line, search_from + 1, delimiters)
        if position > delimiter_positions[j]:
            cell["colspan"] += 1
        elif position < delimiter_positions[j]:
            raise ValueError("Wrong cell formatting")
        else:
            break


def _parse_grid_row(row_lines, number_of_columns, delimiter_positions):
    """
    Parse the lines of one row group: a full separator line followed by its
    content lines and optional partial separators.

    :return: Tuple (table_row, auxiliary_row, has_merged_cells). The auxiliary
        row collects the cells that start below a partial separator.
    """
    separator, *content_lines = row_lines

    # One cell per column of the opening separator; remember where each ends.
    table_row = []
    position = 0
    for part in re.split(r"\s*\+\s*", separator.strip("+")):
        position += len(part) + 1
        table_row.append(_new_grid_cell(position))
    auxiliary_row = [_new_grid_cell() for _ in range(number_of_columns)]
    use_auxiliary_row = [False] * number_of_columns
    has_merged_cells = False

    for line in content_lines:
        if _matchGridTableBodySeparator.match(line):  # Partial separator
            has_merged_cells = True
            cells = re.split(r"\s*[\|\+]\s*", line.strip("|").strip("+"))
            if len(cells) > number_of_columns:
                raise ValueError("More cells than columns found")
            spans_columns = len(cells) < number_of_columns
            for i, text in enumerate(cells):
                if _matchGridTableSeparatorLine.match(text):
                    # A new cell starts below this separator segment.
                    use_auxiliary_row[i] = True
                    continue
                # The cell continues through the separator: one more row.
                cell = table_row[i]
                _add_grid_cell_text(cell, text)
                cell["rowspan"] += 1
                if spans_columns and not cell["colspan_adjusted"]:
                    cell["colspan_adjusted"] = True
                    _adjust_grid_colspan(cell, i, len(cells), table_row, line,
                                         "|+", delimiter_positions)
        else:  # Data row
            cells = re.split(r"\s*\|\s*", line.strip("|"))
            if len(cells) > number_of_columns:
                raise ValueError("More cells than columns found")
            if len(cells) < number_of_columns:
                # Fewer cells than columns: some cell spans several columns.
                for i, text in enumerate(cells):
                    cell = table_row[i]
                    _add_grid_cell_text(cell, text)
                    if not cell["colspan_adjusted"]:
                        cell["colspan_adjusted"] = True
                        _adjust_grid_colspan(cell, i, len(cells), table_row, line,
                                             "|", delimiter_positions)
            else:
                for i, text in enumerate(cells):
                    target = auxiliary_row[i] if use_auxiliary_row[i] else table_row[i]
                    _add_grid_cell_text(target, text)

    return table_row, auxiliary_row, has_merged_cells


def _format_grid_cell(content):
    """Convert backslash line breaks and balanced ``**bold**`` markers to HTML."""
    return _matchGridTableBold.sub(r"<strong>\1</strong>", content.replace("\\", "<br>"))


def _check_grid_rows(rows, number_of_columns):
    """Raise ValueError unless every row covers exactly number_of_columns columns."""
    # Rows still to be covered, per column, by a rowspan that started above.
    forward_rowspan = [0] * number_of_columns
    for row_index, row in enumerate(rows):
        covered = 0
        for cell_index, cell in enumerate(row):
            covered += cell["colspan"]
            if row_index > 0 and cell["colspan"] == 0 and forward_rowspan[cell_index] > 0:
                covered += 1
                forward_rowspan[cell_index] -= 1
            if forward_rowspan[cell_index] == 0 and cell["rowspan"] > 1:
                forward_rowspan[cell_index] = cell["rowspan"] - 1
        if covered != number_of_columns:
            raise ValueError("Grid table not converted properly")


def parse_pandoc_table_with_spans(pandoc_table: str) -> tuple:
    """
    Parse a Pandoc-style grid table into a structure for HTML conversion with
    rowspan and colspan.

    Rows above the last ``+=====+`` separator are header rows; all other rows
    (and every row of a table without such a separator) are body rows. Only
    one level of row splitting per row group is supported.

    :param pandoc_table: String of the Pandoc-style grid table.
    :return: Tuple ``(header_rows, data_rows)``. Each is a list of rows, each
        row a list of cell dictionaries with the keys ``content``, ``rowspan``,
        ``colspan``, ``colspan_adjusted`` and ``position``. Cells whose
        ``rowspan`` or ``colspan`` is 0 are placeholders covered by a span.
    :raises ValueError: If the table has no separator lines, no rows, or its
        cells do not add up to the number of columns.
    """
    lines = [line.strip() for line in pandoc_table.strip().split("\n")]

    # Only full-width separators delimit row groups; partial ones are handled per row.
    separator_indices = [i for i, line in enumerate(lines)
                         if _matchGridTableSeparator.match(line)]
    if not separator_indices:
        raise ValueError("No valid separators found in the provided Pandoc table.")

    # The separator with the most columns defines the column grid.
    number_of_columns = 0
    delimiter_positions = []
    for index in separator_indices:
        separator = lines[index]
        columns = separator.count("+") - 1
        if columns > number_of_columns:
            number_of_columns = columns
            delimiter_positions = []
            position = 0
            for _ in range(columns):
                position = separator.find("+", position + 1)
                delimiter_positions.append(position)

    header_separator_index = None
    for index in separator_indices:
        if _matchGridTableHeaderSeparator.match(lines[index]):
            header_separator_index = index

    header_rows = []
    data_rows = []
    for start, end in zip(separator_indices, separator_indices[1:]):
        # The opening separator is included as it defines the row's columns.
        table_row, auxiliary_row, has_merged_cells = _parse_grid_row(
            lines[start:end], number_of_columns, delimiter_positions)
        is_header = header_separator_index is not None and start < header_separator_index
        target = header_rows if is_header else data_rows
        target.append(table_row)
        if has_merged_cells:
            target.append(auxiliary_row)

    if not data_rows and not header_rows:
        raise ValueError("No valid rows found in the provided Pandoc table.")

    for row in header_rows + data_rows:
        for cell in row:
            cell["content"] = _format_grid_cell(cell["content"])

    _check_grid_rows(header_rows, number_of_columns)
    _check_grid_rows(data_rows, number_of_columns)
    return header_rows, data_rows


def _render_grid_rows(rows):
    """Return the HTML fragments for the given rows, skipping covered cells."""
    fragments = []
    for row in rows:
        fragments.append("        <tr>\n")
        for cell in row:
            row_count, column_count = cell["rowspan"], cell["colspan"]
            if row_count == 0 or column_count == 0:
                continue
            rowspan = f' rowspan="{row_count}"' if row_count > 1 else ""
            colspan = f' colspan="{column_count}"' if column_count > 1 else ""
            fragments.append(f"            <td{rowspan}{colspan}>{cell['content']}</td>\n")
        fragments.append("        </tr>\n")
    return fragments


def generate_html_table_with_spans(pandoc_table: str) -> str:
    """
    Generate an HTML table from a Pandoc-style grid table with row and column spans.

    Cell content is inserted as-is (it may contain inline HTML), so it is not
    HTML-escaped.

    :param pandoc_table: String of the Pandoc-style grid table.
    :return: HTML string.
    :raises ValueError: If the grid table cannot be parsed.
    """
    grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)

    fragments = ["<table>\n"]
    # Emit <thead> only if at least one header cell is actually rendered.
    if any(cell["rowspan"] != 0 and cell["colspan"] != 0
           for row in grid_header for cell in row):
        fragments.append("    <thead>\n")
        fragments.extend(_render_grid_rows(grid_header))
        fragments.append("    </thead>\n")

    fragments.append("    <tbody>\n")
    fragments.extend(_render_grid_rows(grid_body))
    fragments.append("    </tbody>\n")
    fragments.append("</table>")
    return "".join(fragments)
@@ -473,6 +780,9 @@ def analyseMarkdown(filename:str) -> Document: inCodefence = False inTable = False tableHasSeparator = False + inGridTable = False + gridTableHasSeparator = False + gridTable = "" for line in inLines: # Detect and handle codefences @@ -493,7 +803,7 @@ def analyseMarkdown(filename:str) -> Document: continue # Detect and handle tables - if _matchTable.match(line) and not inTable: + if _matchTable.match(line) and not inTable and not inGridTable: inTable = True outClauses[-1].append(Line(line, LineType.TABLEHEADER)) continue @@ -512,8 +822,36 @@ def analyseMarkdown(filename:str) -> Document: outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW # continue with other matches + #Detect grid tables and convert them to html table + if _matchGridTable.match(line) and not inGridTable: + inGridTable = True + #outClauses[-1].append(Line(line, LineType.TABLEHEADER)) + gridTable += line + continue + if inGridTable: + if _matchGridTableHeaderSeparator.match(line) or _matchGridTableBodySeparator.match(line): + #outClauses[-1].append(Line(line, LineType.TABLESEPARATOR)) + gridTable += line + continue + elif _matchTable.match(line): + #outClauses[-1].append(Line(line, LineType.TABLEROW)) + gridTable += line + continue + else: + inGridTable = False + # Mark the previous line as the last row in the table + #outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW + print(gridTable) + htmltable = "" + htmltable = generate_html_table_with_spans(gridTable) + print(htmltable) + for row in htmltable: + outClauses[-1].append(Line(row, LineType.TABLEROW)) + gridTable = "" + # continue with other matches + # Detect notes - # Notes are lines that start with a '>'. + # Notes are lines that start with a '>'. 
if _matchNote.match(line): outClauses[-1].append(Line(line, LineType.NOTE)) continue @@ -537,7 +875,7 @@ def analyseMarkdown(filename:str) -> Document: clauseTitle = re.sub(_htmlTag, '', clauseTitle) headerNumber = _matchHeaderNumber.search(clauseTitle) outClauses.append(Clause(len(m.groups()[0]), # level - headerNumber.group() if headerNumber else shortHash(clauseTitle, 6), + headerNumber.group() if headerNumber else shortHash(clauseTitle, 6), clauseTitle, [])) _lineType = LineType.HEADING @@ -591,7 +929,7 @@ def processDocument(args:argparse.Namespace) -> None: if __name__ == '__main__': parser = argparse.ArgumentParser(description = 'Convert oneM2M markdown specificatios to MkDocs format', - formatter_class = argparse.ArgumentDefaultsHelpFormatter) + formatter_class = argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--verbose', '-v', action = 'store_true', help = 'verbose output during processing') parser.add_argument('--very-verbose', '-vv', action = 'store_true', help = 'very verbose output during processing')