Skip to content
Snippets Groups Projects
Commit bc780760 authored by Miguel Angel Reina Ortega
Browse files

Support for grid tables and equations on mkdocs

parent fd0dfa13
No related branches found
No related tags found
No related merge requests found
......@@ -197,12 +197,12 @@ pages:
curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fstylesheets%2Fextra%2Ecss/raw?ref=master" >> extra.css
- mkdir -p docs/stylesheets && mv extra.css docs/stylesheets/
- |
curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fmkdocs%2Eyml/raw?ref=master" >> mkdocs.yml
curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fmkdocs%2Eyml/raw?ref=gridtables" >> mkdocs.yml
- |
curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FindexDownload%2Emd/raw?ref=master" >> indexDownload.md
- mkdir -p docs/download && mv indexDownload.md docs/download/index.md
- |
curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FtoMkdocs%2Epy/raw?ref=master" >> toMkdocs.py
curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FtoMkdocs%2Epy/raw?ref=gridtables" >> toMkdocs.py
- |
export SPEC_NAME=$(ls | grep -E "(TS|TR|WI).*\.md" | cut -d'.' -f1)
- |
......
......@@ -60,6 +60,8 @@ markdown_extensions:
pygments_lang_class: true
- pymdownx.inlinehilite
- pymdownx.snippets
- pymdownx.arithmatex:
generic: true
- pymdownx.superfences:
custom_fences:
- name: mermaid
......@@ -69,6 +71,10 @@ markdown_extensions:
alternate_style: true
- tables
extra_javascript:
- javascripts/mathjax.js
- https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js
##############################################################################
extra:
......
......@@ -11,6 +11,7 @@ from enum import Enum, auto
import argparse, re, os, shutil, hashlib, base64
from dataclasses import dataclass
from rich import print
from html import escape
verbose = False
veryVerbose = False
......@@ -418,6 +419,9 @@ _matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE)
# Line beginning (after optional whitespace) with a Markdown image reference
# "![alt](target)"; group 1 captures the target.
_matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECASE)
# Pipe-table line: optional leading whitespace, a '|', any content, a closing '|'
# and then exactly one whitespace character (presumably the trailing newline - TODO confirm).
_matchTable = re.compile(r'^\s*\|.*\|\s$', re.IGNORECASE)
# Pipe-table separator line: one or more "|---|"-style cells made only of '-', ':' and spaces.
_matchTableSeparator = re.compile(r'^\s*\|([-: ]+\|)+\s*$', re.IGNORECASE)
# First line of a grid table: optional leading whitespace, "+-", any content, a closing '+'
# and then exactly one whitespace character (presumably the trailing newline - TODO confirm).
_matchGridTable = re.compile(r'^\s*\+-.*\+\s$', re.IGNORECASE)
# Line containing at least one "+---+" segment (only '-' and ':' between two '+').
# The pattern is not anchored to the whole line, so partial separators match too.
_matchGridTableBodySeparator = re.compile(r'.*\+([-:]+\+)+.*$', re.IGNORECASE)
# Line containing at least one "+===+" segment (only '=' and ':' between two '+').
_matchGridTableHeaderSeparator = re.compile(r'.*\+([=:]+\+)+.*$', re.IGNORECASE)
# Line starting with exactly two whitespace characters followed by '-'.
_match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
# Markdown link whose target starts with '#', not directly preceded by '!'
# (i.e. not an image); group 1 captures the target.
_markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
# HTML anchor of the exact form <a href="...">text</a>; group 1 captures the href value.
_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
......@@ -447,6 +451,309 @@ def shortHash(value:str, length:int) -> str:
).digest()
).decode()[:length]
# Full separator line of a grid table, e.g. "+-----+=====+". Partial separators
# (a separator segment mixed with cell content) are deliberately not matched.
_matchGridTableSeparator = re.compile(r'\s*\+([-:=]+\+)+\s*$', re.IGNORECASE)
# One cell of a partial separator line that consists only of '-' and ':'.
_matchGridTableSeparatorLine = re.compile(r'[-:]+$', re.IGNORECASE)


def _new_grid_cell(position):
	"""Return an empty cell record; *position* is the offset of the cell's closing '+'."""
	return {
		"content": "NOCONTENT",
		"rowspan": 0,
		"colspan": 0,
		"colspan_adjusted": False,
		"position": position,
	}


def _add_grid_cell_content(cell, text):
	"""Store *text* in *cell*; the first text also makes the cell a real 1x1 cell."""
	if cell['content'] == "NOCONTENT":
		cell['rowspan'] += 1
		cell['colspan'] += 1
		cell['content'] = text
	else:
		cell['content'] += text


def _next_grid_delimiter(line, start, delimiters):
	"""Return the smallest offset after *start* of any of *delimiters* in *line*, or -1."""
	found = [line.find(delimiter, start + 1) for delimiter in delimiters if delimiter in line[start + 1:]]
	return min(found) if found else -1


def _adjust_grid_colspan(table_row, index, cell_count, line, delimiters, delimiter_positions):
	"""Widen ``table_row[index]`` by one column for every column boundary its text runs across.

	The adjustment is done only once per cell (guarded by ``colspan_adjusted``).
	"""
	cell = table_row[index]
	if cell['colspan_adjusted']:
		return
	cell['colspan_adjusted'] = True
	for j in range(index, cell_count):
		delimiter_start = table_row[j - 1]['position'] if j != 0 else 0
		position = _next_grid_delimiter(line, delimiter_start, delimiters)
		# Compare against the column boundaries of the widest separator line.
		if position > delimiter_positions[j]:  # Colspan to be increased
			cell['colspan'] += 1
		elif position < delimiter_positions[j]:
			raise ValueError("Wrong cell formatting")
		else:
			break


def _apply_grid_bold(rows):
	"""Replace Markdown ``**`` markers by alternating <strong> / </strong> tags, in place."""
	bold = "<strong>"
	for row in rows:
		for cell in row:
			while "**" in cell['content']:
				cell['content'] = cell['content'].replace("**", bold, 1)
				bold = "</strong>" if bold == "<strong>" else "<strong>"


def _check_grid_rows(rows, number_of_columns):
	"""Verify that every row covers exactly *number_of_columns* columns.

	Columns occupied by a rowspan started in a previous row are counted as covered.
	Not too much tested.
	"""
	# Sized by the maximum number of columns so that rows of different length are safe.
	forward_rowspan = [0] * number_of_columns
	for row_index, row in enumerate(rows):
		covered = 0
		for cell_index, cell in enumerate(row):
			covered += cell['colspan']
			if row_index > 0 and cell['colspan'] == 0 and forward_rowspan[cell_index] > 0:
				# Column is taken by a cell spanning down from an earlier row.
				covered += 1
				forward_rowspan[cell_index] -= 1
			if forward_rowspan[cell_index] == 0 and cell['rowspan'] > 1:
				forward_rowspan[cell_index] = cell['rowspan'] - 1
		if covered != number_of_columns:
			raise ValueError("Grid table not converted properly")


def parse_pandoc_table_with_spans(pandoc_table):
	"""
	Parse a Pandoc-style grid table into a structure for HTML conversion with rowspan and colspan.

	Each cell is a dict with the keys ``content``, ``rowspan``, ``colspan``,
	``colspan_adjusted`` and ``position``. Cells whose ``rowspan`` or ``colspan``
	is 0 are placeholders covered by a neighbouring spanning cell.

	Rows above the ``=`` header separator are returned as header rows. If the
	table has no header separator, all rows are returned as data rows.

	:param pandoc_table: String of the Pandoc-style grid table.
	:return: Tuple ``(header_rows, data_rows)``, each a list of rows (lists of cell dicts).
	:raises ValueError: If no separator or no row is found, or the grid is inconsistent.
	"""
	# Split the input into lines
	lines = [line.strip() for line in pandoc_table.strip().split("\n")]

	# Full separator lines delimit the rows (partial separators are handled per row)
	separator_indices = [i for i, line in enumerate(lines) if _matchGridTableSeparator.match(line)]
	if not separator_indices:
		raise ValueError("No valid separators found in the provided Pandoc table.")

	# Calculate max number of columns and the '+' positions of the widest separator
	delimiter_positions = []
	number_of_columns = 0
	for separator_index in separator_indices:
		separator_line = lines[separator_index]
		column_count = separator_line.count("+") - 1
		if column_count > number_of_columns:
			number_of_columns = column_count
			delimiter_positions = []
			for j in range(number_of_columns):
				search_start = delimiter_positions[j - 1] if j != 0 else 0
				delimiter_positions.append(separator_line.find("+", search_start + 1))

	# The last separator containing '=' marks the end of the header
	header_separator_index = None
	for index in separator_indices:
		if _matchGridTableHeaderSeparator.match(lines[index]):
			header_separator_index = index
	has_header = header_separator_index is not None

	header_rows = []
	data_rows = []
	for start, end in zip(separator_indices, separator_indices[1:]):
		table_row = []
		auxiliar_row = []		# Extra row created by partial separators (row-spanned cells)
		use_auxiliar_row = []
		has_merged_cells = False
		in_data_row = False

		# Lines between separators, including the starting separator line as it
		# gives information about the number of columns of the row
		for line in lines[start:end]:
			if _matchGridTableSeparator.match(line) and not in_data_row:
				number_of_columns_row = line.count("+") - 1
				in_data_row = True
				parts = re.split(r"\s*\+\s*", line.strip("+"))
				# Add as many cells as columns with span attributes
				delimiter_index = 0
				for i in range(number_of_columns_row):
					delimiter_index += len(parts[i]) + 1
					table_row.append(_new_grid_cell(delimiter_index))
				for _ in range(number_of_columns):
					auxiliar_row.append(_new_grid_cell(0))
					use_auxiliar_row.append(False)

			elif in_data_row:
				if _matchGridTableBodySeparator.match(line):	# Partial separator
					has_merged_cells = True
					cells = re.split(r"\s*[\|\+]\s*", line.strip("|").strip("+"))
					if len(cells) > number_of_columns:
						raise ValueError("More cells than columns found")
					# Fewer cells than columns means at least one cell spans columns
					needs_colspan = len(cells) < number_of_columns
					for i, cell_text in enumerate(cells):
						if _matchGridTableSeparatorLine.match(cell_text):
							# A new row starts below this cell
							use_auxiliar_row[i] = True
							continue
						# Cell which is not separator: it continues across the partial separator
						_add_grid_cell_content(table_row[i], cell_text)
						table_row[i]['rowspan'] += 1
						if needs_colspan:
							_adjust_grid_colspan(table_row, i, len(cells), line, "|+", delimiter_positions)

				else:	# Data row
					cells = re.split(r"\s*\|\s*", line.strip("|"))
					if len(cells) > number_of_columns:
						raise ValueError("More cells than columns found")
					if len(cells) < number_of_columns:
						# Colspan: Positions of | with respect to + need to be determined
						for i, cell_text in enumerate(cells):
							_add_grid_cell_content(table_row[i], cell_text)
							_adjust_grid_colspan(table_row, i, len(cells), line, "|", delimiter_positions)
					else:	# Simple row
						for i, cell_text in enumerate(cells):
							target_row = auxiliar_row if use_auxiliar_row[i] else table_row
							_add_grid_cell_content(target_row[i], cell_text)
			else:
				raise ValueError("No separator line found for row starting")

		# Rows above the header separator are header rows. Everything else,
		# including every row of a table without header, is a data row.
		if has_header and start < header_separator_index:
			target_rows = header_rows
		else:
			target_rows = data_rows
		target_rows.append(table_row)
		if has_merged_cells:
			target_rows.append(auxiliar_row)

	# Correct newlines characters
	for row in header_rows + data_rows:
		for cell in row:
			cell['content'] = cell['content'].replace("\\", "<br>")

	# Check if there are any rows
	if not data_rows and not header_rows:
		raise ValueError("No valid rows found in the provided Pandoc table.")

	# Format text. The open/close state is tracked separately for header and body.
	_apply_grid_bold(header_rows)
	_apply_grid_bold(data_rows)

	# Checking that the grid is correct
	_check_grid_rows(header_rows, number_of_columns)
	_check_grid_rows(data_rows, number_of_columns)

	return header_rows, data_rows
def generate_html_table_with_spans(pandoc_table):
	"""
	Render a Pandoc-style grid table as an HTML table, honouring row and column spans.

	Placeholder cells (``rowspan`` or ``colspan`` equal to 0) are not emitted. A
	``<thead>`` section is written only when the header holds at least one real cell.

	:param pandoc_table: String of the Pandoc-style grid table.
	:return: HTML string.
	"""
	def is_real(cell):
		# Placeholders are covered by a spanning neighbour and produce no <td>
		return cell['rowspan'] != 0 and cell['colspan'] != 0

	def render_rows(rows):
		fragments = []
		for row in rows:
			fragments.append(" <tr>\n")
			for cell in filter(is_real, row):
				rowspan = ""
				if cell["rowspan"] > 1:
					rowspan = f" rowspan=\"{cell['rowspan']}\""
				colspan = ""
				if cell["colspan"] > 1:
					colspan = f" colspan=\"{cell['colspan']}\""
				fragments.append(f" <td{rowspan}{colspan}>{cell['content']}</td>\n")
			fragments.append(" </tr>\n")
		return "".join(fragments)

	grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)

	pieces = ["<table>\n"]
	if any(is_real(cell) for row in grid_header for cell in row):
		pieces.append(" <thead>\n")
		pieces.append(render_rows(grid_header))
		pieces.append(" </thead>\n")
	pieces.append(" <tbody>\n")
	pieces.append(render_rows(grid_body))
	pieces.append(" </tbody>\n")
	pieces.append("</table>")
	return "".join(pieces)
def analyseMarkdown(filename:str) -> Document:
""" Analyse the markdown file and split it into clauses.
......@@ -473,6 +780,9 @@ def analyseMarkdown(filename:str) -> Document:
inCodefence = False
inTable = False
tableHasSeparator = False
inGridTable = False
gridTableHasSeparator = False
gridTable = ""
for line in inLines:
# Detect and handle codefences
......@@ -493,7 +803,7 @@ def analyseMarkdown(filename:str) -> Document:
continue
# Detect and handle tables
if _matchTable.match(line) and not inTable:
if _matchTable.match(line) and not inTable and not inGridTable:
inTable = True
outClauses[-1].append(Line(line, LineType.TABLEHEADER))
continue
......@@ -512,6 +822,34 @@ def analyseMarkdown(filename:str) -> Document:
outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
# continue with other matches
#Detect grid tables and convert them to html table
if _matchGridTable.match(line) and not inGridTable:
inGridTable = True
#outClauses[-1].append(Line(line, LineType.TABLEHEADER))
gridTable += line
continue
if inGridTable:
if _matchGridTableHeaderSeparator.match(line) or _matchGridTableBodySeparator.match(line):
#outClauses[-1].append(Line(line, LineType.TABLESEPARATOR))
gridTable += line
continue
elif _matchTable.match(line):
#outClauses[-1].append(Line(line, LineType.TABLEROW))
gridTable += line
continue
else:
inGridTable = False
# Mark the previous line as the last row in the table
#outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
print(gridTable)
htmltable = ""
htmltable = generate_html_table_with_spans(gridTable)
print(htmltable)
for row in htmltable:
outClauses[-1].append(Line(row, LineType.TABLEROW))
gridTable = ""
# continue with other matches
# Detect notes
# Notes are lines that start with a '>'.
if _matchNote.match(line):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment