Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • addDiffCR
  • clause0
  • conflicts
  • detailsConflicts
  • devel
  • gridtables
  • master
  • miguel
  • newCoversheet
  • publication
  • restructure
  • splitting
  • testTables
  • toMkdocs
  • upgradeIndex
  • using_pages
  • workitems
17 results

Target

Select target project
  • tools/scripts
1 result
Select Git revision
  • addDiffCR
  • clause0
  • conflicts
  • detailsConflicts
  • devel
  • gridtables
  • master
  • miguel
  • newCoversheet
  • publication
  • restructure
  • splitting
  • testTables
  • toMkdocs
  • upgradeIndex
  • using_pages
  • workitems
17 results
Show changes
Commits on Source (30)
...@@ -3,3 +3,4 @@ ...@@ -3,3 +3,4 @@
*/ts-* */ts-*
*/.python-version */.python-version
.python-version .python-version
toMkdocs/__pycache__
BSD 3-Clause License BSD 3-Clause License
Copyright (c) 2024, Miguel Angel Reina Ortega Copyright (c) 2024, Miguel Angel Reina Ortega & Andreas Kraft
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met: modification, are permitted provided that the following conditions are met:
......
#
# processMDSpec.py
#
# (c) 2025 by Andreas Kraft
# License: BSD 3-Clause License. See the LICENSE file for further details.
#
""" This script processes markdown specification files. It handles the
include statements and the front matter. It can also render the markdown
content on console or output the front matter only.
"""
from __future__ import annotations
_print = print # save the original print function
from typing import Tuple, Generator
import argparse
from rich import markdown, print
import re, sys, yaml, os
from contextlib import contextmanager
# Front matter collected while processing, keyed by the normalized name of the file it came from.
_frontMatter:dict = {}
# Names of the files currently being processed; used to detect circular includes.
_includeStack:list[str] = []
@contextmanager
def includeStack(filename:str) -> Generator[None, None, None]:
    """ Track a file on the include stack while it is being processed.

        The file name is pushed on the module-level include stack for the
        duration of the ``with`` block and removed again afterwards, also when
        the block is left through an exception.

        Args:
            filename: The name of the file being processed.

        Raises:
            Exception: If the file is already on the stack, i.e. a circular include is detected.

        Returns:
            Generator: A generator that yields nothing.
    """
    if filename in _includeStack:
        raise Exception(f'Circular include detected: {" -> ".join(_includeStack)} -> {filename}')
    _includeStack.append(filename)
    try:
        yield
    finally:
        # Unwind even when the body raised, otherwise a failed file would stay
        # on the stack and be reported as a circular include later on.
        _includeStack.pop()
def expandPaths(lines:list[str], currentPath:str, childPath:str) -> list[str]:
    """ Expand the paths in the markdown file. This means that all paths in links,
        images, and include statements are extended so that they would be valid paths
        from the root document.

        Lines inside code fences are left untouched. URLs (``http://``,
        ``https://``) and absolute paths (starting with ``/``) in links are not
        changed. A leading ``./`` of a link or include path is dropped before
        the prefix is added.

        Args:
            lines: The lines of the markdown file. The list is updated in place.
            currentPath: The current path of the file being processed.
            childPath: The path of the child file being processed.

        Returns:
            list[str]: The lines of the markdown file with expanded paths.
    """
    # Build the prefix that is put in front of every relative path.
    # An empty currentPath contributes nothing to the prefix.
    if currentPath and not currentPath.endswith('/'):
        currentPath += '/'
    newPath = currentPath + childPath

    # Remove the leading './' from the path
    while newPath.startswith('./'):
        newPath = newPath[2:]

    def _prefixed(path:str) -> str:
        """ Return `path` (without a leading './') prefixed with `newPath`. """
        if path.startswith('./'):
            path = path[2:]
        if not newPath:
            return path
        return f'{newPath}{path}' if newPath.endswith('/') else f'{newPath}/{path}'

    def _expandLink(match:re.Match) -> str:
        """ Rewrite a single ``[text](path)`` occurrence. """
        linkText, linkPath = match.group(1), match.group(2)
        # Skip URLs and absolute paths
        if linkPath.startswith(('http://', 'https://', '/')):
            return match.group(0)
        return f'[{linkText}]({_prefixed(linkPath)})'

    def _expandInclude(match:re.Match) -> str:
        """ Rewrite the file path of an ``::include{file=...}`` statement. """
        return f'{match.group(1)}{_prefixed(match.group(2))}}}'

    inCodeFence = False
    for index, line in enumerate(lines):

        # Ignore stuff in code fences
        if re.match(r'^\s*```.*', line):
            inCodeFence = not inCodeFence
            continue
        if inCodeFence:
            continue

        # handle the links in a line (there could be multiple links in a line)
        line = re.sub(r'\[([^\]]+)\]\(([^\)]+)\)', _expandLink, line)

        # handle the include statements (there should only be one per line)
        line = re.sub(r'^(\s*::include\{file=)([^\}]+)\}', _expandInclude, line)

        lines[index] = line

    return lines
def processFrontMatter(lines:list[str], args:argparse.Namespace) -> Tuple[dict, list[str]]:
    """ Process the front matter of a markdown file. This includes extracting
        the front matter information and returning it as a dictionary.

        Currently only YAML front matter is supported. It can be extended later.
        The front matter starts on the first line with ``---`` and ends at the
        next line that starts with ``---``.

        Args:
            lines: The lines of the markdown file.
            args: The command line arguments (currently not used).

        Raises:
            yaml.YAMLError: If the front matter cannot be parsed as YAML.

        Returns:
            dict: The front matter information as a dictionary. An empty
                dictionary is returned if there is no or only empty front matter.
            list[str]: The lines of the markdown file without the front matter.
    """
    if not lines or not lines[0].startswith('---'):
        return {}, lines

    frontMatterLines:list[str] = []
    for line in lines[1:]:
        if re.match(r'^---\s*', line):
            break
        frontMatterLines.append(line)

    # Remove the front matter from the lines
    # (the collected lines plus the opening and closing delimiter lines)
    lines = lines[len(frontMatterLines)+2:]

    # Parse the front matter as YAML
    try:
        # safe_load() yields None for an empty document; always hand back a dict
        return yaml.safe_load(''.join(frontMatterLines)) or {}, lines
    except yaml.YAMLError as e:
        print(f'[red]Error parsing front matter: {e}')
        raise
def processFile(args:argparse.Namespace) -> str:
    """ Handle the include statements in the markdown files. This includes
        processing the include statements and removing the include statements
        from the markdown files.

        Args:
            args: The command line arguments. ``args.document`` is the root
                document, ``args.doInclude`` controls whether include
                statements are resolved.

        Raises:
            Exception: If the file cannot be processed.

        Returns:
            The processed markdown content as a string.
    """

    def handleIncludesForFile(filename:str, currentPath:str) -> str:
        """ Read a single markdown file, expand its paths, collect its front
            matter, and (optionally) replace its include statements with the
            content of the included files.

            Args:
                filename: The name of the file to read.
                currentPath: The directory of the including file.

            Raises:
                FileNotFoundError: If the file cannot be found.

            Returns:
                The content of the file.
        """
        # Get the directory path from the filename
        dirname = os.path.dirname(filename)
        if dirname and not dirname.endswith('/'):
            dirname = dirname + '/'

        dirname = dirname if dirname else '.'
        currentPath = currentPath if currentPath else '.'
        filename = os.path.normpath(filename)

        with includeStack(filename):

            try:
                with open(filename, 'r') as f:
                    lines = f.readlines()
            except FileNotFoundError:
                print(f'[red]File not found: {filename}')
                raise

            # Expand the paths in the markdown file
            # extract front matter information
            lines = expandPaths(lines, currentPath, dirname)
            fm, lines = processFrontMatter(lines, args)
            if fm:
                _frontMatter[filename] = fm

            if not args.doInclude:
                return ''.join(lines)

            inCodeFence = False
            for index, line in enumerate(lines):

                # Ignore stuff code fences
                if re.match(r'^\s*```.*', line):
                    inCodeFence = not inCodeFence
                    continue
                if inCodeFence:
                    continue

                # Check for ::include{file=...} pattern using regex at the beginning of a line
                match = re.search(r'^::include\{\s*file=(.*?)\s*\}', line.strip())
                if match:
                    includeFilename = match.group(1)
                    # Read the included file and replace the include statement with its content.
                    # The position of the current line is used (not a search for its text), so an
                    # identical line earlier in the file, e.g. inside a code fence, is not replaced.
                    lines[index] = handleIncludesForFile(includeFilename, os.path.dirname(filename))

            return ''.join(lines)

    return handleIncludesForFile(args.document, os.path.dirname(args.document))
if __name__ == '__main__':
    # Command line interface: process one markdown document and write the
    # result (and/or the collected front matter) to stdout.
    parser = argparse.ArgumentParser(description='Process markdown specification files.')
    parser.add_argument('--no-include', dest='doInclude', action='store_false', default=True, help="don't process include statements")
    parser.add_argument('--render-markdown', '-md', dest='renderAsMarkdown', action='store_true', help='render output as markdown')
    parser.add_argument('--process-frontmatter', '-fm', dest='outputFrontMatter', action='store_true', help='output the collected front matter, followed by the document')
    parser.add_argument('--frontmatter-only', '-fmo', dest='onlyFrontMatter', action='store_true', help='output only front matter')
    parser.add_argument('--verbose', '-v', action='store_true', help='print debug information to stderr.')
    parser.add_argument('document', type=str, help='a markdown specification document to process')
    args = parser.parse_args()

    if args.verbose:
        if not args.doInclude:
            print('[yellow]Skipping processing include statements', file=sys.stderr)
        else:
            print('[green]Processing include statements', file=sys.stderr)

    try:
        lines = processFile(args)
    except Exception as e:
        print(f'[red]Error while processing {args.document}\n{e}', file=sys.stderr)
        # sys.exit() instead of quit(): quit() is only a helper installed by the site module
        sys.exit(1)

    if args.outputFrontMatter or args.onlyFrontMatter:
        # Collect front matter information in the output
        if not args.onlyFrontMatter:
            print('---')
        # The following is a workaround to keep the order of the dictionary
        # see https://stackoverflow.com/a/52621703
        yaml.add_representer(dict, lambda self, data: yaml.representer.SafeRepresenter.represent_dict(self, data.items()))
        print(yaml.dump(_frontMatter, default_flow_style=False), end='')
        if not args.onlyFrontMatter:
            print('---')

    if not args.onlyFrontMatter:
        if args.renderAsMarkdown:
            # Render the markdown content
            print(markdown.Markdown(lines))
        else:
            # Print the raw markdown content
            # (with the saved builtin print, so no console markup is interpreted)
            _print(lines)
#
# gridTableFilter.py
#
# (c) 2025 by Andreas Kraft & Miguel Angel Reina Ortega
# License: BSD 3-Clause License. See the LICENSE file for further details.
#
""" This script replaces the grid tables in the markdown files with the equivalent
html tables. Other markdown elements are not affected and are passed through.
The script expects the markdown file to be converted from stdin and writes the
result to stdout.
"""
import argparse, sys
from markdownTools import analyseMarkdown, setLoggers
def main() -> None:
    """ Entry point of the grid table filter.

        Parses the command line, installs the logging callbacks, reads the
        markdown document from stdin, and writes the converted document to
        stdout. Log output goes to stderr and is only emitted when the
        verbose flag is set.
    """
    # Command line handling
    argumentParser = argparse.ArgumentParser(description='Convert grid tables to html tables. This script reads the markdown file from stdin and writes the result to stdout.')
    argumentParser.add_argument('-v', '--verbose', action='store_true', help='Print debug information to stderr.')
    options = argumentParser.parse_args()

    # Logging callbacks. Each one writes to stderr, and only in verbose mode.
    def logInfo(m) -> None:
        if options.verbose:
            print(f'[green]{m}', file=sys.stderr)

    def logDebug(m) -> None:
        if options.verbose:
            print(f'[dim]{m}', file=sys.stderr)

    def logError(m) -> None:
        if options.verbose:
            print(f'[red]{m}', file=sys.stderr)

    setLoggers(info=logInfo, debug=logDebug, error=logError)

    # Convert everything that arrives on stdin and emit the result on stdout
    markdownLines = sys.stdin.readlines()
    converted = analyseMarkdown(inLines=markdownLines)
    print(converted, file=sys.stdout)


if __name__ == '__main__':
    main()
#
# gridTableTools.py
#
# (c) 2025 by Miguel Angel Reina Ortega & Andreas Kraft
# License: BSD 3-Clause License. See the LICENSE file for further details.
#
""" Tools for working with grid tables in markdown files. """
from typing import Optional, Callable
from regexMatches import *
# HTML alignment attributes that are written into the generated table cells.
_alignLeft = 'align="left"'
_alignRight = 'align="right"'
_alignCenter = 'align="center"'
# NOTE(review): the literal below looks empty in this listing although the trailing comment
# requires a single character - possibly a non-printing character. Confirm the actual value
# before editing this line.
_nextListElementMark = '' # Marks a continuing list in the line before. !!! Must be a single character
# Logging callbacks used by this module. They default to print and are replaced through setLoggers().
printInfo = print
printDebug = print
printError = print
def setLoggers(info:Callable=print, debug:Callable=print, error:Callable=print) -> None:
    """ Install the logging callbacks of this module.

        Args:
            info: Callback for informational messages.
            debug: Callback for debug messages.
            error: Callback for error messages.
    """
    global printInfo, printDebug, printError
    printInfo, printDebug, printError = info, debug, error
class GridCell:
    """ A single cell of a grid table, together with its span, position and alignment data. """

    def __init__(self) -> None:
        """ Create an empty cell: no content, zero spans, centered, no position yet. """
        self.content:Optional[str] = None
        self.rowspan:int = 0
        self.colspan:int = 0
        self.colspanAdjusted:bool = False
        self.alignment:str = 'align="center"'
        self.positionStart:Optional[int] = None
        self.position:Optional[int] = None
        self.listFlag:bool = False
        self.auxiliarIndex:int = 0


    def calculateAndSetAlignment(self,
                                 headerDelimiterPositions:list[int],
                                 delimiterPositions:list[int],
                                 defaultAlignments:list[str],
                                 hasHeader:bool) -> None:
        """ Pick the alignment of the cell from the default alignments.

            The cell takes the alignment of the first header delimiter whose
            position is not left of the start position of the cell. Nothing is
            changed when the table has no header.

            Args:
                headerDelimiterPositions: The positions of the header delimiters.
                delimiterPositions: The positions of the delimiters (not used here).
                defaultAlignments: The default alignments.
                hasHeader: True if the table has a header, False otherwise.

            Raises:
                ValueError: If the cell has no position yet, or if the cell starts
                    right of all header delimiters.
        """
        if self.positionStart is None or self.position is None:
            raise ValueError('Cell position must be set before calculating alignment.')

        if not hasHeader:
            return

        for candidate in range(len(defaultAlignments)):
            if self.positionStart <= headerDelimiterPositions[candidate]:
                self.alignment = defaultAlignments[candidate]
                return

        # The cell starts behind the last known header delimiter
        raise ValueError('Invalid table formatting')


    def __str__(self):
        description = (
            f'Content: {self.content}',
            f'Rowspan: {self.rowspan}',
            f'Colspan: {self.colspan}',
            f'Alignment: {self.alignment}',
            f'Position: {self.position}',
            f'ListFlag: {self.listFlag}',
            f'AuxiliarIndex: {self.auxiliarIndex}',
        )
        return '(' + ', '.join(description) + ')'


    def __repr__(self):
        return self.__str__()
class GridRow():
    """ Represents a row in a grid table.

        A row is a fixed-length list of cells that can be read and written by
        column index (``row[index]``).
    """
    # Declared for type checkers only. No class-level list is created, so rows
    # can never share a mutable default; every instance gets its own list in __init__().
    cells:list[GridCell]

    def __init__(self, length: int = 1) -> None:
        """ Create a row with `length` new, empty cells.

            Args:
                length: The number of cells in the row.
        """
        self.cells = [GridCell() for _ in range(length)]


    def __getitem__(self, item):
        return self.cells[item]


    def __setitem__(self, key, value):
        self.cells[key] = value


    def __str__(self):
        return str(self.cells)


    def __repr__(self):
        return self.__str__()
class GridRowsTracker():
    """ A fixed-size list of integer counters, all starting at 0, that can be
        read and written by index.
    """

    def __init__(self, size:int) -> None:
        """ Create a tracker with `size` counters set to 0.

            Args:
                size: The number of counters.
        """
        self.gridRowTracker = [0] * size


    def __getitem__(self, item:int) -> int:
        return self.gridRowTracker[item]


    def __setitem__(self, key:int, value:int) -> None:
        self.gridRowTracker[key] = value


    def __str__(self):
        return f'{self.gridRowTracker}'


    def __repr__(self):
        return self.__str__()


    def max(self) -> int:
        """ Return the largest counter value. """
        return max(self.gridRowTracker)
# Some type aliases
# A table row is a plain list of cells; a list of such rows is what the parser returns for header and body.
GridTableRow = list[GridCell]
GridTableRowList = list[GridTableRow]
def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableRowList]:
"""
Parse a Pandoc-style grid table into a structure for HTML conversion with rowspan and colspan.

Args:
gridTable: String of the Pandoc-style grid table.

Raises:
ValueError: If no separator line is found, delimiters are misaligned, a row has more cells than columns, no rows could be parsed, or the final span check of a row does not add up to the number of columns.

Returns:
A tuple (headerRows, dataRows). Each element is a list of rows, and each row is a list of GridCell objects carrying content, span and alignment data.
"""
#global hasHeader, defaultAlignments, headerDelimiterPositions, delimiterPositions, nextListElementMark
# Initialize the parser state (local variables; they were globals once, see the line above)
hasHeader = False
defaultAlignments:list[str] = []
headerDelimiterPositions:list[int] = []
delimiterPositions:list[int] = []
# Split the input into lines
lines:list[str] = [line for line in gridTable.rstrip().split('\n')]
# Detect separator lines by pattern (it does not take into account partial separators)
def isSeparator(line:str) -> bool:
return matchGridTableSeparator.match(line) is not None
# Set content on the cell - concatenating multilines, flagging lists
# A trailing backslash of a content line is turned into a newline.
# NOTE(review): `re` is not imported in the visible part of this file; presumably it is provided by `from regexMatches import *` - confirm.
def handleCellContent(cell:GridCell, content:str) -> None:
_c = content.strip()
if cell.content is None: # Previous empty cell
cell.rowspan += 1
cell.colspan += 1
if _c.startswith('- '): # List in a cell
cell.listFlag = True
_c = re.sub(r'\\\s*$', '\n', _c)
cell.content = _c + _nextListElementMark # Add list element end mark to know when the list element ends
elif cell.listFlag and len(_c) > 0: # any other content when handling list is concatenated to the last list element
_c = re.sub(r'\\\s*$', '\n', _c)
cell.content = _c + _nextListElementMark #add the list element end mark
elif not _c: # empty line. separation between list and other paragraph
# cell.content = '\n' if not cell.content.endswith('\n') else ""
cell.content = '\n' # cell content is always empty / None here.
else:
cell.content = re.sub(r'\\\s*$', '\n', _c)
else: # Cell has content
if _c.startswith('- '): # List
if not cell.listFlag:
cell.content += '\n'
#cell['content'] = cell['content'].strip("\n")
cell.listFlag = True
_c = re.sub(r'\\\s*$', '\n', _c)
cell.content += _c + _nextListElementMark # Add list element end mark to know when the list element ends
elif cell.listFlag and len(_c) > 0: # any other content when handling list is concatenated to the last list element
# cell.content = cell.content.strip(nextListElementMark) #remove list element end mark
cell.content = cell.content.removesuffix(_nextListElementMark) #remove list element end mark
_c = re.sub(r'\\\s*$', '\n', _c)
cell.content += ' ' + _c + _nextListElementMark #add list element end mark
elif len(_c) == 0: # separation between list and other paragraph
if cell.listFlag:
cell.listFlag = False
cell.content += '\n\n' #end list by \n
#content = re.sub(r'\\\s*$', "\n", content.strip())
cell.content += '\n' if not cell.content.endswith('\n') else ''
else:
cell.content += ' ' + re.sub(r'\\\s*$', '\n', _c)
# Adjust colspan of a cell
# The next '|' or '+' after the previous cell's delimiter is compared with the expected delimiter positions.
# Both call sites below pass numberOfColumns for numberOfParts.
def adjustColspan(row:GridRow, columnIndex:int, numberOfParts:int, line, numberOfColumns:int, delimiterPositions:list[int]) -> None:
for j in range(columnIndex, numberOfParts):
delimiterStart:Optional[int] = None
colI = columnIndex
# Walk to the left until a cell with a known delimiter position is found (0 for the first column)
while delimiterStart == None:
delimiterStart = row[colI - 1].position if colI > 0 else 0
colI -= 1
positions = [line.find(delimiter, delimiterStart + 1) for delimiter in "|+" if delimiter in line[delimiterStart + 1:]]
position = min(positions) if positions else -1
if position > delimiterPositions[j]: # Colspan to be increased
row[columnIndex].colspan += 1
if position == delimiterPositions[len(delimiterPositions) - 1]: # last cell in row, adjust colspan to get max number columns
colspan_allocated = row[columnIndex].colspan
row[columnIndex].colspan += numberOfColumns - colspan_allocated - columnIndex
elif position < delimiterPositions[j]:
raise ValueError("Wrong cell formatting")
else:
break
row[columnIndex].colspanAdjusted = True # Mark cell as adjusted
def checkDelimiterAlignment(line: str, delimiterPositions:list[int], delimiters: str = "|+") -> bool:
"""
Check if delimiters in a row align with expected positions.
Args:
line: The line of text to check
delimiterPositions: List of expected positions (based on + characters)
delimiters: String containing valid delimiter characters (default: "|+")
Returns:
bool: True if delimiters align correctly, False otherwise
"""
if not line or not delimiterPositions:
return False
printDebug(f'\nChecking line: "{line}"')
printDebug(f'Expected delimiter positions: {delimiterPositions}')
# NOTE(review): each of the three checks below uses all() over the found positions, so a line
# without any delimiter after column 0 yields True - confirm this is intended.
# For full separator lines (only +)
if '+' in line and '|' not in line:
currentPositions = [i for i, char in enumerate(line) if (char == '+' and i > 0)]
printDebug(f'Full separator line - Found + at positions: {currentPositions}')
return all(delimiterPositions[-1] in currentPositions and line.startswith('+') and pos in delimiterPositions
for pos in currentPositions)
# For data lines (only |)
if '|' in line and '+' not in line:
currentPositions = [i for i, char in enumerate(line) if (char == '|' and i > 0)]
printDebug(f'Data line - Found | at positions: {currentPositions}')
return all(delimiterPositions[-1] in currentPositions and line.startswith("|") and pos in delimiterPositions
for pos in currentPositions)
# For partial separators (mix of + and |)
currentPositions = [i for i, char in enumerate(line) if (char in delimiters and i > 0)]
printDebug(f'Partial separator - Found delimiters at positions: {currentPositions}')
printDebug(f'Characters at those positions: {[line[pos] for pos in currentPositions]}')
return all(delimiterPositions[-1] in currentPositions and line.startswith(('+', '|')) and pos in delimiterPositions
for pos in currentPositions)
# Indices of all full separator lines; consecutive pairs delimit the table rows processed below
separatorIndices = [i for i, line in enumerate(lines) if isSeparator(line)]
if not separatorIndices:
raise ValueError('No valid separators found in the provided grid table.')
# Calculate max number of columns
# The separator line with the most '+' characters defines the column count and the delimiter positions
delimiterPositions = []
numberOfColumns:int = 0
for separatorIndex in separatorIndices:
if (_cnt := lines[separatorIndex].count('+') - 1) > numberOfColumns:
numberOfColumns = _cnt
delimiterPositions = []
for rowIndex in range(numberOfColumns):
delimiterPositionsStart = delimiterPositions[rowIndex - 1] if rowIndex != 0 else 0
delPositions = [lines[separatorIndex].find(delimiter, delimiterPositionsStart + 1)
for delimiter in '+' if delimiter in lines[separatorIndex][delimiterPositionsStart + 1:]]
delimiterPositions.append(min(delPositions) if delPositions else -1)
# Determine delimiter positions and alignments
headerRows:GridTableRowList = []
dataRows:GridTableRowList = []
for index in separatorIndices:
if matchGridTableHeaderSeparator.match(lines[index]):
hasHeader = True
headerSeparatorIndex = index
parts = re.split(r'\+', lines[index].strip('+'))
#Calculate default alignments and positions of delimiters
for partIndex in range(len(parts)):
# Left alignment
if parts[partIndex].startswith(':') and not parts[partIndex].endswith(':'):
defaultAlignments.append(_alignLeft)
# Right alignment
elif not parts[partIndex].startswith(':') and parts[partIndex].endswith(':'):
defaultAlignments.append(_alignRight)
# Center alignment
else:
defaultAlignments.append(_alignCenter)
# Delimiter position
delimiterPositionsStart = delimiterPositions[partIndex - 1] if partIndex != 0 else 0
delPositions = [lines[index].find(delimiter, delimiterPositionsStart + 1)
for delimiter in '+' if delimiter in lines[index][delimiterPositionsStart + 1:]]
headerDelimiterPositions.append(min(delPositions) if delPositions else -1)
if not hasHeader:
# Set default alignments from the first separator which takes the role of header
# hasHeader is set to True here as well, so the rest of the function treats the first line as header separator
hasHeader = True
headerSeparatorIndex = 0
parts = re.split(r'\+', lines[0].strip('+'))
# Calculate default alignments and positions of delimiters
for partIndex in range(len(parts)):
if parts[partIndex].startswith(':') and not parts[partIndex].endswith(':'):
defaultAlignments.append(_alignLeft)
elif not parts[partIndex].startswith(':') and parts[partIndex].endswith(':'):
defaultAlignments.append(_alignRight)
else:
defaultAlignments.append(_alignCenter)
# Delimiter position
# NOTE(review): `parts` is taken from lines[0], but the positions below are searched in lines[index],
# where `index` is the variable of the `for index in separatorIndices` loop above - confirm this is intended.
delimiterPositionsStart = delimiterPositions[partIndex - 1] if partIndex != 0 else 0
delPositions = [lines[index].find(delimiter, delimiterPositionsStart + 1)
for delimiter in '+' if delimiter in lines[index][delimiterPositionsStart + 1:]]
headerDelimiterPositions.append(min(delPositions) if delPositions else -1)
#Check end table delimiter alignment (not checked during the lines processing)
if not checkDelimiterAlignment(lines[-1], delimiterPositions):
raise ValueError(f'Misaligned delimiters in end table separator: {lines[-1]}')
# Process the table row by row: each row is the block of lines between two consecutive separator lines
for rowNumber in range(len(separatorIndices) - 1):
rows:list[GridRow] = []
rowsTracker:GridRowsTracker
inDataRow = False
start, end = separatorIndices[rowNumber], separatorIndices[rowNumber + 1]
rowLines = lines[start:end] # Lines between separators including separator line start as it gives information about the number of columns of the row
if rowLines:
# Combine multiline content into single strings for each cell
for line in rowLines:
line = line.rstrip()
if isSeparator(line) and not inDataRow:
inDataRow = True
# Add delimiter alignment check for separator lines
if not checkDelimiterAlignment(line, delimiterPositions):
raise ValueError(f'Misaligned delimiters in separator row: {line}')
parts = re.split(r'\s*\+\s*', line.strip('+'))
delimiterIndex = 0
# Start a new row of empty cells and a fresh tracker for this block of lines
rows.append(GridRow(numberOfColumns))
rowsTracker = GridRowsTracker(numberOfColumns)
columnIndex = 0
for rowIndex in range(len(parts)):
if columnIndex in range(numberOfColumns):
delimiterIndex += len(parts[rowIndex]) + 1
cell = rows[-1][columnIndex]
# Set position
cell.positionStart = delimiterIndex - len(parts[rowIndex])
cell.position = delimiterIndex # Position of cell delimiter +
# Set alignment as defined by header separator line
cell.calculateAndSetAlignment(headerDelimiterPositions, delimiterPositions, defaultAlignments, hasHeader)
# Skip the columns that are covered by this (wider) part
while delimiterIndex > delimiterPositions[columnIndex]:
columnIndex += 1
columnIndex += 1
elif inDataRow:
# Regular data row or partial separator
if matchGridTableBodySeparator.match(line): # Partial separator
# Add delimiter alignment check for partial separators
if not checkDelimiterAlignment(line, delimiterPositions):
raise ValueError(f'Misaligned delimiters in partial separator: {line}')
cellsContent = re.split(r'[\|\+]', line.strip('|').strip('+')) # (?<!\\)[\|\+]
#Add another row, set delimiters for each cell
rows.append(GridRow(numberOfColumns))
auxDelimiterIndex = 0
auxiliarCellIndex = 0
for columnIndex, content in enumerate(cellsContent):
if auxiliarCellIndex < numberOfColumns:
auxDelimiterIndex += len(content) + 1
cell = rows[-1][auxiliarCellIndex]
cell.positionStart = auxDelimiterIndex - len(content) # Start position of the cell
cell.position = auxDelimiterIndex # Position of cell delimiter +
cell.calculateAndSetAlignment(headerDelimiterPositions, delimiterPositions, defaultAlignments, hasHeader)
while auxDelimiterIndex > delimiterPositions[auxiliarCellIndex]:
auxiliarCellIndex += 1
auxiliarCellIndex += 1
if len(cellsContent) <= numberOfColumns: # Colspan: Positions of | with respect to + need to be determined
columnCellIndex = 0
# Put the value in a variable here because we need the initial value
maxRowsTracker = rowsTracker.max()
# Go through all cells in a column
for columnIndex, content in enumerate(cellsContent):
rowIndex = rowsTracker[columnCellIndex]
cell = rows[rowIndex][columnCellIndex]
# Check whether a cell contains a header separator
if matchGridTableBodySeparatorLine.match(content): # A new row is to be added
rowsTracker[columnCellIndex] = maxRowsTracker + 1 # That actual row will have more than one row
rowIndex = rowsTracker[columnCellIndex]
cell = rows[rowIndex][columnCellIndex]
cell.listFlag = False
columnForward = 0
for delIndex in range(columnCellIndex, len(delimiterPositions)):
rowIndex = rowsTracker[columnCellIndex] # Correcting the rowIndex. Might have been changed by a previous iteration
if rows[rowIndex][columnCellIndex].position >= delimiterPositions[delIndex]:
columnForward += 1
#rowsTracker[columnCellIndex + columnForward - 1] = maxRowsTracker + 1 if columnForward > 1 else 0
columnCellIndex += columnForward
continue
else:
# Handle content of the cell
handleCellContent(cell, cellsContent[columnIndex])
cell.rowspan += 1
if not cell.colspanAdjusted:
# TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
adjustColspan(rows[rowIndex], columnCellIndex, numberOfColumns, line, numberOfColumns, delimiterPositions)
if cell.position >= delimiterPositions[columnCellIndex]:
columnCellIndex += cell.colspan if cell.colspan != 0 else 1
continue
else:
raise ValueError(f'More cells than columns found ({len(cellsContent)} {numberOfColumns})')
else: # Data row
cellsContent = re.split(r'\|', line.strip('|'))
# Add delimiter alignment check
if not checkDelimiterAlignment(line, delimiterPositions):
raise ValueError(f'Misaligned delimiters in row: {line}')
columnCellIndex = 0
if len(cellsContent) < numberOfColumns: # Colspan: Positions of | with respect to + need to be determined
for columnIndex, content in enumerate(cellsContent):
row = rows[rowsTracker[columnCellIndex]]
cell = row[columnCellIndex]
# Handle content of the cell
handleCellContent(cell, content)
if not cell.colspanAdjusted:
#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
adjustColspan(row, columnCellIndex, numberOfColumns, line, numberOfColumns, delimiterPositions)
if cell.position >= delimiterPositions[columnCellIndex]:
columnCellIndex += cell.colspan # Move forward index i
elif len(cellsContent) == numberOfColumns: # Simple row
for columnIndex, content in enumerate(cellsContent):
rowIndex = rowsTracker[columnIndex]
handleCellContent(rows[rowIndex][columnIndex], content)
else:
raise ValueError(f'More cells than columns found ({len(cellsContent)} {numberOfColumns})')
else:
raise ValueError('No separator line found for row starting')
# Sort the parsed rows into header rows (blocks before the header separator) and data rows
if hasHeader and start >= headerSeparatorIndex: # table_row and auxiliar_row are part of data_rows
for row in rows:
dataRows.append(row.cells)
elif hasHeader and start < headerSeparatorIndex: # table_row and auxiliar_row are part of header_rows
for row in rows: # header rows
headerRows.append(row.cells)
else:
#only body
for row in rows:
dataRows.append(row.cells)
# Check if there are any data rows
if not dataRows and not headerRows:
raise ValueError('No valid rows found in the provided grid table.')
# Format text
for gridRows in [headerRows, dataRows]:
for gridRow in gridRows:
for cell in gridRow:
if cell.content is not None:
# Replacing "<" by &lt;
cell.content = cell.content.replace('<', '&lt;')
# Bold replacements
# Regex to detect markdown bold formatting in cell content
if cell.content is not None:
cell.content = matchBold.sub(r'\1<strong>\g<text></strong>', cell.content)
# Italic replacements
# Regex to detect markdown italic formatting in cell content
if cell.content is not None:
cell.content = matchItalic.sub(r'\1<i>\g<text></i>', cell.content)
# Correct newlines characters
for headerRow in headerRows:
for cell in headerRow:
cell.content = cell.content.replace('\n', '<br />') if cell.content is not None else None
for dataRow in dataRows:
for cell in dataRow:
cell.content = cell.content.replace('\n', '<br />') if cell.content is not None else None
#
# Checking that the grid is correct Not too much tested - need to take into account rowspan of previous rows
#
# For every row the colspans of its cells, plus one for each cell that is still covered by a
# rowspan of an earlier row, must add up to the number of columns.
# NOTE(review): the local name `sum` below shadows the builtin sum() inside this function.
# Checking the header rows
forwardRowspan:list[int] = []
for idx, headerRow in enumerate(headerRows):
if len(forwardRowspan) == 0:
forwardRowspan = [0] * len(headerRows[idx])
sum = 0
for cellIndex, cell in enumerate(headerRow):
sum += cell.colspan
if idx > 0 and cell.colspan == 0:
if forwardRowspan[cellIndex] > 0:
sum += 1
forwardRowspan[cellIndex] -= 1
if forwardRowspan[cellIndex] == 0 and cell.rowspan > 1:
forwardRowspan[cellIndex] = cell.rowspan -1
colspan=1
while cell.colspan > colspan:
forwardRowspan[cellIndex + colspan] = cell.rowspan - 1
colspan += 1
if not sum == numberOfColumns:
raise ValueError('Grid table not converted properly')
# Checking the data rows
forwardRowspan = []
for idx, dataRow in enumerate(dataRows):
if len(forwardRowspan) == 0:
forwardRowspan = [0] * len(dataRows[idx])
sum = 0
for cellIndex, cell in enumerate(dataRows[idx]):
sum += cell.colspan
if idx > 0 and cell.colspan == 0:
if forwardRowspan[cellIndex] > 0:
sum += 1
forwardRowspan[cellIndex] -= 1
if forwardRowspan[cellIndex] == 0 and cell.rowspan > 1:
forwardRowspan[cellIndex] = cell.rowspan - 1
colspan=1
while cell.colspan > colspan:
forwardRowspan[cellIndex + colspan] = cell.rowspan - 1
colspan += 1
if not sum == numberOfColumns:
raise ValueError('Grid table not converted properly')
return headerRows, dataRows
def generateHtmlTableWithSpans(gridTable:str) -> str:
    """ Generate an HTML table from a Pandoc-style grid table with row and column spans.

        Cells with a rowspan or colspan of 0 are covered by another cell and are
        not rendered. A ``<thead>`` section is only written when at least one
        header cell is rendered. Markdown lists found in a cell are converted
        to ``<ul>`` lists, and such a cell is left-aligned.

        Args:
            gridTable: The Pandoc-style grid table.

        Raises:
            RuntimeError: If the grid table cannot be parsed. The original
                exception is attached as the cause.

        Returns:
            The HTML table in string format.
    """
    # regex1 finds the single list elements (each one is terminated by the list element mark),
    # regex2 finds a whole run of list elements that is replaced by the generated <ul> list.
    # NOTE(review): regex2 contains a hard-coded '∆' in its lookahead while regex1 uses
    # _nextListElementMark - confirm whether both are meant to use the same mark.
    regex1 = r'\s*([-*+]|\s*\d+\.)\s+((?:(?!' + re.escape(_nextListElementMark) + r').)+)' + re.escape(_nextListElementMark)
    regex2 = r'(\s*([-*+]|\s*\d+\.)\s+(?:(?!∆).)+' + re.escape(_nextListElementMark) + r')+'

    try:
        gridHeader, gridBody = parseGridTableWithSpans(gridTable)
    except Exception as e:
        printDebug('Grid table could not be generated')
        raise RuntimeError(f'HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE:\n{str(e)}') from e

    # Generate table HTML...
    parts = ['<table>\n']

    # A header section is only written if there is at least one visible header cell
    hasHeader = any(cell.rowspan != 0 and cell.colspan != 0
                    for row in gridHeader
                    for cell in row)
    if hasHeader:
        parts.append(' <thead>\n')
        parts.append(_renderGridRows(gridHeader, 'th', regex1, regex2))
        parts.append(' </thead>\n')

    parts.append(' <tbody>\n')
    parts.append(_renderGridRows(gridBody, 'td', regex1, regex2))
    parts.append(' </tbody>\n')
    parts.append('</table>')
    return ''.join(parts)


def _renderGridRows(rows, tag:str, listElementRegex:str, listRegex:str) -> str:
    """ Render table rows as ``<tr>`` elements, using `tag` (``th`` or ``td``) for the cells. """
    parts:list[str] = []
    for row in rows:
        parts.append(' <tr>\n')
        for cell in row:
            # Cells without a span are covered by a neighbouring cell
            if cell.rowspan == 0 or cell.colspan == 0:
                continue
            parts.append(_renderGridCell(cell, tag, listElementRegex, listRegex))
        parts.append(' </tr>\n')
    return ''.join(parts)


def _renderGridCell(cell, tag:str, listElementRegex:str, listRegex:str) -> str:
    """ Render a single visible cell; a list in the cell content is converted to ``<ul>`` first. """
    # Prepare content, in case there's a list
    if cell.content is not None and (matches := re.findall(listElementRegex, cell.content)):
        # Build the list from the matches
        listHtml = '<ul>' + ''.join(f'<li>{match[1]}</li>' for match in matches) + '</ul>'
        # A function is used as replacement so that backslashes in the cell text are
        # inserted literally instead of being interpreted as escapes or group references.
        cell.content = re.sub(listRegex, lambda _: listHtml, cell.content)
        # Enforce left alignment if cell contains a list
        cell.alignment = _alignLeft

    rowspan = f' rowspan="{cell.rowspan}"' if cell.rowspan > 1 else ''
    colspan = f' colspan="{cell.colspan}"' if cell.colspan > 1 else ''
    return f' <{tag}{rowspan}{colspan} {cell.alignment}>{cell.content}</{tag}>\n'
#
# markdownTools.py
#
# (c) 2025 by Andreas Kraft & Miguel Angel Reina Ortega
# License: BSD 3-Clause License. See the LICENSE file for further details.
""" Various tools for markdown processing
"""
from __future__ import annotations
from typing import Callable, Optional
from dataclasses import dataclass
import base64, hashlib
from enum import Enum, auto
from gridTableTools import generateHtmlTableWithSpans, setLoggers as setGridTableLoggers
from regexMatches import *
# TODO use a verbosity level instead
# Verbosity switches. Both are switched off by default.
verbose = veryVerbose = False

# Logging callables used throughout this module. All of them default to the
# built-in print function.
printInfo = printDebug = printError = print
def setLoggers(info:Callable = print, debug:Callable = print, error:Callable= print) -> None:
    """ Install the logging callables of this module.

        The same callables are passed on to the grid table tools.

        Args:
            info: Callable used for information messages.
            debug: Callable used for debug messages.
            error: Callable used for error messages.
    """
    global printInfo, printDebug, printError

    printInfo, printDebug, printError = info, debug, error

    # Keep the grid table tools in sync with this module's loggers
    setGridTableLoggers(info, debug, error)
def _shortHash(value:str, length:int) -> str:
    """ Generate a short hash of a string value.

        The hash is the base64 encoded SHA-256 digest of the value, cut to
        the requested length. The URL- and filename-safe base64 alphabet is
        used, because the result serves as a clause number that ends up in
        file names and link paths, where the characters '/' and '+' of the
        standard alphabet would break the path.

        Args:
            value: The value to hash.
            length: The length of the hash.

        Returns:
            The hash.
    """
    digest = hashlib.sha256(value.encode()).digest()
    return base64.urlsafe_b64encode(digest).decode()[:length]
class LineType(Enum):
    """ The kind of content a single line of the markdown file holds. """

    # A heading line
    HEADING = 1
    # Normal text. This is the default type of a line.
    TEXT = 2
    # The line that opens a codefence
    CODEFENCESTART = 3
    # A line inside a codefence
    CODE = 4
    # The line that closes a codefence
    CODEFENCEEND = 5
    # Presumably a list line; not assigned in the visible code -- TODO confirm
    LIST = 6
    # A note line
    NOTE = 7
    # A line that only contains an image
    STANDALONEIMAGE = 8
    # The first line of a table
    TABLEHEADER = 9
    # The separator line of a table
    TABLESEPARATOR = 10
    # A row of a table
    TABLEROW = 11
    # The last line of a table
    TABLELASTROW = 12
    # Raw HTML, e.g. a converted grid table
    RAWHTML = 13
@dataclass
class Line:
    """ A single line of the markdown file together with its line type. """

    # The text of the line. The default is an empty line.
    text:str = '\n'
    # The type of the line. The default is normal text.
    lineType:LineType = LineType.TEXT


    def __str__(self) -> str:
        """ The string representation of a line is its text. """
        return self.text


    def __repr__(self) -> str:
        """ The representation of a line is the same as its string form. """
        return str(self)
@dataclass
class Clause:
    """ A clause of the markdown file: a heading level, a clause number,
        a title, and the lines that belong to the clause.
    """

    _level:int
    _clauseNumber:str
    _title:str
    _lines:list[Line]


    @property
    def level(self) -> int:
        """ The level of the clause (read-only). """
        return self._level


    @property
    def clauseNumber(self) -> str:
        """ The clause number. An empty clause number is reported as '0'. """
        return self._clauseNumber or '0'


    @clauseNumber.setter
    def clauseNumber(self, value:str) -> None:
        """ Assign a new clause number. """
        self._clauseNumber = value


    @property
    def title(self) -> str:
        """ The title of the clause. """
        return self._title


    @title.setter
    def title(self, value:str) -> None:
        """ Assign a new title. """
        self._title = value


    @property
    def lines(self) -> list[Line]:
        """ The lines of the clause. """
        return self._lines


    @lines.setter
    def lines(self, value:list[Line]) -> None:
        """ Replace the lines of the clause. """
        self._lines = value


    @property
    def linesCount(self) -> int:
        """ The number of lines in the clause.

            Returns:
                The number of lines in the clause.
        """
        return len(self._lines)


    def append(self, line:Line) -> None:
        """ Add a single line to the end of the clause.

            Args:
                line: The line to append.
        """
        self._lines.append(line)


    def extend(self, clause:Clause) -> None:
        """ Add all lines of another clause to the end of this clause.

            Args:
                clause: The clause to extend with.
        """
        self._lines.extend(clause.lines)


    def asStringList(self, paddings:int = 0) -> list[str]:
        """ Return the texts of the clause's lines as a list.

            Args:
                paddings: The number of empty lines to add before the clause.

            Returns:
                The clause's lines as a list of strings.
        """
        padding = [ '\n' ] * paddings
        texts = [ entry.text for entry in self._lines ]
        return padding + texts


    def __len__(self) -> int:
        """ Return the number of characters in the clause. Leading and
            trailing whitespace of each line is not counted, so empty lines
            and lines that contain only whitespace do not contribute.

            Returns:
                The number of characters in the clause.
        """
        total = 0
        for entry in self._lines:
            total += len(entry.text.strip())
        return total


    def __str__(self) -> str:
        """ The string form of a clause is the concatenation of its lines. """
        return ''.join(str(entry) for entry in self._lines)


    def __repr__(self) -> str:
        """ The representation of a clause is the same as its string form. """
        return str(self)
class Footnote:
    """ A footnote of the markdown file, consisting of its id and its line. """

    def __init__(self, id:str, line:Line) -> None:
        """ Create a new footnote.

            Args:
                id: The id of the footnote.
                line: The line of the footnote.
        """
        # The id of the footnote
        self.id = id
        # The line of the footnote
        self.line = line


    def __str__(self) -> str:
        """ The string form of a footnote is the text of its line. """
        return self.line.text


    def __repr__(self) -> str:
        """ The representation of a footnote is the same as its string form. """
        return str(self)
class Document:
    """ Represents the document object. A document consists of a list of
        clauses and a list of footnotes.
    """

    # The clauses of the document. Set per instance in the constructor.
    clauses:list[Clause]
    # The footnotes of the document. Set per instance in the constructor.
    footnotes:list[Footnote]


    def __init__(self, clauses:list[Clause], footnotes:Optional[list[Footnote]] = None) -> None:
        """ Constructor.

            Args:
                clauses: The clauses of the document.
                footnotes: The footnotes of the document. If omitted, a new
                    empty list is created for this document.
        """
        self.clauses = clauses
        # Never share a default list between documents
        self.footnotes = footnotes if footnotes is not None else []


    def splitMarkdownDocument(self,
                              ignoreTitles:Optional[list[str]] = None,
                              splitLevel:int = 1,
                              ignoreUntilFirstHeading:bool = False) -> None:
        """ Split the clauses at a certain level. This is used to create the separate
            markdown files for MkDocs.

            After the split, the clauses are stored in the document object.

            Args:
                ignoreTitles: A list of titles that should be ignored. They are not included in the output.
                splitLevel: The level at which the clauses should be split.
                ignoreUntilFirstHeading: Ignore all clauses until the first heading.
        """
        result:list[Clause] = []
        # Titles are compared case-insensitively
        ignored = { t.casefold() for t in (ignoreTitles or []) }

        for clause in self.clauses:
            # Check if the current clause should be ignored
            if clause.title.casefold() in ignored:
                continue

            # Add a new output clause if the current clause's level is
            # equal or less than the split level. A new output clause is also
            # started when there is none yet to add the lines to.
            if clause.level <= splitLevel or not result:
                result.append(Clause(clause.level, clause.clauseNumber, clause.title, []))

            # Add the lines to the output clause
            result[-1].extend(clause)

        # Remove the leading clauses that have no title
        if ignoreUntilFirstHeading:
            while result and len(result[0].title) == 0:
                result.pop(0)

        self.clauses = result


    def insertFootnotes(self) -> None:
        """ Insert footnotes into the clauses.

            Every footnote that is referenced in a text line of a clause is
            appended once to the end of that clause, after an empty line.

            After the insertion, the clauses are stored in the document object.
        """
        printInfo('Adding footnotes to clauses')

        for clause in self.clauses:
            foundFootnotes:list[Footnote] = []
            for line in clause.lines:
                # ATTN: Only footnotes in normal text lines are checked
                if line.lineType != LineType.TEXT:
                    continue
                # A line may reference more than one footnote
                for reference in MatchInlineFootnote.finditer(line.text):
                    # Find the footnote in the list of footnotes
                    for f in self.footnotes:
                        if f.id == reference.group(1) and f not in foundFootnotes:
                            foundFootnotes.append(f)

            # Insert the footnotes at the end of the clause
            if foundFootnotes:
                clause.append(Line('\n', LineType.TEXT))
                for f in foundFootnotes:
                    clause.append(f.line)


    @staticmethod
    def _headingToAnchor(heading:str) -> str:
        """ Convert the text of a heading to the anchor format used for links.

            Args:
                heading: The text of the heading.

            Returns:
                The anchor, without the leading '#'.
        """
        anchor = heading.strip().casefold().replace(' ', '-')
        # Remove special characters
        for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'):
            anchor = anchor.replace(c, '')
        # remove html tags from the anchor
        return re.sub(matchHtmlTag, '', anchor)


    @staticmethod
    def _rewriteLinks(pattern:re.Pattern,
                      text:str,
                      linkTargets:dict[str, Clause],
                      kind:str,
                      clauseTitle:str,
                      caseFold:bool) -> str:
        """ Rewrite the link targets that are matched by a pattern in a text.

            Only the target inside each matched link is replaced. The rest of
            the text is never touched, so a target that occurs several times in
            a line is rewritten exactly once per link.

            Args:
                pattern: The compiled pattern. Its first group must capture the link target.
                text: The text to rewrite.
                linkTargets: The mapping of anchors to the clauses that contain them.
                kind: The kind of link. This is only used for the debug message.
                clauseTitle: The title of the current clause. This is only used for the debug message.
                caseFold: Whether the target is looked up in its case-folded form.

            Returns:
                The text with the updated link targets.
        """

        def replaceTarget(m:re.Match) -> str:
            target = m.group(1)
            targetClause = linkTargets.get(target.casefold() if caseFold else target)
            if targetClause is None:
                return m.group(0)	# unknown target, leave the link as it is
            if veryVerbose:
                printDebug(f'Updated {kind} link "{target}" in clause "{clauseTitle}"')
            # Replace only the span of the captured target inside the match
            whole = m.group(0)
            start = m.start(1) - m.start()
            end = m.end(1) - m.start()
            return f'{whole[:start]}../{targetClause.clauseNumber}/#{target[1:]}{whole[end:]}'

        return pattern.sub(replaceTarget, text)


    def updateLinks(self) -> None:
        """ Update the links in the clauses to the new structure. This is done by
            creating a dictionary of all links and their targets and then replacing
            the links in the clauses.

            After the update, the clauses are stored in the document object.
        """
        printInfo('Updating links in clauses')

        # Build the link target dictionary. Mapping anchor -> clause
        linkTargets:dict[str, Clause] = {}

        # Find all Markdown headers in the clauses and convert them to anchor format
        for clause in self.clauses:
            for line in clause.lines:
                if (m := matchHeader.match(line.text)):
                    anchor = self._headingToAnchor(m.group(2))
                    linkTargets[f'#{anchor}'] = clause
                    if veryVerbose:
                        printDebug(f'Added Markdown anchor "{anchor}"')

        # Find all HTML anchors in the clauses and add them to the dictionary
        for clause in self.clauses:
            for line in clause.lines:
                for a in matchHtmlAnchorLink.findall(line.text):
                    linkTargets[f'#{a}'] = clause
                    if veryVerbose:
                        printDebug(f'Found HTML anchor "{a}" in clause "{clause.title}"')

        # Replace the html links first, then the markdown links. The targets
        # of markdown links are looked up in their lower case version.
        for pattern, kind, caseFold in ( (matchHtmlLink, 'HTML', False),
                                         (markdownLink, 'Markdown', True) ):
            for clause in self.clauses:
                for line in clause.lines:
                    line.text = self._rewriteLinks(pattern, line.text, linkTargets, kind, clause.title, caseFold)


    def updateNotes(self) -> None:
        """ Update the notes in the clauses to the mkDocs notes version.

            A run of consecutive note lines is converted to a single
            '!!! note' block that is surrounded by empty lines.

            After the update, the clauses are stored in the document object.
        """
        printInfo('Updating notes in clauses')

        for clause in self.clauses:
            lines:list[Line] = []
            inNote = False
            for line in clause.lines:
                if line.lineType == LineType.NOTE:
                    # Open a new note block for the first line of a note
                    if not inNote:
                        lines.append(Line('\n', LineType.TEXT))
                        lines.append(Line('!!! note\n', LineType.NOTE))
                        inNote = True
                    # The note text is indented and the note prefix is removed
                    lines.append(Line(f"\t{re.sub(matchNoteStart, '', line.text)}", LineType.NOTE))
                    if verbose:
                        printDebug(f'Converted note in clause "{clause.title}"')
                else:
                    # Close the note block with an empty line
                    if inNote:
                        lines.append(Line('\n', LineType.TEXT))
                        inNote = False
                    lines.append(line)
            clause.lines = lines


    def __str__(self) -> str:
        """ Return the document as a string. """
        return '\n'.join([ str(c) for c in self.clauses + self.footnotes ])


    def __repr__(self) -> str:
        """ Return the document as a string. """
        return self.__str__()
def analyseMarkdown(filename:Optional[str]=None, inLines:Optional[list[str]]=None) -> Document:
    """ Analyse the markdown file and split it into clauses.

        Each line is classified (codefence, table, note, image, heading, text)
        and added to the current clause. Every heading starts a new clause.
        Footnote definitions are collected separately. Grid tables are
        collected and converted to a single raw HTML line.

        Either the filename or the inLines must be provided.

        Args:
            filename: The name of the markdown file.
            inLines: The lines of the markdown file.

        Returns:
            The document object.

        Raises:
            ValueError: If both or none of filename and inLines are provided.
    """

    # The lines of the grid table that is currently collected
    gridTable:str = ''

    def processGridTable() -> None:
        """ Process a grid table and convert it to an html table.

            This function adds the html table to the output clauses and
            clears the gridTable variable. If the conversion fails then the
            error is reported and an empty raw HTML line is added instead.
        """
        nonlocal gridTable

        htmltable:str = ''
        try:
            htmltable = generateHtmlTableWithSpans(gridTable)
            printDebug(htmltable)
        except Exception as e:
            printError(f"Error: {e}")
        outClauses[-1].append(Line(htmltable, LineType.RAWHTML))
        gridTable = ''


    printInfo(f'Analyzing "{filename}"')

    # Read the file.
    # Note: We use utf-8 and replace errors to avoid problems with special or unknown characters.
    if filename and not inLines:
        with open(filename, 'r', encoding = 'utf-8', errors = 'replace') as file:
            inLines = file.readlines()
    elif filename or inLines is None:
        # Both or none were given. Note that an explicitly provided empty
        # list of lines is valid input (an empty document).
        raise ValueError('Either the filename or the lines must be provided.')

    # The list of clauses. The first clause contains the text before the first heading.
    outClauses:list[Clause] = [Clause(0, '', '', [])]
    footnotes:list[Footnote] = []

    # Go through the lines and detect headers and codefences
    inCodefence = False
    inTable = False
    tableHasSeparator = False
    inGridTable = False
    for line in inLines:

        # Detect and handle codefences
        # For the moment we support only codefences that start and end
        # with 3 backticks. This is the most common way to define codefences.
        # Note, that longer codefences are allowed by the markdown specification.
        if matchCodefenceStart.match(line) and not inCodefence:
            inCodefence = True
            outClauses[-1].append(Line(line, LineType.CODEFENCESTART))
            continue
        if matchCodefenceEnd.match(line):
            inCodefence = False
            outClauses[-1].append(Line(line, LineType.CODEFENCEEND))
            continue
        if inCodefence:
            outClauses[-1].append(Line(line, LineType.CODE))
            continue

        # Detect and handle tables
        if matchTable.match(line) and not inTable and not inGridTable:
            inTable = True
            outClauses[-1].append(Line(line, LineType.TABLEHEADER))
            continue
        if inTable:
            if matchTableSeparator.match(line) and not tableHasSeparator:
                outClauses[-1].append(Line(line, LineType.TABLESEPARATOR))
                tableHasSeparator = True
                continue
            elif matchTable.match(line):
                outClauses[-1].append(Line(line, LineType.TABLEROW))
                continue
            else:
                inTable = False
                tableHasSeparator = False
                # Mark the previous line as the last row in the table
                outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
                # continue with other matches

        # Detect grid tables and convert them to html table
        if matchGridTable.match(line) and not inGridTable:
            inGridTable = True
            gridTable += line
            continue
        if inGridTable:
            if matchGridTableHeaderSeparator.match(line) or matchGridTableBodySeparator.match(line):
                gridTable += line
                continue
            elif matchTable.match(line):
                gridTable += line
                continue
            else:
                inGridTable = False
                processGridTable()
                # continue with other matches

        # Detect notes
        # Notes are lines that start with a '>'.
        if matchNote.match(line):
            outClauses[-1].append(Line(line, LineType.NOTE))
            continue

        # Detect footnotes
        # Footnote definitions are lines that start with '[^<id>]:'.
        # They are not added to a clause but collected separately.
        if (_fn := matchFootnote.match(line)):
            footnotes.append(Footnote(_fn.groups()[0], Line(line, LineType.TEXT)))
            continue

        # Detect images on a single line
        if matchStandAloneImage.match(line):
            outClauses[-1].append(Line(line, LineType.STANDALONEIMAGE))
            continue

        # Detect headers
        _lineType = LineType.TEXT
        if (m := matchHeader.match(line)):
            # Add a new clause
            clauseTitle = m.groups()[1].strip()
            clauseTitle = re.sub(matchHtmlTag, '', clauseTitle)
            headerNumber = matchHeaderNumber.search(clauseTitle)
            # Headings without a number get a short hash of the title as clause number
            outClauses.append(Clause(len(m.groups()[0]), # level
                                     headerNumber.group() if headerNumber else _shortHash(clauseTitle, 6),
                                     clauseTitle,
                                     []))
            _lineType = LineType.HEADING

        # Just add the line to the current clause as text
        outClauses[-1].append(Line(line, _lineType))

    # Process still unfinished cases

    # A table that runs until the end of the input has not been closed yet.
    # Mark its last line, as it is done for tables inside the text.
    if inTable and outClauses[-1].lines:
        lastLine = outClauses[-1].lines[-1]
        if lastLine.lineType in (LineType.TABLEHEADER, LineType.TABLESEPARATOR, LineType.TABLEROW):
            lastLine.lineType = LineType.TABLELASTROW

    # A grid table that runs until the end of the input must still be converted
    if gridTable:
        processGridTable()

    return Document(outClauses, footnotes)
def main() -> None:
    """ Main function for processing markdown files from the command line.

        The input file is analysed, split into clauses, and its footnotes,
        links and notes are updated. The resulting clauses are printed.
    """
    import argparse

    global verbose, veryVerbose

    # Define the command line interface
    cli = argparse.ArgumentParser(description='Markdown-Dateien verarbeiten, um Gittertabellen zu konvertieren und andere Formatierungen zu handhaben')
    cli.add_argument('eingabe', help='Eingabe-Markdown-Datei')
    cli.add_argument('-v', '--verbose', action='store_true', help='Ausführliche Ausgabe aktivieren')
    cli.add_argument('-vv', '--sehr-verbose', action='store_true', help='Sehr ausführliche Ausgabe aktivieren')
    cli.add_argument('-i', '--ignoriere-titel', nargs='+', default=[], help='Liste der zu ignorierenden Titel')
    cli.add_argument('-s', '--teilungs-ebene', type=int, default=1, help='Ebene, auf der das Dokument geteilt werden soll (Standard: 1)')
    cli.add_argument('-f', '--ignoriere-erste', action='store_true', help='Inhalt bis zur ersten Überschrift ignorieren')
    options = cli.parse_args()

    # Set the verbosity levels
    verbose = options.verbose
    veryVerbose = options.sehr_verbose

    # Process the markdown file
    document = analyseMarkdown(options.eingabe)

    # Split the document
    document.splitMarkdownDocument(ignoreTitles=options.ignoriere_titel,
                                   splitLevel=options.teilungs_ebene,
                                   ignoreUntilFirstHeading=options.ignoriere_erste)

    # Update the elements of the document
    document.insertFootnotes()
    document.updateLinks()
    document.updateNotes()

    # Print the processed document
    for clause in document.clauses:
        heading = '#' * clause.level
        print(f"\n{heading} {clause.title}")
        print(''.join(line.text for line in clause.lines), end='')


# Run the command line interface when the module is executed as a script
if __name__ == '__main__':
    main()
#
# regexMatches.py
#
# (c) 2025 by Andreas Kraft & Miguel Angel Reina Ortega
# License: BSD 3-Clause License. See the LICENSE file for further details.
#
""" This module contains the regular expressions used in the markdown processing.
"""
import re
# Regular expressions

# A list item that is indented by exactly two whitespace characters
match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
# A footnote definition, e.g. "[^1]:". Group 1 is the footnote id.
matchFootnote = re.compile(r'\[\^([^\]]*)\]:', re.IGNORECASE)
# An HTML anchor, e.g. '<a name="anchor"></a>'. Group 1 is the anchor name.
matchHtmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)
# An HTML link, e.g. '<a href="#anchor">text</a>'. Group 1 is the link target.
matchHtmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
# Any HTML tag
matchHtmlTag = re.compile(r'<[^>]*>', re.IGNORECASE)
# A footnote reference inside the text, e.g. "[^1]". Group 1 is the footnote id.
MatchInlineFootnote = re.compile(r'\[\^([^\]]*)\]', re.IGNORECASE)
# A markdown link to an anchor, e.g. "[text](#anchor)". Group 1 is the link target.
# The link must not be preceded by a '!' (this would be an image).
# NOTE(review): a character is required in front of the link, so a link at the
# very start of a line is not matched -- confirm whether this is intended.
markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
# Start and end of a codefence (three backticks)
matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE)
matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE)
# The first line of a grid table, e.g. "+----+----+". Any amount of trailing
# whitespace is accepted, including none (last line of a file without newline).
matchGridTable = re.compile(r'^\s*\+-.*\+\s*$', re.IGNORECASE)
# Separator lines of a grid table
matchGridTableBodySeparator = re.compile(r'.*\+([:-]+\+)+.*$', re.IGNORECASE)
matchGridTableBodySeparatorLine = re.compile(r'[-:]+$', re.IGNORECASE)
matchGridTableHeaderSeparator = re.compile(r'.*\+([=:]+\+)+.*$', re.IGNORECASE)
matchGridTableSeparator = re.compile(r'\s*\+([-:=]+\+)+\s*$', re.IGNORECASE)
# A markdown heading. Group 1 contains the '#' characters, group 2 the title.
matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE)
# A clause number in a heading, e.g. "1", "5.1.2" or "A.1"
matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE)
# A list item in a content line. The named groups are the list marker
# ("marker") and the text of the item ("content").
matchListInContent = re.compile(r'^(?:\s*(?P<marker>[-*+]|\s*\d+\.))\s+(?P<content>.+)$', re.IGNORECASE)
# A note line (starts with '>') and the prefix of a note that is removed
matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE)
matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE)
# An image on a line of its own. Group 1 is the image target.
matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECASE)
# A line of a markdown table and the separator line of a markdown table
matchTable = re.compile(r'^\s*\|.*\|\s*$', re.IGNORECASE)
matchTableSeparator = re.compile(r'^\s*\|([-: ]+\|)+\s*$', re.IGNORECASE)
# Bold and italic text. The named group "text" is the emphasised text.
matchBold = re.compile(r'(^|\s)(\*\*|__)(?P<text>.+?)\2(?!\w)')
matchItalic = re.compile(r'(^|\s)(\*|_)(?P<text>.+?)\2(?!\w)')
\ No newline at end of file
# #
# toMkdocs.py # toMkdocs.py
# #
# (c) 2024 by Andreas Kraft # (c) 2024 by Andreas Kraft & Miguel Angel Reina Ortega
# License: BSD 3-Clause License. See the LICENSE file for further details.
# #
# This script converts oneM2M spec markdown file to a mkdocs compatible # This script converts oneM2M spec markdown file to a mkdocs compatible
# directory structure. # directory structure.
# #
from __future__ import annotations from __future__ import annotations
import logging import argparse, os, shutil
from enum import Enum, auto
import argparse, re, os, shutil, hashlib, base64
from dataclasses import dataclass
from rich import print from rich import print
from markdownTools import Line, Document, analyseMarkdown, setLoggers
from regexMatches import match2spaceListIndention
verbose = False verbose = False
veryVerbose = False veryVerbose = False
class LineType(Enum):
""" Represents the type of a line in the markdown file. """
HEADING = auto()
TEXT = auto()
CODEFENCESTART = auto()
CODE = auto()
CODEFENCEEND = auto()
LIST = auto()
NOTE = auto()
STANDALONEIMAGE = auto()
TABLEHEADER = auto()
TABLESEPARATOR = auto()
TABLEROW = auto()
TABLELASTROW = auto()
@dataclass
class Line:
""" Represents a line in the markdown file. """
text:str = '\n'
lineType:LineType = LineType.TEXT
@dataclass
class Clause:
""" Represents a clause in the markdown file. """
_level:int
_clauseNumber:str
_title:str
_lines:list[Line]
@property
def level(self) -> int:
""" Return the level of the clause. """
return self._level
@property
def clauseNumber(self) -> str:
""" Return the clause number. """
return self._clauseNumber if self._clauseNumber else '0'
@clauseNumber.setter
def clauseNumber(self, value:str) -> None:
""" Set the clause number. """
self._clauseNumber = value
@property
def title(self) -> str:
""" Return the title of the clause. """
return self._title
def printDebug(text:str) -> None:
@title.setter """ Print a debug message.
def title(self, value:str) -> None:
""" Set the title of the clause. """
self._title = value
@property
def lines(self) -> list[Line]:
""" Return the lines of the clause. """
return self._lines
@lines.setter
def lines(self, value:list[Line]) -> None:
""" Set the lines of the clause. """
self._lines = value
@property
def linesCount(self) -> int:
""" Return the number of lines in the clause.
Returns:
The number of lines in the clause.
"""
return len(self.lines)
def append(self, line:Line) -> None:
""" Append a line to the clause.
Args: Args:
line: The line to append. text: The text of the debug message.
""" """
self.lines.append(line) if verbose:
print(f'[dim]{text}')
def extend(self, clause:Clause) -> None: def printInfo(text:str) -> None:
""" Extend the clause with the lines of another clause. """ Print an information message.
Args: Args:
clause: The clause to extend with. text: The text of the information message.
""" """
self.lines.extend(clause.lines) print(f'[green]{text}')
def asStringList(self, paddings:int = 0) -> list[str]: def printWarning(text:str) -> None:
""" Return the clause as a list of strings. """ Print a warning message.
Args: Args:
paddings: The number of empty lines to add before the clause. text: The text of the warning message.
Returns:
The clause's lines as a list of strings.
"""
return [ '\n' for _ in range(paddings) ] + [ l.text for l in self.lines ]
def __len__(self) -> int:
""" Return the number of characters in the clause. This does not include
empty lines or lines that contain only whitespace.
Returns:
The number of characters in the clause.
""" """
return sum([ len(l.text.strip()) for l in self.lines ]) print(f'[yellow]{text}')
class Footnote:
""" Represents a footnote in the markdown file. """
def __init__(self, id:str, line:Line) -> None:
self.id = id
self.line = line
class Document:
""" Represents the document object. """
clauses:list[Clause] = []
footnotes:list[Footnote] = []
def __init__(self, clauses:list[Clause], footnotes:list[Footnote]) -> None:
self.clauses = clauses
self.footnotes = footnotes
def printError(text:str) -> None:
def splitMarkdownDocument(self, """ Print an error message.
ignoreTitles:list[str] = [],
splitLevel:int = 1,
ignoreUntilFirstHeading:bool = False) -> None:
""" Split the clauses at a certain level. This is used to create the separate
markdown files for MkDocs.
After the split, the clauses are stored in the document object.
Args: Args:
ignoreTitles: A list of titles that should be ignored. They are not included in the output. text: The text of the error message.
splitLevel: The level at which the clauses should be split.
ignoreUntilFirstHeader: Ignore all clauses until the first heading.
""" """
result:list[Clause] = [] print(f'[red]{text}')
ignoreTitles = [ t.casefold() for t in ignoreTitles ] # convert to lower case
for clause in self.clauses:
level = clause.level
# Check if the current clause should be ignored
if clause.title.casefold() in ignoreTitles:
continue
# Add a new output clause if the current clause's level is
# equal or less than the split level
if clause.level <= splitLevel:
result.append(Clause(level, clause.clauseNumber, clause.title, []))
# Add the lines to the output clause
result[-1].extend(clause)
# Remove the first clause if it has no title
if ignoreUntilFirstHeading:
while len(result[0].title) == 0:
result.pop(0)
self.clauses = result
def insertFootnotes(self) -> None:
""" Insert footnotes into the clauses.
After the insertion, the clauses are stored in the document object.
"""
print(f'[green]Adding footnotes to clauses')
for clause in self.clauses:
foundFootnotes:list[Footnote] = []
for line in clause.lines:
# ATTN: Only footnotes in normal text lines are checked
if line.lineType == LineType.TEXT and (fn := _inlineFootnote.search(line.text)):
# Find the footnote in the list of footnotes
for f in self.footnotes:
if f.id == fn.groups()[0]:
foundFootnotes.append(f)
# Insert the footnotes at the end of the clause
if len(foundFootnotes) > 0:
clause.append(Line('\n', LineType.TEXT))
for f in foundFootnotes:
clause.append(f.line)
def updateLinks(self) -> None: def prepareForMkdocs(document:Document, includeHangingParagraphs:bool = False) -> None:
""" Update the links in the clauses to the new structure. This is done by
creating a dictionary of all links and their targets and then replacing
the links in the clauses.
After the update, the clauses are stored in the document object.
"""
print(f'[green]Updating links in clauses')
# Build the link target dictionary. Mapping anchor -> clause
linkTargets:dict[str, Clause] = {}
# Find all Markdown headers in the clauses and convert them to anchor format
for i, clause in enumerate(self.clauses):
# Find all headers in the clause
for line in clause.lines:
if (m := _matchHeader.match(line.text)):
# convert the header to anchor format and add it to the dictionary
# Remove special characters
# TODO move perhaps to an own function
anchor = m.groups()[1].strip().casefold().replace(' ', '-')
for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'):
anchor = anchor.replace(c, '')
# remove html tags from the anchor
anchor = re.sub(_htmlTag, '', anchor)
linkTargets[f'#{anchor}'] = clause
if veryVerbose:
print(f'[dim]Added Markdown anchor "{anchor}"')
# Find all HTML anchors in the clauses and add them to the dictionary
for i, clause in enumerate(self.clauses):
for line in clause.lines:
if (anchors := _htmlAnchorLink.findall(line.text)):
for a in anchors:
linkTargets[f'#{a}'] = clause
if veryVerbose:
print(f'[dim]Found HTML anchor "{a}" in clause "{clause.title}"')
# Replace the html links
for clause in self.clauses:
for i, line in enumerate(clause.lines):
if (links := _htmlLink.findall(line.text)):
for lnk in links:
if lnk in linkTargets:
line.text = clause.lines[i].text = line.text.replace(lnk, f'../{linkTargets[lnk].clauseNumber}/#{lnk[1:]}') # Update the current line as well
if veryVerbose:
print(f'[dim]Updated HTML link "{lnk}" in clause "{clause.title}"')
# Replace the markdown links
for clause in self.clauses:
for i, line in enumerate(clause.lines):
if (links := _markdownLink.findall(line.text)):
# Replace the old link targets with converted
# (lower case) versions that point to the output files
for lnk in links:
_lnk =lnk.casefold()
if _lnk in linkTargets:
line.text = clause.lines[i].text = line.text.replace(lnk, f'../{linkTargets[_lnk].clauseNumber}/#{lnk[1:]}') # Update the current line as well
if veryVerbose:
print(f'[dim]Updated Markdown link "{lnk}" in clause "{clause.title}"')
def updateNotes(self) -> None:
""" Update the notes in the clauses to the mkDocs notes version.
After the update, the clauses are stored in the document object.
"""
print(f'[green]Updating notes in clauses')
for clause in self.clauses:
lines:list[Line] = []
inNote = False
for line in clause.lines:
if line.lineType == LineType.NOTE:
if not inNote:
lines.append(Line('\n', LineType.TEXT))
lines.append(Line('!!! note\n', LineType.NOTE))
inNote = True
lines.append(Line(f"\t{re.sub(_matchNoteStart, '', line.text)}", LineType.NOTE))
if verbose:
print(f'[dim]Converted note in clause "{clause.title}"')
else:
if inNote:
lines.append(Line('\n', LineType.TEXT))
inNote = False
lines.append(line)
clause.lines = lines
def prepareForMkdocs(self, includeHangingParagraphs:bool = False) -> None:
""" Prepare the clauses for MkDocs. This includes removing the heading """ Prepare the clauses for MkDocs. This includes removing the heading
from the clauses and marking the clauses that are only for navigation. from the clauses and marking the clauses that are only for navigation.
After the preparation, the clauses are stored in the document object. After the preparation, the clauses are stored in the document object.
Args: Args:
document: The document object.
includeHangingParagraphs: Include hanging paragraphs in the output. includeHangingParagraphs: Include hanging paragraphs in the output.
""" """
# Remove the heading from the lines. The heading is the first line # Remove the heading from the lines. The heading is the first line
# in the clause. This is done because MkDocs repeats the heading when # in the clause. This is done because MkDocs repeats the heading when
# displaying the page. # displaying the page.
for clause in self.clauses: for clause in document.clauses:
if clause.linesCount > 0: if clause.linesCount > 0:
clause.lines.pop(0) clause.lines.pop(0)
# Also, remove the first empty lines if they exist # Also, remove the first empty lines if they exist
...@@ -339,43 +78,44 @@ class Document: ...@@ -339,43 +78,44 @@ class Document:
# Detect and handle hanging paragraphs. This is extra text in a clause, which # Detect and handle hanging paragraphs. This is extra text in a clause, which
# has sub-clauses. This text is not allowed in oneM2M specifications. # has sub-clauses. This text is not allowed in oneM2M specifications.
for i, clause in enumerate(self.clauses): for i, clause in enumerate(document.clauses):
if clause.level > 0 and clause.linesCount > 0: if clause.level > 0 and clause.linesCount > 0:
# Check if there is a sub-clause in the next clause # Check if there is a sub-clause in the next clause
if i + 1 < len(self.clauses) and self.clauses[i+1].level > clause.level: if i + 1 < len(document.clauses) and document.clauses[i+1].level > clause.level:
# This is a hanging paragraph. Remove the text from the current clause. # This is a hanging paragraph. Remove the text from the current clause.
print(f'[yellow]Hanging paragraph in clause "{clause.title}" {"(removed)" if not includeHangingParagraphs else "(kept)"}') printWarning(f'Hanging paragraph in clause "{clause.title}" {"(removed)" if not includeHangingParagraphs else "(kept)"}')
if not includeHangingParagraphs: if not includeHangingParagraphs:
self.clauses[i].lines = [] document.clauses[i].lines = []
else: else:
self.clauses[i].lines = [Line("<mark>Editor note: This is a hanging paragraph and it must be moved to its own clause</mark>")] + [Line()] + self.clauses[i].lines # Add a note to the hanging paragraph
document.clauses[i].lines = [Line("<mark>Editor note: This is a hanging paragraph and it must be moved to its own clause</mark>")] + [Line()] + document.clauses[i].lines
# Repair wrong markdown for indented lines. # Repair wrong markdown for indented lines.
# Add 2 spaces to existing 2-space indentions # Add 2 spaces to existing 2-space indentions
for clause in self.clauses: for clause in document.clauses:
for i, line in enumerate(clause.lines): for i, line in enumerate(clause.lines):
if _match2spaceListIndention.match(line.text): if match2spaceListIndention.match(line.text):
clause.lines[i].text = ' ' + line.text clause.lines[i].text = ' ' + line.text
def writeClausesMkDocs(self, filename:str, navTitle:str, addNavTitle:bool = False) -> None: def writeClausesMkDocs(document:Document, filename:str, navTitle:str, addNavTitle:bool = False) -> None:
""" Write the clauses to separate files and create a navigation file. """ Write the clauses to separate files and create a navigation file.
Args: Args:
document: The document object.
filename: The name of the original markdown file. filename: The name of the original markdown file.
navTitle: The title of the navigation entry. This is used to determine the directories. navTitle: The title of the navigation entry. This is used to determine the directories.
addNavTitle: Add the title as an extra navigation level to the navigation file. addNavTitle: Add the title as an extra navigation level to the navigation file.
""" """
print(f'[green]Writing clauses to files') printInfo(f'Writing clauses to files')
# create directory first # create directory first
os.makedirs(f'{os.path.dirname(filename)}/{navTitle}', exist_ok = True) os.makedirs(f'{os.path.dirname(filename)}/{navTitle}', exist_ok = True)
# Write the files # Write the files
for i, f in enumerate(self.clauses): for i, f in enumerate(document.clauses):
# write to single files, even empty ones # write to single files, even empty ones
if verbose: printDebug(f'Writing "{f.clauseNumber}.md" - "{f.title}"')
print(f'[dim]Writing "{f.clauseNumber}.md" - "{f.title}"')
with open(f'{os.path.dirname(filename)}/{navTitle}/{f.clauseNumber}.md', 'w') as file: with open(f'{os.path.dirname(filename)}/{navTitle}/{f.clauseNumber}.md', 'w') as file:
# Add one empty line before the clause. This is done to avoid # Add one empty line before the clause. This is done to avoid
# a bug in MkDocs that does not display the first line of a clause # a bug in MkDocs that does not display the first line of a clause
...@@ -385,23 +125,22 @@ class Document: ...@@ -385,23 +125,22 @@ class Document:
# write nav.yml file # write nav.yml file
print(f'[green]Writing "_nav.yml"') printInfo(f'Writing "_nav.yml"')
indentation = ' ' if addNavTitle else '' # TODO make number of spaces configurable indentation = ' ' if addNavTitle else '' # TODO make number of spaces configurable
with open(f'{os.path.dirname(filename)}/_nav.yml', 'w') as file: with open(f'{os.path.dirname(filename)}/_nav.yml', 'w') as file:
if veryVerbose: printDebug(f'Writing navigation file')
print(f'[dim]Writing navigation file')
if addNavTitle: if addNavTitle:
file.write(f'{indentation}- {navTitle}:\n') file.write(f'{indentation}- {navTitle}:\n')
for i, f in enumerate(self.clauses): for i, f in enumerate(document.clauses):
if not f.title: if not f.title:
print("continue") # print("continue")
continue continue
# TODO handle if the next clause is more than one level deeper # TODO handle if the next clause is more than one level deeper
_title = f.title.replace("'", '"') _title = f.title.replace("'", '"')
nextClause = self.clauses[i+1] if i+1 < len(self.clauses) else None nextClause = document.clauses[i+1] if i+1 < len(document.clauses) else None
if nextClause is None or nextClause.level <= f.level: if nextClause is None or nextClause.level <= f.level:
file.write(f"{indentation}{' '*f.level}- '{_title}': '{navTitle}/{f.clauseNumber}.md'\n") file.write(f"{indentation}{' '*f.level}- '{_title}': '{navTitle}/{f.clauseNumber}.md'\n")
else: else:
...@@ -410,739 +149,9 @@ class Document: ...@@ -410,739 +149,9 @@ class Document:
file.write(f"{indentation}{' '*nextClause.level}- 'Hanging paragraph': '{navTitle}/{f.clauseNumber}.md'\n") file.write(f"{indentation}{' '*nextClause.level}- 'Hanging paragraph': '{navTitle}/{f.clauseNumber}.md'\n")
# Pre-compiled regular expressions used while analysing the markdown source.

# Headings ("# Title") and the clause number inside a heading title
_matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE)
_matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE)
# Code fences (only three-backtick fences are recognised)
_matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE)
_matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE)
# Block quotes / notes and images that stand alone on a line
_matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE)
_matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECASE)
# Pipe tables
_matchTable = re.compile(r'^\s*\|.*\|\s*$', re.IGNORECASE)
_matchTableSeparator = re.compile(r'^\s*\|([-: ]+\|)+\s*$', re.IGNORECASE)
# Grid tables
_matchGridTable = re.compile(r'^\s*\+-.*\+\s$', re.IGNORECASE)
_matchGridTableSeparator = re.compile(r'\s*\+([-:=]+\+)+\s*$', re.IGNORECASE)
_matchGridTableBodySeparator = re.compile(r'.*\+([:-]+\+)+.*$', re.IGNORECASE)
_matchGridTableHeaderSeparator = re.compile(r'.*\+([=:]+\+)+.*$', re.IGNORECASE)
_matchGridTableBodySeparatorLine = re.compile(r'[-:]+$', re.IGNORECASE)
# Lists
_match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
# Named groups need the "(?P<name>...)" syntax; without the "?" the pattern
# would look for the literal text "P<marker>" and define no named groups.
_matchListInContent = re.compile(r'^(?:\s*(?P<marker>[-*+]|\s*\d+\.))\s+(?P<content>.+)$', re.IGNORECASE)
# Links, anchors and generic HTML tags
_markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
_htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)
_htmlTag = re.compile(r'<[^>]*>', re.IGNORECASE)
# Notes and footnotes
_matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE)
_footnote = re.compile(r'\[\^([^\]]*)\]:', re.IGNORECASE)
_inlineFootnote = re.compile(r'\[\^([^\]]*)\]', re.IGNORECASE)
# TODO handle multiple nav levels (left bar) better (make configurable) # TODO handle multiple nav levels (left bar) better (make configurable)
def shortHash(value:str, length:int) -> str:
    """ Derive a short, deterministic identifier from a string.

        The value is hashed with SHA-256, the digest is base64-encoded and
        the encoded text is cut to the requested number of characters.

        Args:
            value: The text to hash.
            length: The maximum number of characters to return.

        Returns:
            The leading *length* characters of the base64-encoded digest.
    """
    digest = hashlib.sha256(value.encode()).digest()
    encoded = base64.b64encode(digest).decode()
    return encoded[:length]
def parse_pandoc_table_with_spans(pandoc_table):
"""
Parse a Pandoc-style grid table into a structure for HTML conversion with rowspan and colspan.
:param pandoc_table: String of the Pandoc-style grid table.
:return: Tuple ``(header_rows, data_rows)``. Each element is a list of rows and each row is a list of Cell objects carrying content, rowspan, colspan and alignment.
:raises ValueError: If no separator line is found, delimiters are misaligned, a row has more cells than columns, no rows are found, or the final span check fails.
"""
# Split the input into lines
lines = [line.strip() for line in pandoc_table.strip().split("\n")]
class Cell:
""" Represents a single cell of the grid table. """
content: str
rowspan: int
colspan: int
colspan_adjusted: bool
alignment: str
position: int
list_flag: bool
auxiliar_index: int
def __init__(self):
# A cell with content None and rowspan/colspan 0 has not received any text yet
self.content = None
self.rowspan = 0
self.colspan = 0
self.colspan_adjusted = False
self.alignment = "align=\"center\""
self.position = None
self.list_flag = False
def set_alignment(self):
# Uses has_header, default_alignments, header_delimiter_positions and
# delimiter_positions from the enclosing function; self.position must be set first.
if has_header:
header_delimiter_index = 0
while header_delimiter_index in range(len(default_alignments)) and self.position > header_delimiter_positions[header_delimiter_index]:
header_delimiter_index += 1
if header_delimiter_index in range(len(default_alignments)):
if self.position < header_delimiter_positions[header_delimiter_index]:
self.alignment = default_alignments[header_delimiter_index]
elif self.position == header_delimiter_positions[header_delimiter_index]:
self.alignment = default_alignments[header_delimiter_index]
header_delimiter_index += 1
else:
raise ValueError("Invalid table formatting")
else:
body_delimiter_index = 0
while body_delimiter_index in range(len(default_alignments)) and self.position > \
delimiter_positions[body_delimiter_index]:
body_delimiter_index += 1
if body_delimiter_index in range(len(default_alignments)):
if self.position < delimiter_positions[body_delimiter_index]:
self.alignment = default_alignments[body_delimiter_index]
elif self.position == delimiter_positions[body_delimiter_index]:
self.alignment = default_alignments[body_delimiter_index]
body_delimiter_index += 1
else:
raise ValueError("Invalid table formatting")
class Row():
""" Represents one row of the grid table as a list of Cell objects. """
# NOTE(review): class-level mutable default; it is replaced per instance in __init__
cells:list[Cell] = []
def __init__(self, length: int = 1) -> None:
self.cells = [Cell() for _ in range(length)]
def __getitem__(self, item):
return self.cells[item]
def __setitem__(self, key, value):
self.cells[key] = value
class RowTracker():
""" Holds one integer counter per column; used below as the index into `rows` for that column. """
def __init__(self, items):
self.rowTracker = [0 for _ in range(items)]
def __getitem__(self, item):
return self.rowTracker[item]
def __setitem__(self, key, value):
self.rowTracker[key] = value
# Detect separator lines by pattern (it does not take into account partial separators)
def is_separator(line):
return _matchGridTableSeparator.match(line)
# Set content on the cell - concatenating multilines, flagging lists
def handling_content(cell, content):
# First piece of text for this cell: the cell becomes visible (rowspan/colspan 1)
if cell.content is None:
cell.rowspan += 1
cell.colspan += 1
if content.strip().startswith("- "): # List
cell.list_flag = True
#print(content)
content = re.sub(r'\\\s*$', "\n", content.strip())
cell.content = content + "@" # Add list element end mark to know when the list element ends
elif cell.list_flag and content.strip() != "": # any other content when handling list is concatenated to the last list element
content = re.sub(r'\\\s*$', "\n", content.strip())
cell.content += content + "@" #add the list element end mark
# NOTE(review): `content.strip` is missing its call parentheses, so this comparison is
# always False and the branch is never taken. The branch body also subscripts the cell
# (`cell['content']`) although Cell defines no __getitem__, and cell.content is None here.
elif content.strip == "": # separation between list and other paragraph
#if cell.list_flag:
# cell.list_flag = False
cell.content += "\n" if not cell['content'].endswith("\n") else ""
else:
cell.content = re.sub(r'\\\s*$', "\n", content.strip())
else:
# The cell already has text: append the new line to it
if content.strip().startswith("- "): # List
if not cell.list_flag:
cell.content += "\n"
#cell['content'] = cell['content'].strip("\n")
cell.list_flag = True
content = re.sub(r'\\\s*$', "\n", content.strip())
cell.content += content + "@" # Add list element end mark to know when the list element ends
elif cell.list_flag and content.strip() != "": # any other content when handling list is concatenated to the last list element
cell.content = cell.content.strip("@") #remove list element end mark
content = re.sub(r'\\\s*$', "\n", content.strip())
cell.content += " " + content + "@" #add list element end mark
elif content.strip() == "": # separation between list and other paragraph
if cell.list_flag:
cell.list_flag = False
cell.content += "\n\n" #end list by \n
#content = re.sub(r'\\\s*$', "\n", content.strip())
cell.content += "\n" if not cell.content.endswith("\n") else ""
else:
content = re.sub(r'\\\s*$', "\n", content.strip())
cell.content += " " + content
#print(cell['content'])
return cell
# Adjust colspan of a cell
def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns, delimiter_positions):
for j in range(column_index, number_of_parts):
delimiter_start = None
col_i= column_index
# Walk left until a cell with a known delimiter position is found (0 for the first column)
while delimiter_start == None:
delimiter_start = row[col_i - 1].position if col_i > 0 else 0
col_i -= 1
# Position of the next "|" or "+" after the start delimiter, -1 if there is none
positions = [line.find(delimiter, delimiter_start + 1) for delimiter in "|+" if delimiter in line[delimiter_start + 1:]]
position = min(positions) if positions else -1
if position > delimiter_positions[j]: # Colspan to be increased
row[column_index].colspan += 1
if position == delimiter_positions[len(delimiter_positions) - 1]: # last cell in row, adjust colspan to get max number columns
colspan_allocated = row[column_index].colspan
#for cell_index in range(number_of_parts):
# colspan_allocated += row[cell_index].colspan
row[column_index].colspan += number_of_columns - colspan_allocated - column_index
elif position < delimiter_positions[j]:
raise ValueError("Wrong cell formatting")
else:
break
return row[column_index]
# Indices of all full separator lines; consecutive pairs delimit one logical table row
separator_indices = [i for i, line in enumerate(lines) if is_separator(line)]
# NOTE(review): debug output left in the code
print(separator_indices)
if not separator_indices:
raise ValueError("No valid separators found in the provided Pandoc table.")
# Calculate max number of columns
# The separator with the most "+" characters defines the column grid (delimiter_positions)
delimiter_positions = []
number_of_columns = 0
for separator_index in separator_indices:
if lines[separator_index].count("+") - 1 > number_of_columns:
number_of_columns = lines[separator_index].count("+") - 1
delimiter_positions = []
for j in range(number_of_columns):
delimiter_positions_start = delimiter_positions[j - 1] if j != 0 else 0
del_positions = [lines[separator_index].find(delimiter, delimiter_positions_start + 1) for delimiter in "+" if delimiter in lines[separator_index][delimiter_positions_start + 1:]]
delimiter_positions.append(min(del_positions) if del_positions else -1)
# Look for a header separator line; it also defines the default column alignments
has_header = False
header_delimiter_positions = []
header_rows = []
for index in separator_indices:
if _matchGridTableHeaderSeparator.match(lines[index]):
has_header = True
header_separator_index = index
header_rows = []
parts = re.split(r"\+", lines[index].strip("+"))
default_alignments = []
#Calculate default alignments and positions of delimiters
for part_index in range(len(parts)):
if parts[part_index].startswith(":") and not parts[part_index].endswith(":"):
default_alignments.append("align=\"left\"")
elif not parts[part_index].startswith(":") and parts[part_index].endswith(":"):
default_alignments.append("align=\"right\"")
else:
default_alignments.append("align=\"center\"")
# Delimiter position
delimiter_positions_start = delimiter_positions[part_index - 1] if part_index != 0 else 0
del_positions = [lines[index].find(delimiter, delimiter_positions_start + 1) for delimiter in "+" if delimiter in lines[index][delimiter_positions_start + 1:]]
header_delimiter_positions.append(min(del_positions) if del_positions else -1)
if not has_header:
#Set default alignments from the first separator
parts = re.split(r"\+", lines[0].strip("+"))
default_alignments = []
# Calculate default alignments and positions of delimiters
for part_index in range(len(parts)):
if parts[part_index].startswith(":") and not parts[part_index].endswith(":"):
default_alignments.append("align=\"left\"")
elif not parts[part_index].startswith(":") and parts[part_index].endswith(":"):
default_alignments.append("align=\"right\"")
else:
default_alignments.append("align=\"center\"")
# Process the lines between each pair of consecutive full separators
data_rows = []
for row in range(len(separator_indices) - 1):
rows = []
rows_tracker = []
in_data_row = False
start, end = separator_indices[row], separator_indices[row + 1]
row_lines = lines[start:end] # Lines between separators including separator line start as it gives information about the number of columns of the row
if row_lines:
# Combine multiline content into single strings for each cell
for line in row_lines:
if is_separator(line) and not in_data_row:
in_data_row = True
# Add delimiter alignment check for separator lines
if not check_delimiter_alignment(line, delimiter_positions):
raise ValueError(f"Misaligned delimiters in separator row: {line}")
parts = re.split(r"\s*\+\s*", line.strip("+"))
delimiter_index = 0
# Determine the alignment of the cell - In order to replicate Pandoc's behaviour (do not support of alignment colons on separator lines (just header separator)
# we need to assign the default alignment as defined in the header separator line
# We may not need the code below, as that supports alignment per cell and row
#alignments = []
#for part_index in range(len(parts)):
# if parts[part_index].startswith(":") and not parts[part_index].endswith(":"):
# alignments.append("align=\"left\"")
# elif not parts[part_index].startswith(":") and parts[part_index].endswith(":"):
# alignments.append("align=\"right\"")
# else:
# alignments.append("align=\"center\"")
rows.append(Row(number_of_columns))
#rows_tracker = [RowTracker() for _ in range(number_of_columns)]
rows_tracker = RowTracker(number_of_columns)
i = 0
for j in range(len(parts)):
if i in range(number_of_columns):
delimiter_index += len(parts[j]) + 1
# Set position
rows[-1][i].position = delimiter_index # Position of cell delimiter +
# Set alignment as defined by header separator line
rows[-1][i].set_alignment()
# Skip the grid columns that this separator segment spans
while delimiter_index > delimiter_positions[i]:
i += 1
i += 1
elif in_data_row:
# Regular data row or partial separator
if _matchGridTableBodySeparator.match(line): # Partial separator
# Add delimiter alignment check for partial separators
if not check_delimiter_alignment(line, delimiter_positions):
raise ValueError(f"Misaligned delimiters in partial separator: {line}")
cells_content = re.split(r"[\|\+]", line.strip("|").strip("+"))
#Add another row, set delimiters for each cell
rows.append(Row(number_of_columns))
aux_delimiter_index = 0
auxiliar_cell_index = 0
for i in range(len(cells_content)):
if auxiliar_cell_index in range(number_of_columns):
aux_delimiter_index += len(cells_content[i]) + 1
rows[-1][auxiliar_cell_index].position = aux_delimiter_index # Position of cell delimiter +
rows[-1][auxiliar_cell_index].set_alignment()
while aux_delimiter_index > delimiter_positions[auxiliar_cell_index]:
auxiliar_cell_index += 1
auxiliar_cell_index += 1
if len(cells_content) <= number_of_columns: # Colspan: Positions of | with respect to + need to be determined
column_index = 0
for i in range(len(cells_content)):
if _matchGridTableBodySeparatorLine.match(cells_content[i]): # A new row is to be added
rows_tracker[column_index] += 1
rows[rows_tracker[column_index]][column_index].list_flag = False
#auxiliar_rows[-1]['use_auxiliar_row'][i] = True
#if cells[i].startswith(":") and not cells[i].endswith(":"):
# auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"left\""
#elif not cells[i].startswith(":") and cells[i].endswith(":"):
# auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"right\""
#else:
# auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"center\""
column_forward = 0
for del_index in range(column_index, len(delimiter_positions)):
if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[del_index]:
column_forward += 1
rows_tracker[column_index + column_forward - 1] += 1 if column_forward > 1 else 0
column_index += column_forward
continue
else:
# Handle content of the cell
rows[rows_tracker[column_index]][column_index] = handling_content(rows[rows_tracker[column_index]][column_index], cells_content[i])
rows[rows_tracker[column_index]][column_index].rowspan += 1
if not rows[rows_tracker[column_index]][column_index].colspan_adjusted:
rows[rows_tracker[column_index]][column_index].colspan_adjusted = True
# TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
rows[rows_tracker[column_index]][column_index] = adjust_colspan(rows[rows_tracker[column_index]], column_index, number_of_columns, line, number_of_columns, delimiter_positions)
if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index]:
column_index += rows[rows_tracker[column_index]][column_index].colspan if rows[rows_tracker[column_index]][column_index].colspan != 0 else 1
continue
else:
raise ValueError("More cells than columns found")
else: # Data row
# NOTE(review): the next assignment is overwritten immediately by the one after it
cells_content = line.strip()
cells_content = re.split(r"\|", line.strip("|"))
# Add delimiter alignment check
if not check_delimiter_alignment(line, delimiter_positions):
raise ValueError(f"Misaligned delimiters in row: {line}")
column_index = 0
if len(cells_content) < number_of_columns: # Colspan: Positions of | with respect to + need to be determined
for i in range(len(cells_content)):
# Handle content of the cell
rows[rows_tracker[column_index]][column_index] = handling_content(rows[rows_tracker[column_index]][column_index], cells_content[i])
if not rows[rows_tracker[column_index]][column_index].colspan_adjusted:
rows[rows_tracker[column_index]][column_index].colspan_adjusted = True
#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
rows[rows_tracker[column_index]][column_index] = adjust_colspan(rows[rows_tracker[column_index]], column_index, number_of_columns, line, number_of_columns, delimiter_positions)
if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index]:
column_index += rows[rows_tracker[column_index]][column_index].colspan # Move forward index i
elif len(cells_content) == number_of_columns: # Simple row
for i in range(len(cells_content)):
rows[rows_tracker[i]][i] = handling_content(rows[rows_tracker[i]][i], cells_content[i])
else:
raise ValueError("More cells than columns found")
else:
raise ValueError("No separator line found for row starting")
# Rows that start before the header separator are header rows, all others are body rows
if has_header and start >= header_separator_index: # table_row and auxiliar_row are part of data_rows
for body_row in rows:
data_rows.append(body_row.cells)
elif has_header and start < header_separator_index: # table_row and auxiliar_row are part of header_rows
for header_row in rows:
header_rows.append(header_row.cells)
else:
#only body
for body_row in rows:
data_rows.append(body_row.cells)
#print(header_rows)
#print(data_rows)
# Check if there are any data rows
if not data_rows and not header_rows:
raise ValueError("No valid rows found in the provided Pandoc table.")
# Format text
# Markdown emphasis markers are replaced by alternating opening/closing HTML tags
for rows in [header_rows, data_rows]:
bold = "<strong>"
italic = "<i>"
for row in rows:
for cell in row:
if cell.content is not None:
# Replacing "<" by &lt;
#cell.content = cell.content.replace("<", "&lt;")
#Bold
for bold_characters in ["**", "__"]:
while cell.content.find(bold_characters) != -1:
cell.content = cell.content.replace(bold_characters, bold, 1)
if bold == "<strong>":
bold = "</strong>"
else:
bold = "<strong>"
#Italic
# NOTE(review): "\_" is an invalid escape sequence in a normal string literal; it still
# denotes backslash + underscore but newer Python versions warn about it ("\\_" is the safe spelling)
while cell.content.find("_") != -1 and cell.content.find("\_") == -1:
cell.content = cell.content.rstrip() .replace("_", italic, 1)
if italic == "<i>":
italic = "</i>"
else:
italic = "<i>"
while cell.content.find("\_") != -1:
cell.content = cell.content.rstrip().replace("\_", "_", 1)
# Correct newlines characters
for row in header_rows:
for cell in row:
cell.content = cell.content.replace("\n", "<br />") if cell.content is not None else None
for row in data_rows:
for cell in row:
cell.content = cell.content.replace("\n", "<br />") if cell.content is not None else None
# Checking that the grid is correct Not too much tested - need to take into account rowspan of previous rows
# For every row the colspans, plus the columns still covered by a rowspan from an
# earlier row, must add up to number_of_columns
forward_rowspan = []
for row_index in range(len(header_rows)):
if len(forward_rowspan) == 0:
forward_rowspan = [0 for _ in range(len(header_rows[row_index]))]
# NOTE(review): `sum` shadows the builtin of the same name inside this function
sum = 0
for cell_index in range(len(header_rows[row_index])):
sum += header_rows[row_index][cell_index].colspan
if row_index > 0 and header_rows[row_index][cell_index].colspan == 0:
if forward_rowspan[cell_index] > 0:
sum += 1
forward_rowspan[cell_index] -= 1
if forward_rowspan[cell_index] == 0 and header_rows[row_index][cell_index].rowspan > 1:
forward_rowspan[cell_index] = header_rows[row_index][cell_index].rowspan -1
if not sum == number_of_columns:
raise ValueError("Grid table not converted properly")
forward_rowspan = []
for row_index in range(len(data_rows)):
if len(forward_rowspan) == 0:
forward_rowspan = [0 for _ in range(len(data_rows[row_index]))]
sum = 0
for cell_index in range(len(data_rows[row_index])):
sum += data_rows[row_index][cell_index].colspan
if row_index > 0 and data_rows[row_index][cell_index].colspan == 0:
if forward_rowspan[cell_index] > 0:
sum += 1
forward_rowspan[cell_index] -= 1
if forward_rowspan[cell_index] == 0 and data_rows[row_index][cell_index].rowspan > 1:
forward_rowspan[cell_index] = data_rows[row_index][cell_index].rowspan - 1
if not sum == number_of_columns:
raise ValueError("Grid table not converted properly")
return header_rows, data_rows
def generate_html_table_with_spans(pandoc_table: str) -> str:
    """
    Generate an HTML table from a Pandoc-style grid table with row and column spans.

    While the table is parsed, the module-level ``print`` is temporarily
    replaced so that the parser's diagnostic output is collected instead of
    written out. The collected output is embedded in the returned error text
    when the conversion fails.

    Args:
        pandoc_table (str): String of the Pandoc-style grid table.

    Returns:
        str: Generated HTML table markup, or error message if generation fails.
    """
    debug_output = []

    def debug_print(msg):
        debug_output.append(str(msg))  # Convert message to string

    # List items are stored by the parser with a trailing "@" end mark
    list_item_pattern = r"\s*([-*+]|\s*\d+\.)\s+((?:(?!@).)+)@"

    global print
    original_print = print
    try:
        # Redirect print statements to our debug collector. The original
        # function is restored in all cases; otherwise a failing table would
        # leave print() redirected for the rest of the program.
        print = debug_print
        try:
            grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)
        finally:
            print = original_print

        # Generate table HTML...
        html = "<table>\n"

        # A header section is only emitted if at least one header cell is visible
        has_header = any(cell.rowspan != 0 and cell.colspan != 0
                         for row in grid_header
                         for cell in row)
        if has_header:
            html += " <thead>\n"
            for row in grid_header:
                html += " <tr>\n"
                for cell in row:
                    # Cells covered by a neighbouring span are not rendered
                    if cell.rowspan == 0 or cell.colspan == 0:
                        continue
                    # Prepare content, in case there's a list
                    if matches := re.findall(list_item_pattern, cell.content):
                        list_html = "<ul>" + "".join(f"<li>{match[1]}</li>" for match in matches) + "</ul>"
                        # A callable replacement inserts the markup literally,
                        # so backslashes in the cell text are not treated as
                        # escapes of a replacement template.
                        cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+(?:(?!@).)+@)+",
                                              lambda _m: list_html,
                                              cell.content)
                        # Enforce left alignment if cell contains a list
                        cell.alignment = "align=\"left\""
                    rowspan = f" rowspan=\"{cell.rowspan}\"" if cell.rowspan > 1 else ""
                    colspan = f" colspan=\"{cell.colspan}\"" if cell.colspan > 1 else ""
                    html += f" <th{rowspan}{colspan} {cell.alignment}>{cell.content}</th>\n"
                html += " </tr>\n"
            html += " </thead>\n"

        html += " <tbody>\n"
        for row in grid_body:
            html += " <tr>\n"
            for cell in row:
                # Cells covered by a neighbouring span are not rendered
                if cell.rowspan == 0 or cell.colspan == 0:
                    continue
                # Prepare content, in case there's a list
                if matches := re.findall(list_item_pattern, cell.content):
                    list_html = "<ul>" + "".join(f"<li>{match[1]}</li>" for match in matches) + "</ul>"
                    cell.content = re.sub(r"\s*([-*+]|\s*\d+\.)\s+((?:(?!@).)+@)+",
                                          lambda _m: list_html,
                                          cell.content)
                    # Enforce left alignment if cell contains a list
                    cell.alignment = "align=\"left\""
                rowspan = f" rowspan=\"{cell.rowspan}\"" if cell.rowspan > 1 else ""
                colspan = f" colspan=\"{cell.colspan}\"" if cell.colspan > 1 else ""
                html += f" <td{rowspan}{colspan} {cell.alignment}>{cell.content}</td>\n"
            html += " </tr>\n"
        html += " </tbody>\n"
        html += "</table>"
        return html
    except Exception:
        # Best effort: a broken table must not abort the whole conversion.
        # The collected parser diagnostics are returned in place of the table.
        logging.error("Grid table could not be generated")
        debug_text = "<br>".join(debug_output)  # Now all items are strings
        return f"HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE.<br><pre>{debug_text}</pre>"
def check_delimiter_alignment(line: str, delimiter_positions: list[int], delimiters: str = "|+") -> bool:
    """
    Check if delimiters in a row align with expected positions.

    A line is aligned when it starts with a delimiter, contains a delimiter
    at the last expected position, and every delimiter found after column 0
    sits on one of the expected positions.

    Args:
        line: The line of text to check
        delimiter_positions: List of expected positions (based on + characters)
        delimiters: String containing valid delimiter characters (default: "|+")

    Returns:
        bool: True if delimiters align correctly, False otherwise
    """
    if not line or not delimiter_positions:
        return False

    print(f"\nChecking line: '{line}'")
    print(f"Expected delimiter positions: {delimiter_positions}")

    has_plus = '+' in line
    has_pipe = '|' in line

    if has_plus and not has_pipe:
        # Full separator lines (only +)
        allowed_starts = ("+",)
        current_positions = [i for i, char in enumerate(line) if char == '+' and i != 0]
        print(f"Full separator line - Found + at positions: {current_positions}")
    elif has_pipe and not has_plus:
        # Data lines (only |)
        allowed_starts = ("|",)
        current_positions = [i for i, char in enumerate(line) if char == '|' and i != 0]
        print(f"Data line - Found | at positions: {current_positions}")
    else:
        # Partial separators (mix of + and |)
        allowed_starts = ("+", "|")
        current_positions = [i for i, char in enumerate(line) if char in delimiters and i != 0]
        print(f"Partial separator - Found delimiters at positions: {current_positions}")
        print(f"Characters at those positions: {[line[pos] for pos in current_positions]}")

    # These two checks do not depend on the individual positions. Evaluating
    # them outside of all() also rejects lines without any delimiter after
    # column 0, which would otherwise be accepted vacuously.
    if not line.startswith(allowed_starts):
        return False
    if delimiter_positions[-1] not in current_positions:
        return False

    return all(pos in delimiter_positions for pos in current_positions)
def analyseMarkdown(filename:str) -> Document:
    """ Analyse the markdown file and split it into clauses.

        Every line is classified (code fence, table, grid table, note,
        footnote, stand-alone image, heading or text). A heading starts a new
        clause; grid tables are collected and converted to HTML.

        Args:
            filename: The name of the markdown file.

        Returns:
            The document object.
    """

    print(f'[green]Analyzing "{filename}"')

    # Read the file.
    # Note: We use utf-8 and replace errors to avoid problems with special or unknown characters.
    with open(filename, 'r', encoding = 'utf-8', errors = 'replace') as file:
        inLines = file.readlines()

    # The list of clauses. The first clause contains the text before the first heading.
    outClauses:list[Clause] = [Clause(0, '', '', [])]
    footnotes:list[Footnote] = []

    def appendGridTable(table:str) -> None:
        """ Convert a collected grid table to HTML and add it to the current clause. """
        print(table)
        htmltable = generate_html_table_with_spans(table)
        print(htmltable)
        # Add the HTML line by line. Iterating over the string itself would
        # create one Line object per character.
        for row in htmltable.splitlines(keepends = True):
            outClauses[-1].append(Line(row, LineType.TABLEROW))

    # Go through the lines and detect headers and codefences
    inCodefence = False
    inTable = False
    tableHasSeparator = False
    inGridTable = False
    gridTable = ""
    for line in inLines:

        # Detect and handle codefences
        # For the moment we support only codefences that start and end
        # with 3 backticks. This is the most common way to define codefences.
        # Note, that longer codefences are allowed by the markdown specification.
        if _matchCodefenceStart.match(line) and not inCodefence:
            inCodefence = True
            outClauses[-1].append(Line(line, LineType.CODEFENCESTART))
            continue
        if _matchCodefenceEnd.match(line):
            inCodefence = False
            outClauses[-1].append(Line(line, LineType.CODEFENCEEND))
            continue
        if inCodefence:
            outClauses[-1].append(Line(line, LineType.CODE))
            continue

        # Detect and handle tables
        if _matchTable.match(line) and not inTable and not inGridTable:
            inTable = True
            outClauses[-1].append(Line(line, LineType.TABLEHEADER))
            continue
        if inTable:
            if _matchTableSeparator.match(line) and not tableHasSeparator:
                outClauses[-1].append(Line(line, LineType.TABLESEPARATOR))
                tableHasSeparator = True
                continue
            elif _matchTable.match(line):
                outClauses[-1].append(Line(line, LineType.TABLEROW))
                continue
            else:
                inTable = False
                tableHasSeparator = False
                # Mark the previous line as the last row in the table
                outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
                # continue with other matches

        # Detect grid tables and convert them to html table
        if _matchGridTable.match(line) and not inGridTable:
            inGridTable = True
            gridTable += line
            continue
        if inGridTable:
            if _matchGridTableHeaderSeparator.match(line) or _matchGridTableBodySeparator.match(line):
                gridTable += line
                continue
            elif _matchTable.match(line):
                gridTable += line
                continue
            else:
                inGridTable = False
                appendGridTable(gridTable)
                gridTable = ""
                # continue with other matches

        # Detect notes
        # Notes are lines that start with a '>'.
        if _matchNote.match(line):
            outClauses[-1].append(Line(line, LineType.NOTE))
            continue

        # Detect footnotes
        # Footnotes are lines that start with a '^'
        if (_fn := _footnote.match(line)):
            footnotes.append(Footnote(_fn.groups()[0], Line(line, LineType.TEXT)))
            continue

        # Detect images on a single line
        if _matchStandAloneImage.match(line):
            outClauses[-1].append(Line(line, LineType.STANDALONEIMAGE))
            continue

        # Detect headers
        _lineType = LineType.TEXT
        if (m := _matchHeader.match(line)):
            # Add a new clause
            clauseTitle = m.groups()[1].strip()
            clauseTitle = re.sub(_htmlTag, '', clauseTitle)
            headerNumber = _matchHeaderNumber.search(clauseTitle)
            outClauses.append(Clause(len(m.groups()[0]), # level
                                     headerNumber.group() if headerNumber else shortHash(clauseTitle, 6),
                                     clauseTitle,
                                     []))
            _lineType = LineType.HEADING

        # Just add the line to the current clause as text
        outClauses[-1].append(Line(line, _lineType))

    # The file may end while a table is still open. Close it here, otherwise
    # a trailing grid table would be dropped and the last row of a trailing
    # table would not be marked.
    if inTable:
        outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
    if inGridTable:
        appendGridTable(gridTable)

    return Document(outClauses, footnotes)
def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None: def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None:
""" Copy media files from the source directory to the target directory. """ Copy media files from the source directory to the target directory.
...@@ -1155,10 +164,10 @@ def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> ...@@ -1155,10 +164,10 @@ def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') ->
targetDirectory = f'{os.path.dirname(filename)}/{navTitle}/{mediaDirectory}' targetDirectory = f'{os.path.dirname(filename)}/{navTitle}/{mediaDirectory}'
if os.path.exists(sourceDirectory): if os.path.exists(sourceDirectory):
print(f'[green]Copying media files from "{sourceDirectory}" to "{targetDirectory}"') printInfo(f'Copying media files from "{sourceDirectory}" to "{targetDirectory}"')
shutil.copytree(sourceDirectory, targetDirectory, dirs_exist_ok = True) shutil.copytree(sourceDirectory, targetDirectory, dirs_exist_ok = True)
else: else:
print(f'[red]Media directory "{sourceDirectory}" does not exist') printError(f'Media directory "{sourceDirectory}" does not exist')
def processDocument(args:argparse.Namespace) -> None: def processDocument(args:argparse.Namespace) -> None:
...@@ -1175,20 +184,22 @@ def processDocument(args:argparse.Namespace) -> None: ...@@ -1175,20 +184,22 @@ def processDocument(args:argparse.Namespace) -> None:
document.insertFootnotes() document.insertFootnotes()
document.updateLinks() document.updateLinks()
document.updateNotes() document.updateNotes()
document.prepareForMkdocs(args.include_hanging_paragraphs)
prepareForMkdocs(document, args.include_hanging_paragraphs)
# Write the clauses to files # Write the clauses to files
document.writeClausesMkDocs(inDocumentFilename, args.title, args.nav_add_title) writeClausesMkDocs(document, inDocumentFilename, args.title, args.nav_add_title)
# Copy the media files # Copy the media files
copyMediaFiles(inDocumentFilename, args.title, args.media_directory) copyMediaFiles(inDocumentFilename, args.title, args.media_directory)
if __name__ == '__main__': def main() -> None:
parser = argparse.ArgumentParser(description = 'Convert oneM2M markdown specificatios to MkDocs format', parser = argparse.ArgumentParser(description = 'Convert oneM2M markdown specificatios to MkDocs format',
formatter_class = argparse.ArgumentDefaultsHelpFormatter) formatter_class = argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--verbose', '-v', action = 'store_true', help = 'verbose output during processing') parser.add_argument('--verbose', '-v', action = 'store_true', help = 'verbose output during processing')
parser.add_argument('--out', '-o', metavar='outfile', help = 'write output to file instead of stdout')
parser.add_argument('--very-verbose', '-vv', action = 'store_true', help = 'very verbose output during processing') parser.add_argument('--very-verbose', '-vv', action = 'store_true', help = 'very verbose output during processing')
parser.add_argument('--ignore-clause', '-ic', metavar = 'clause', nargs = '+', default = [ 'Contents', 'History' ], help = 'ignore headers in the markdown document') parser.add_argument('--ignore-clause', '-ic', metavar = 'clause', nargs = '+', default = [ 'Contents', 'History' ], help = 'ignore headers in the markdown document')
parser.add_argument('--include-hanging-paragraphs', '-ihp', action = 'store_true', default = False, help = 'include hanging paragraphs (text in clauses with sub-clauses) in the output files') parser.add_argument('--include-hanging-paragraphs', '-ihp', action = 'store_true', default = False, help = 'include hanging paragraphs (text in clauses with sub-clauses) in the output files')
...@@ -1198,8 +209,15 @@ if __name__ == '__main__': ...@@ -1198,8 +209,15 @@ if __name__ == '__main__':
parser.add_argument('--title', '-t', metavar = 'title', required = True, help = 'mkdocs navigation tile') parser.add_argument('--title', '-t', metavar = 'title', required = True, help = 'mkdocs navigation tile')
parser.add_argument('--nav-add-title', '-nat', action = 'store_true', default = False, help = 'add the title as an extra navigation level to the navigation file') parser.add_argument('--nav-add-title', '-nat', action = 'store_true', default = False, help = 'add the title as an extra navigation level to the navigation file')
parser.add_argument('document', type = str, help = 'a oneM2M markdown specification document to process') parser.add_argument('document', type = str, help = 'a oneM2M markdown specification document to process')
args = parser.parse_args() args = parser.parse_args()
setLoggers(info = printInfo,
debug = printDebug,
error = printError)
processDocument(args) processDocument(args)
# Script entry point: run the command-line interface only when this file is
# executed directly, so that importing the module has no side effects.
if __name__ == '__main__':
    main()