Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • tools/scripts
1 result
Show changes
Commits on Source (30)
......@@ -3,3 +3,4 @@
*/ts-*
*/.python-version
.python-version
toMkdocs/__pycache__
BSD 3-Clause License
Copyright (c) 2024, Miguel Angel Reina Ortega
Copyright (c) 2024, Miguel Angel Reina Ortega & Andreas Kraft
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
......
#
# processMDSpec.py
#
# (c) 2025 by Andreas Kraft
# License: BSD 3-Clause License. See the LICENSE file for further details.
#
""" This script processes markdown specification files. It handles the
include statements and the front matter. It can also render the markdown
content on console or output the front matter only.
"""
from __future__ import annotations
_print = print # save the original print function
from typing import Tuple, Generator
import argparse
from rich import markdown, print
import re, sys, yaml, os
from contextlib import contextmanager
# Front matter collected while processing, keyed by the (normalized) file name.
_frontMatter:dict = {}
# Names of the files that are currently being processed, outermost first.
_includeStack:list[str] = []


@contextmanager
def includeStack(filename:str) -> Generator [None, None, None]:
    """ Track a file on the include stack for the duration of a `with` block.

        The file name is pushed when the block is entered and removed again
        when the block is left. The name is removed even when the block is left
        through an exception, so that a failed include does not leave a stale
        entry behind that would later be reported as a circular include.

        Args:
            filename: The name of the file being processed.

        Raises:
            Exception: If the file is already on the include stack, i.e. a
                circular include is detected.

        Returns:
            Generator: A generator that yields nothing.
    """
    if filename in _includeStack:
        raise Exception(f'Circular include detected: {" -> ".join(_includeStack)} -> {filename}')
    _includeStack.append(filename)
    try:
        yield
    finally:
        # Always unwind, also when the body of the with-statement raised
        _includeStack.pop()
def expandPaths(lines:list[str], currentPath:str, childPath:str) -> list[str]:
    """ Expand the paths in the markdown file. This means that all relative
        paths in links, images, and include statements are prefixed so that
        they would be valid paths from the root document.

        URLs, absolute paths, in-document anchors (`#...`) and `mailto:` links
        are left untouched. Content inside code fences is not modified.

        Args:
            lines: The lines of the markdown file. The list is updated in place.
            currentPath: The current path of the file being processed.
            childPath: The path of the child file being processed.

        Returns:
            list[str]: The same list object, with the paths expanded.
    """
    # Build the prefix. An empty current path contributes nothing.
    if currentPath and not currentPath.endswith('/'):
        currentPath += '/'
    newPath = currentPath + childPath

    # Remove the leading './' from the path
    while newPath.startswith('./'):
        newPath = newPath[2:]

    def prefixed(path:str) -> str:
        """ Return `path` (without a leading './') below the prefix `newPath`. """
        relativePath = path[2:] if path.startswith('./') else path
        if not newPath:
            # Nothing to prepend. Joining with '/' would create an absolute path.
            return relativePath
        return f'{newPath}{relativePath}' if newPath.endswith('/') else f'{newPath}/{relativePath}'

    inCodeFence = False
    for index, line in enumerate(lines):

        # Ignore stuff in code fences
        if re.match(r'^\s*```.*', line):
            inCodeFence = not inCodeFence
            continue
        if inCodeFence:
            continue

        # Handle the links in a line (there could be multiple links in a line)
        for linkText, linkPath in re.findall(r'\[([^\]]+)\]\(([^\)]+)\)', line):
            # Skip URLs, absolute paths, and targets that are not file paths
            if linkPath.startswith(('http://', 'https://', '/', '#', 'mailto:')):
                continue
            line = line.replace(f'[{linkText}]({linkPath})', f'[{linkText}]({prefixed(linkPath)})')

        # Handle the include statements (there should only be one per line).
        # The replacement must search for the path exactly as it is written in
        # the line, so the captured path is not modified before the lookup.
        for includePath in re.findall(r'^\s*::include{file=([^\}]+)}', line):
            line = line.replace(f'::include{{file={includePath}}}', f'::include{{file={prefixed(includePath)}}}')

        lines[index] = line

    return lines
def processFrontMatter(lines:list[str], args:argparse.Namespace) -> Tuple[dict, list[str]]:
    """ Process the front matter of a markdown file. This includes extracting
        the front matter information and returning it as a dictionary.

        Currently only YAML front matter is supported. It can be extended later.

        The front matter must start with a `---` line at the very beginning of
        the document and must be closed by another `---` line. A document that
        starts with `---` but never closes the block is treated as having no
        front matter, so that its content is not swallowed.

        Args:
            lines: The lines of the markdown file.
            args: The command line arguments.

        Raises:
            yaml.YAMLError: If the front matter cannot be parsed as YAML.

        Returns:
            dict: The front matter information as a dictionary. Empty if there
                is no front matter.
            list[str]: The lines of the markdown file without the front matter.
    """
    if not lines or not lines[0].startswith('---'):
        return {}, lines

    frontMatterLines:list[str] = []
    for line in lines[1:]:
        if re.match(r'^---\s*', line):
            break
        frontMatterLines.append(line)
    else:
        # No closing delimiter found: this is not a front matter block
        return {}, lines

    # Remove the front matter (and both delimiter lines) from the lines
    lines = lines[len(frontMatterLines)+2:]

    # Parse the front matter as YAML
    try:
        frontMatter = yaml.safe_load(''.join(frontMatterLines))
    except yaml.YAMLError as e:
        print(f'[red]Error parsing front matter: {e}', file=sys.stderr)
        raise

    # An empty front matter block is parsed as None
    return ({} if frontMatter is None else frontMatter), lines
def processFile(args:argparse.Namespace) -> str:
    """ Handle the include statements in the markdown files. This includes
        processing the include statements and replacing them with the content
        of the included files. Front matter found in any of the files is
        stored in `_frontMatter`.

        Args:
            args: The command line arguments. `args.document` is the root
                document, `args.doInclude` enables the include processing.

        Raises:
            Exception: If the file cannot be processed.

        Returns:
            The processed markdown content as a string.
    """

    def handleIncludesForFile(filename:str, currentPath:str) -> str:
        """ Read a single markdown file, expand its paths, remove its front
            matter and recursively resolve its include statements.

            Args:
                filename: The name of the file to read.
                currentPath: The directory of the including file.

            Raises:
                FileNotFoundError: If the file cannot be found.
                Exception: If a circular include is detected.

            Returns:
                The content of the file.
        """
        # Get the directory path from the filename
        dirname = os.path.dirname(filename)
        if dirname and not dirname.endswith('/'):
            dirname = dirname + '/'
        dirname = dirname if dirname else '.'
        currentPath = currentPath if currentPath else '.'
        filename = os.path.normpath(filename)

        with includeStack(filename):
            try:
                # Read as UTF-8 instead of relying on the platform default encoding
                with open(filename, 'r', encoding='utf-8') as f:
                    lines = f.readlines()
            except FileNotFoundError:
                print(f'[red]File not found: {filename}', file=sys.stderr)
                raise

            # Expand the paths in the markdown file
            lines = expandPaths(lines, currentPath, dirname)

            # Extract front matter information
            fm, lines = processFrontMatter(lines, args)
            if fm:
                _frontMatter[filename] = fm

            if not args.doInclude:
                return ''.join(lines)

            inCodeFence = False
            # Replace by position. Looking the line up by value could hit an
            # identical line elsewhere, e.g. an example inside a code fence.
            for index, line in enumerate(lines):

                # Ignore stuff in code fences
                if re.match(r'^\s*```.*', line):
                    inCodeFence = not inCodeFence
                    continue
                if inCodeFence:
                    continue

                # Check for ::include{file=...} pattern using regex at the beginning of a line
                match = re.search(r'^::include\{\s*file=(.*?)\s*\}', line.strip())
                if match:
                    includeFilename = match.group(1)
                    # Read the included file and replace the include statement with its content
                    lines[index] = handleIncludesForFile(includeFilename, os.path.dirname(filename))

            return ''.join(lines)

    return handleIncludesForFile(args.document, os.path.dirname(args.document))
if __name__ == '__main__':
    # Command line interface of the script
    parser = argparse.ArgumentParser(description='Process markdown specification files.')
    parser.add_argument('--no-include', dest='doInclude', action='store_false', default=True, help="don't process include statements")
    parser.add_argument('--render-markdown', '-md', dest='renderAsMarkdown', action='store_true', help='render output as markdown')
    parser.add_argument('--process-frontmatter', '-fm', dest='outputFrontMatter', action='store_true', help='output the collected front matter before the document')
    parser.add_argument('--frontmatter-only', '-fmo', dest='onlyFrontMatter', action='store_true', help='output only front matter')
    parser.add_argument('--verbose', '-v', action='store_true', help='print debug information to stderr.')
    parser.add_argument('document', type=str, help='a markdown specification document to process')
    args = parser.parse_args()

    if args.verbose:
        if not args.doInclude:
            print('[yellow]Skipping processing include statements', file=sys.stderr)
        else:
            print('[green]Processing include statements', file=sys.stderr)

    try:
        content = processFile(args)
    except Exception as e:
        print(f'[red]Error while processing {args.document}\n{e}', file=sys.stderr)
        # sys.exit() instead of quit(): quit() is only installed by the site module
        sys.exit(1)

    if args.outputFrontMatter or args.onlyFrontMatter:
        # Collect front matter information in the output.
        # The plain print function is used here on purpose: the YAML is meant
        # to be read by other tools, so it must not pass through rich's markup
        # handling and formatting.
        if not args.onlyFrontMatter:
            _print('---')
        # The following is a workaround to keep the order of the dictionary
        # see https://stackoverflow.com/a/52621703
        yaml.add_representer(dict, lambda self, data: yaml.representer.SafeRepresenter.represent_dict(self, data.items()))
        _print(yaml.dump(_frontMatter, default_flow_style=False), end='')
        if not args.onlyFrontMatter:
            _print('---')

    if not args.onlyFrontMatter:
        if args.renderAsMarkdown:
            # Render the markdown content
            print(markdown.Markdown(content))
        else:
            # Print the raw markdown content
            _print(content)
#
# gridTableFilter.py
#
# (c) 2025 by Andreas Kraft & Miguel Angel Reina Ortega
# License: BSD 3-Clause License. See the LICENSE file for further details.
#
""" This script replaces the grid tables in the markdown files with the equivalent
html tables. Other markdown elements are not affected and are passed through.
The script expects the markdown file to be converted from stdin and writes the
result to stdout.
"""
import argparse, sys
from markdownTools import analyseMarkdown, setLoggers
def main() -> None:
    """ Entry point of the filter: read markdown from stdin, hand it to
        `analyseMarkdown` and write the returned result to stdout.

        Log messages are written to stderr, and only if `--verbose` is given.
    """
    # Command line handling
    argumentParser = argparse.ArgumentParser(description='Convert grid tables to html tables. This script reads the markdown file from stdin and writes the result to stdout.')
    argumentParser.add_argument('-v', '--verbose', action='store_true', help='Print debug information to stderr.')
    options = argumentParser.parse_args()

    def emit(prefix:str, message) -> None:
        """ Write a prefixed message to stderr when running verbosely. """
        if options.verbose:
            print(f'{prefix}{message}', file=sys.stderr)

    # Install the loggers. Every level is silent unless --verbose is set.
    setLoggers(info=lambda m: emit('[green]', m),
               debug=lambda m: emit('[dim]', m),
               error=lambda m: emit('[red]', m))

    # Convert everything that arrives on stdin and print the result
    converted = analyseMarkdown(inLines=sys.stdin.readlines())
    print(converted, file=sys.stdout)


if __name__ == '__main__':
    main()
#
# gridTableTools.py
#
# (c) 2025 by Miguel Angel Reina Ortega & Andreas Kraft
# License: BSD 3-Clause License. See the LICENSE file for further details.
#
""" Tools for working with grid tables in markdown files. """
from typing import Optional, Callable
from regexMatches import *
# HTML alignment attributes that are written into the generated table cells.
_alignLeft = 'align="left"'
_alignRight = 'align="right"'
_alignCenter = 'align="center"'
# Marker that is appended to list items inside a cell so that the end of a
# list item can be recognized later. It must be a single character.
# NOTE(review): the literal below reads as an empty string although a single
# character is required. Confirm that a non-printable marker character has not
# been lost from this line.
_nextListElementMark = ''
# Log functions used by this module. All of them default to the built-in print.
printInfo = printDebug = printError = print


def setLoggers(info:Callable=print, debug:Callable=print, error:Callable=print) -> None:
    """ Replace the module-level log functions.

        Args:
            info: Callable that receives informational messages.
            debug: Callable that receives debug messages.
            error: Callable that receives error messages.
    """
    global printInfo, printDebug, printError
    printInfo, printDebug, printError = info, debug, error
class GridCell:
    """ A single cell of a grid table together with its span, alignment and
        position information.
    """

    def __init__(self) -> None:
        """ Create an empty cell: no content, no spans, centered, no position. """
        self.content:Optional[str] = None
        self.rowspan:int = 0
        self.colspan:int = 0
        self.colspanAdjusted:bool = False
        self.alignment:str = 'align="center"'
        self.positionStart:Optional[int] = None
        self.position:Optional[int] = None
        self.listFlag:bool = False
        self.auxiliarIndex:int = 0


    def calculateAndSetAlignment(self,
                                 headerDelimiterPositions:list[int],
                                 delimiterPositions:list[int],
                                 defaultAlignments:list[str],
                                 hasHeader:bool) -> None:
        """ Pick the alignment of the cell from the default alignments.

            The alignment of the first header delimiter that is not located
            before the start position of the cell is used. Nothing is changed
            when the table has no header.

            Args:
                headerDelimiterPositions: The positions of the header delimiters.
                delimiterPositions: The positions of the delimiters.
                defaultAlignments: The default alignments.
                hasHeader: True if the table has a header, False otherwise.

            Raises:
                ValueError: If the position of the cell is not set, or if the
                    cell starts behind all header delimiters.
        """
        if None in (self.position, self.positionStart):
            raise ValueError('Cell position must be set before calculating alignment.')

        if not hasHeader:
            return

        for candidate in range(len(defaultAlignments)):
            if self.positionStart <= headerDelimiterPositions[candidate]:
                self.alignment = defaultAlignments[candidate]
                break
        else:
            # The cell starts behind every known header delimiter
            raise ValueError('Invalid table formatting')


    def __str__(self):
        """ Return a readable summary of the cell. """
        return f'(Content: {self.content}, Rowspan: {self.rowspan}, Colspan: {self.colspan}, Alignment: {self.alignment}, Position: {self.position}, ListFlag: {self.listFlag}, AuxiliarIndex: {self.auxiliarIndex})'


    def __repr__(self):
        """ Same representation as `__str__`. """
        return str(self)
class GridRow:
    """ A row of a grid table: an indexable sequence of `GridCell` objects. """
    cells:list[GridCell] = []


    def __init__(self, length: int = 1) -> None:
        """ Create a row with `length` fresh, empty cells.

            Args:
                length: The number of cells in the row.
        """
        freshCells:list[GridCell] = []
        for _ in range(length):
            freshCells.append(GridCell())
        self.cells = freshCells


    def __getitem__(self, item):
        """ Return the cell(s) at the given index or slice. """
        return self.cells[item]


    def __setitem__(self, key, value):
        """ Replace the cell(s) at the given index or slice. """
        self.cells[key] = value


    def __str__(self):
        """ Return the string form of the list of cells. """
        return str(self.cells)


    def __repr__(self):
        """ Same representation as `__str__`. """
        return str(self)
class GridRowsTracker:
    """ A fixed-size list of integer counters, all starting at 0, with index
        access and a `max()` helper.
    """

    def __init__(self, size:int) -> None:
        """ Create the tracker.

            Args:
                size: The number of counters to keep.
        """
        self.gridRowTracker = [0] * size


    def __getitem__(self, item:int) -> int:
        """ Return the counter stored at position `item`. """
        return self.gridRowTracker[item]


    def __setitem__(self, key:int, value:int) -> None:
        """ Store `value` at position `key`. """
        self.gridRowTracker[key] = value


    def __str__(self):
        """ Return the string form of the list of counters. """
        return str(self.gridRowTracker)


    def __repr__(self):
        """ Same representation as `__str__`. """
        return str(self)


    def max(self) -> int:
        """ Return the largest counter value. """
        return max(self.gridRowTracker)
# Type aliases: a table row is a list of cells, and a table section
# (header or body) is a list of such rows.
GridTableRow = list[GridCell]
GridTableRowList = list[GridTableRow]
def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableRowList]:
    """ Parse a Pandoc-style grid table into rows of `GridCell` objects that
        carry the rowspan and colspan information for the HTML conversion.

        Args:
            gridTable: String of the Pandoc-style grid table.

        Raises:
            ValueError: If no separator line is found, if delimiters are not
                aligned with the column positions, if a line contains more
                cells than columns, if no rows could be extracted, or if the
                final column count check of a row fails.

        Returns:
            A tuple `(headerRows, dataRows)`. Each element is a list of rows,
            and each row is a list of `GridCell` objects.
    """
    # Parsing state, local to this call
    hasHeader = False
    defaultAlignments:list[str] = []
    headerDelimiterPositions:list[int] = []
    delimiterPositions:list[int] = []

    # Split the input into lines
    lines:list[str] = [line for line in gridTable.rstrip().split('\n')]

    # Detect separator lines by pattern (it does not take into account partial separators)
    def isSeparator(line:str) -> bool:
        """ Return True if the line matches `matchGridTableSeparator`. """
        return matchGridTableSeparator.match(line) is not None

    # Set content on the cell - concatenating multilines, flagging lists
    def handleCellContent(cell:GridCell, content:str) -> None:
        """ Add one line of text to a cell.

            A trailing backslash (optionally followed by whitespace) is
            replaced by a newline. Lines starting with '- ' switch the cell
            into list mode; list items are terminated with
            `_nextListElementMark`.
        """
        _c = content.strip()
        if cell.content is None:    # Previous empty cell
            # First content for this cell: it now occupies one row and one column
            cell.rowspan += 1
            cell.colspan += 1
            if _c.startswith('- '):  # List in a cell
                cell.listFlag = True
                _c = re.sub(r'\\\s*$', '\n', _c)
                cell.content = _c + _nextListElementMark  # Add list element end mark to know when the list element ends
            elif cell.listFlag and len(_c) > 0:  # any other content when handling list is concatenated to the last list element
                _c = re.sub(r'\\\s*$', '\n', _c)
                cell.content = _c + _nextListElementMark  # Add the list element end mark
            elif not _c:  # empty line. separation between list and other paragraph
                cell.content = '\n'  # cell content is always empty / None here.
            else:
                cell.content = re.sub(r'\\\s*$', '\n', _c)
        else:   # Cell has content
            if _c.startswith('- '):  # List
                if not cell.listFlag:
                    # Start the list on a new line
                    cell.content += '\n'
                cell.listFlag = True
                _c = re.sub(r'\\\s*$', '\n', _c)
                cell.content += _c + _nextListElementMark  # Add list element end mark to know when the list element ends
            elif cell.listFlag and len(_c) > 0:  # any other content when handling list is concatenated to the last list element
                cell.content = cell.content.removesuffix(_nextListElementMark)  # Remove list element end mark
                _c = re.sub(r'\\\s*$', '\n', _c)
                cell.content += ' ' + _c + _nextListElementMark  # Add list element end mark
            elif len(_c) == 0:  # separation between list and other paragraph
                if cell.listFlag:
                    cell.listFlag = False
                    cell.content += '\n\n'  # End the list
                # Make sure that the content ends with a newline
                cell.content += '\n' if not cell.content.endswith('\n') else ''
            else:
                cell.content += ' ' + re.sub(r'\\\s*$', '\n', _c)

    # Adjust colspan of a cell
    def adjustColspan(row:GridRow, columnIndex:int, numberOfParts:int, line, numberOfColumns:int, delimiterPositions:list[int]) -> None:
        """ Increase the colspan of `row[columnIndex]` for every column
            delimiter position that the cell extends beyond in `line`, then
            mark the cell as adjusted.

            Raises:
                ValueError: If the next delimiter in the line is located
                    before the expected column delimiter position.
        """
        for j in range(columnIndex, numberOfParts):
            # Find the closest cell to the left that has a position.
            # This is where the search for the next delimiter starts.
            delimiterStart:Optional[int] = None
            colI = columnIndex
            # NOTE(review): comparison with `== None`; `is None` would be the idiomatic form
            while delimiterStart == None:
                delimiterStart = row[colI - 1].position if colI > 0 else 0
                colI -= 1
            # Position of the next '|' or '+' after the start position, -1 if there is none
            positions = [line.find(delimiter, delimiterStart + 1) for delimiter in "|+" if delimiter in line[delimiterStart + 1:]]
            position = min(positions) if positions else -1
            if position > delimiterPositions[j]:  # Colspan to be increased
                row[columnIndex].colspan += 1
                if position == delimiterPositions[len(delimiterPositions) - 1]:  # last cell in row, adjust colspan to get max number columns
                    colspan_allocated = row[columnIndex].colspan
                    row[columnIndex].colspan += numberOfColumns - colspan_allocated - columnIndex
            elif position < delimiterPositions[j]:
                raise ValueError("Wrong cell formatting")
            else:
                break
        row[columnIndex].colspanAdjusted = True  # Mark cell as adjusted

    def checkDelimiterAlignment(line: str, delimiterPositions:list[int], delimiters: str = "|+") -> bool:
        """ Check if delimiters in a row align with expected positions.

            Args:
                line: The line of text to check.
                delimiterPositions: List of expected positions (based on + characters).
                delimiters: String containing valid delimiter characters (default: "|+").

            Returns:
                True if every delimiter found after the first column of the
                line is located at an expected position, the last expected
                position is among them, and the line starts with a delimiter.
                False otherwise, also for an empty line or an empty list of
                positions.
        """
        if not line or not delimiterPositions:
            return False

        printDebug(f'\nChecking line: "{line}"')
        printDebug(f'Expected delimiter positions: {delimiterPositions}')

        # For full separator lines (only +)
        if '+' in line and '|' not in line:
            currentPositions = [i for i, char in enumerate(line) if (char == '+' and i > 0)]
            printDebug(f'Full separator line - Found + at positions: {currentPositions}')
            return all(delimiterPositions[-1] in currentPositions and line.startswith('+') and pos in delimiterPositions
                       for pos in currentPositions)

        # For data lines (only |)
        if '|' in line and '+' not in line:
            currentPositions = [i for i, char in enumerate(line) if (char == '|' and i > 0)]
            printDebug(f'Data line - Found | at positions: {currentPositions}')
            return all(delimiterPositions[-1] in currentPositions and line.startswith("|") and pos in delimiterPositions
                       for pos in currentPositions)

        # For partial separators (mix of + and |)
        currentPositions = [i for i, char in enumerate(line) if (char in delimiters and i > 0)]
        printDebug(f'Partial separator - Found delimiters at positions: {currentPositions}')
        printDebug(f'Characters at those positions: {[line[pos] for pos in currentPositions]}')
        return all(delimiterPositions[-1] in currentPositions and line.startswith(('+', '|')) and pos in delimiterPositions
                   for pos in currentPositions)

    separatorIndices = [i for i, line in enumerate(lines) if isSeparator(line)]

    if not separatorIndices:
        raise ValueError('No valid separators found in the provided grid table.')

    # Calculate max number of columns.
    # The separator line with the most '+' characters defines the columns and their positions.
    delimiterPositions = []
    numberOfColumns:int = 0
    for separatorIndex in separatorIndices:
        if (_cnt := lines[separatorIndex].count('+') - 1) > numberOfColumns:
            numberOfColumns = _cnt
            delimiterPositions = []
            for rowIndex in range(numberOfColumns):
                delimiterPositionsStart = delimiterPositions[rowIndex - 1] if rowIndex != 0 else 0
                delPositions = [lines[separatorIndex].find(delimiter, delimiterPositionsStart + 1)
                                for delimiter in '+' if delimiter in lines[separatorIndex][delimiterPositionsStart + 1:]]
                delimiterPositions.append(min(delPositions) if delPositions else -1)

    # Determine delimiter positions and alignments
    headerRows:GridTableRowList = []
    dataRows:GridTableRowList = []
    for index in separatorIndices:
        if matchGridTableHeaderSeparator.match(lines[index]):
            hasHeader = True
            headerSeparatorIndex = index
            parts = re.split(r'\+', lines[index].strip('+'))
            # Calculate default alignments and positions of delimiters
            for partIndex in range(len(parts)):
                # Left alignment
                if parts[partIndex].startswith(':') and not parts[partIndex].endswith(':'):
                    defaultAlignments.append(_alignLeft)
                # Right alignment
                elif not parts[partIndex].startswith(':') and parts[partIndex].endswith(':'):
                    defaultAlignments.append(_alignRight)
                # Center alignment
                else:
                    defaultAlignments.append(_alignCenter)
                # Delimiter position
                delimiterPositionsStart = delimiterPositions[partIndex - 1] if partIndex != 0 else 0
                delPositions = [lines[index].find(delimiter, delimiterPositionsStart + 1)
                                for delimiter in '+' if delimiter in lines[index][delimiterPositionsStart + 1:]]
                headerDelimiterPositions.append(min(delPositions) if delPositions else -1)

    if not hasHeader:
        # Set default alignments from the first separator which takes the role of header
        hasHeader = True
        headerSeparatorIndex = 0
        parts = re.split(r'\+', lines[0].strip('+'))
        # Calculate default alignments and positions of delimiters
        for partIndex in range(len(parts)):
            if parts[partIndex].startswith(':') and not parts[partIndex].endswith(':'):
                defaultAlignments.append(_alignLeft)
            elif not parts[partIndex].startswith(':') and parts[partIndex].endswith(':'):
                defaultAlignments.append(_alignRight)
            else:
                defaultAlignments.append(_alignCenter)
            # Delimiter position
            # NOTE(review): `index` is the leftover variable of the loop above,
            # i.e. the last separator line, while `parts` is taken from
            # lines[0]. Confirm that this is intended.
            delimiterPositionsStart = delimiterPositions[partIndex - 1] if partIndex != 0 else 0
            delPositions = [lines[index].find(delimiter, delimiterPositionsStart + 1)
                            for delimiter in '+' if delimiter in lines[index][delimiterPositionsStart + 1:]]
            headerDelimiterPositions.append(min(delPositions) if delPositions else -1)

    # Check end table delimiter alignment (not checked during the lines processing)
    if not checkDelimiterAlignment(lines[-1], delimiterPositions):
        raise ValueError(f'Misaligned delimiters in end table separator: {lines[-1]}')

    # Process the lines between each pair of consecutive separator lines
    for rowNumber in range(len(separatorIndices) - 1):
        rows:list[GridRow] = []
        rowsTracker:GridRowsTracker
        inDataRow = False
        start, end = separatorIndices[rowNumber], separatorIndices[rowNumber + 1]
        rowLines = lines[start:end]  # Lines between separators including separator line start as it gives information about the number of columns of the row
        if rowLines:
            # Combine multiline content into single strings for each cell
            for line in rowLines:
                line = line.rstrip()
                if isSeparator(line) and not inDataRow:
                    inDataRow = True
                    # Add delimiter alignment check for separator lines
                    if not checkDelimiterAlignment(line, delimiterPositions):
                        raise ValueError(f'Misaligned delimiters in separator row: {line}')

                    parts = re.split(r'\s*\+\s*', line.strip('+'))
                    delimiterIndex = 0

                    # The separator line opens the first row of this block
                    rows.append(GridRow(numberOfColumns))
                    rowsTracker = GridRowsTracker(numberOfColumns)
                    columnIndex = 0

                    for rowIndex in range(len(parts)):
                        if columnIndex in range(numberOfColumns):
                            delimiterIndex += len(parts[rowIndex]) + 1
                            cell = rows[-1][columnIndex]

                            # Set position
                            cell.positionStart = delimiterIndex - len(parts[rowIndex])
                            cell.position = delimiterIndex  # Position of cell delimiter +

                            # Set alignment as defined by header separator line
                            cell.calculateAndSetAlignment(headerDelimiterPositions, delimiterPositions, defaultAlignments, hasHeader)

                            # Skip the columns that are covered by this part
                            while delimiterIndex > delimiterPositions[columnIndex]:
                                columnIndex += 1
                            columnIndex += 1

                elif inDataRow:
                    # Regular data row or partial separator
                    if matchGridTableBodySeparator.match(line):  # Partial separator
                        # Add delimiter alignment check for partial separators
                        if not checkDelimiterAlignment(line, delimiterPositions):
                            raise ValueError(f'Misaligned delimiters in partial separator: {line}')

                        cellsContent = re.split(r'[\|\+]', line.strip('|').strip('+'))
                        # Add another row, set delimiters for each cell
                        rows.append(GridRow(numberOfColumns))
                        auxDelimiterIndex = 0
                        auxiliarCellIndex = 0

                        for columnIndex, content in enumerate(cellsContent):
                            if auxiliarCellIndex < numberOfColumns:
                                auxDelimiterIndex += len(content) + 1
                                cell = rows[-1][auxiliarCellIndex]
                                cell.positionStart = auxDelimiterIndex - len(content)  # Start position of the cell
                                cell.position = auxDelimiterIndex  # Position of cell delimiter +
                                cell.calculateAndSetAlignment(headerDelimiterPositions, delimiterPositions, defaultAlignments, hasHeader)
                                while auxDelimiterIndex > delimiterPositions[auxiliarCellIndex]:
                                    auxiliarCellIndex += 1
                                auxiliarCellIndex += 1

                        if len(cellsContent) <= numberOfColumns:  # Colspan: Positions of | with respect to + need to be determined
                            columnCellIndex = 0
                            # Put the value in a variable here because we need the initial value
                            maxRowsTracker = rowsTracker.max()

                            # Go through all cells in a column
                            for columnIndex, content in enumerate(cellsContent):
                                rowIndex = rowsTracker[columnCellIndex]
                                cell = rows[rowIndex][columnCellIndex]

                                # Check whether a cell contains a header separator
                                if matchGridTableBodySeparatorLine.match(content):  # A new row is to be added
                                    rowsTracker[columnCellIndex] = maxRowsTracker + 1  # That actual row will have more than one row
                                    rowIndex = rowsTracker[columnCellIndex]
                                    cell = rows[rowIndex][columnCellIndex]
                                    cell.listFlag = False
                                    columnForward = 0

                                    # Count the columns that are covered by the cell
                                    for delIndex in range(columnCellIndex, len(delimiterPositions)):
                                        rowIndex = rowsTracker[columnCellIndex]  # Correcting the rowIndex. Might have been changed by a previous iteration
                                        if rows[rowIndex][columnCellIndex].position >= delimiterPositions[delIndex]:
                                            columnForward += 1
                                    columnCellIndex += columnForward

                                    continue

                                else:
                                    # Handle content of the cell
                                    handleCellContent(cell, cellsContent[columnIndex])
                                    cell.rowspan += 1
                                    if not cell.colspanAdjusted:
                                        # TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
                                        adjustColspan(rows[rowIndex], columnCellIndex, numberOfColumns, line, numberOfColumns, delimiterPositions)

                                    if cell.position >= delimiterPositions[columnCellIndex]:
                                        columnCellIndex += cell.colspan if cell.colspan != 0 else 1
                                    continue

                        else:
                            raise ValueError(f'More cells than columns found ({len(cellsContent)} {numberOfColumns})')

                    else:  # Data row
                        cellsContent = re.split(r'\|', line.strip('|'))

                        # Add delimiter alignment check
                        if not checkDelimiterAlignment(line, delimiterPositions):
                            raise ValueError(f'Misaligned delimiters in row: {line}')

                        columnCellIndex = 0
                        if len(cellsContent) < numberOfColumns:  # Colspan: Positions of | with respect to + need to be determined
                            for columnIndex, content in enumerate(cellsContent):
                                row = rows[rowsTracker[columnCellIndex]]
                                cell = row[columnCellIndex]
                                # Handle content of the cell
                                handleCellContent(cell, content)
                                if not cell.colspanAdjusted:
                                    # TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
                                    adjustColspan(row, columnCellIndex, numberOfColumns, line, numberOfColumns, delimiterPositions)
                                if cell.position >= delimiterPositions[columnCellIndex]:
                                    columnCellIndex += cell.colspan  # Move forward to the next cell

                        elif len(cellsContent) == numberOfColumns:  # Simple row
                            for columnIndex, content in enumerate(cellsContent):
                                rowIndex = rowsTracker[columnIndex]
                                handleCellContent(rows[rowIndex][columnIndex], content)
                        else:
                            raise ValueError(f'More cells than columns found ({len(cellsContent)} {numberOfColumns})')
                else:
                    raise ValueError('No separator line found for row starting')

            # Rows that start before the header separator belong to the header, all others to the body
            if hasHeader and start >= headerSeparatorIndex:  # table_row and auxiliar_row are part of data_rows
                for row in rows:
                    dataRows.append(row.cells)
            elif hasHeader and start < headerSeparatorIndex:  # table_row and auxiliar_row are part of header_rows
                for row in rows:  # header rows
                    headerRows.append(row.cells)
            else:
                # only body
                # NOTE(review): hasHeader is always True at this point (it is set
                # by a header separator or by the fallback above), so this branch
                # is not expected to be reached.
                for row in rows:
                    dataRows.append(row.cells)

    # Check if there are any data rows
    if not dataRows and not headerRows:
        raise ValueError('No valid rows found in the provided grid table.')

    # Format text
    for gridRows in [headerRows, dataRows]:
        for gridRow in gridRows:
            for cell in gridRow:
                if cell.content is not None:
                    # Replacing "<" by &lt;
                    cell.content = cell.content.replace('<', '&lt;')

                # Bold replacements
                # Regex to detect markdown bold formatting in cell content
                if cell.content is not None:
                    cell.content = matchBold.sub(r'\1<strong>\g<text></strong>', cell.content)

                # Italic replacements
                # Regex to detect markdown italic formatting in cell content
                if cell.content is not None:
                    cell.content = matchItalic.sub(r'\1<i>\g<text></i>', cell.content)

    # Correct newlines characters
    for headerRow in headerRows:
        for cell in headerRow:
            cell.content = cell.content.replace('\n', '<br />') if cell.content is not None else None
    for dataRow in dataRows:
        for cell in dataRow:
            cell.content = cell.content.replace('\n', '<br />') if cell.content is not None else None

    #
    # Checking that the grid is correct. Not too much tested - need to take into account rowspan of previous rows
    #
    # For each row the colspans are summed up. Cells without a colspan count as
    # one column while they are covered by a rowspan of a previous row.
    # `forwardRowspan` holds, per column, the number of following rows that are
    # still covered.
    # NOTE(review): `sum` shadows the built-in function inside this function.
    # NOTE(review): the nesting of the `forwardRowspan[cellIndex] -= 1`
    # statements below could not be confirmed from the available listing. They
    # are placed under the `> 0` guard so that the counters do not drop below
    # zero. Confirm against the original file.

    # Checking the header rows
    forwardRowspan:list[int] = []
    for idx, headerRow in enumerate(headerRows):
        if len(forwardRowspan) == 0:
            forwardRowspan = [0] * len(headerRows[idx])
        sum = 0

        for cellIndex, cell in enumerate(headerRow):
            sum += cell.colspan
            if idx > 0 and cell.colspan == 0:
                if forwardRowspan[cellIndex] > 0:
                    sum += 1
                    forwardRowspan[cellIndex] -= 1
            if forwardRowspan[cellIndex] == 0 and cell.rowspan > 1:
                forwardRowspan[cellIndex] = cell.rowspan -1
                # A cell that spans several columns covers those columns in the following rows as well
                colspan=1
                while cell.colspan > colspan:
                    forwardRowspan[cellIndex + colspan] = cell.rowspan - 1
                    colspan += 1

        if not sum == numberOfColumns:
            raise ValueError('Grid table not converted properly')

    # Checking the data rows
    forwardRowspan = []
    for idx, dataRow in enumerate(dataRows):
        if len(forwardRowspan) == 0:
            forwardRowspan = [0] * len(dataRows[idx])
        sum = 0

        for cellIndex, cell in enumerate(dataRows[idx]):
            sum += cell.colspan
            if idx > 0 and cell.colspan == 0:
                if forwardRowspan[cellIndex] > 0:
                    sum += 1
                    forwardRowspan[cellIndex] -= 1
            if forwardRowspan[cellIndex] == 0 and cell.rowspan > 1:
                forwardRowspan[cellIndex] = cell.rowspan - 1
                # A cell that spans several columns covers those columns in the following rows as well
                colspan=1
                while cell.colspan > colspan:
                    forwardRowspan[cellIndex + colspan] = cell.rowspan - 1
                    colspan += 1

        if not sum == numberOfColumns:
            raise ValueError('Grid table not converted properly')

    return headerRows, dataRows
def generateHtmlTableWithSpans(gridTable:str) -> str:
    """ Generate an HTML table from a Pandoc-style grid table with row and column spans.

        Lists inside a cell are converted to `<ul>` elements, and cells that
        contain a list are left-aligned. Cells with a rowspan or colspan of 0
        are covered by another cell and are not written. The content and the
        alignment of the parsed cells are updated while rendering.

        Args:
            gridTable: The Pandoc-style grid table.

        Raises:
            RuntimeError: If the grid table cannot be parsed.

        Returns:
            The HTML table in string format.
    """
    # Both expressions use the configured list element mark.
    # One list item: bullet or number, then the text up to the end mark
    mark = re.escape(_nextListElementMark)
    listItemRegex = r'\s*([-*+]|\s*\d+\.)\s+((?:(?!' + mark + r').)+)' + mark
    # A run of consecutive list items
    listBlockRegex = r'(\s*([-*+]|\s*\d+\.)\s+(?:(?!' + mark + r').)+' + mark + r')+'

    try:
        gridHeader, gridBody = parseGridTableWithSpans(gridTable)
    except Exception as e:
        printDebug('Grid table could not be generated')
        raise RuntimeError(f'HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE:\n{str(e)}') from e

    def renderCell(cell, tag:str) -> str:
        """ Return the HTML for a single cell, using `tag` ('th' or 'td'). """
        # Prepare content, in case there's a list
        if cell.content is not None and (items := re.findall(listItemRegex, cell.content)):
            listHtml = '<ul>' + ''.join(f'<li>{item[1]}</li>' for item in items) + '</ul>'
            # Use a function as replacement so that backslashes in the cell
            # text are inserted literally and not read as escape sequences.
            cell.content = re.sub(listBlockRegex, lambda _match: listHtml, cell.content)
            # Enforce left alignment if cell contains a list
            cell.alignment = _alignLeft

        rowspan = f' rowspan="{cell.rowspan}"' if cell.rowspan > 1 else ''
        colspan = f' colspan="{cell.colspan}"' if cell.colspan > 1 else ''
        return f' <{tag}{rowspan}{colspan} {cell.alignment}>{cell.content}</{tag}>\n'

    def renderRows(rows, tag:str) -> list[str]:
        """ Return the HTML fragments for all rows of a table section. """
        fragments:list[str] = []
        for row in rows:
            fragments.append(' <tr>\n')
            for cell in row:
                # Cells without a span are covered by another cell
                if cell.rowspan == 0 or cell.colspan == 0:
                    continue
                fragments.append(renderCell(cell, tag))
            fragments.append(' </tr>\n')
        return fragments

    # Generate table HTML...
    html:list[str] = ['<table>\n']

    # A header section is only written if the header contains at least one real cell
    hasHeader = any(cell.rowspan != 0 and cell.colspan != 0
                    for row in gridHeader
                    for cell in row)
    if hasHeader:
        html.append(' <thead>\n')
        html.extend(renderRows(gridHeader, 'th'))
        html.append(' </thead>\n')

    html.append(' <tbody>\n')
    html.extend(renderRows(gridBody, 'td'))
    html.append(' </tbody>\n')
    html.append('</table>')
    return ''.join(html)
#
# markdownTools.py
#
# (c) 2025 by Andreas Kraft & Miguel Angel Reina Ortega
# License: BSD 3-Clause License. See the LICENSE file for further details.
""" Various tools for markdown processing
"""
from __future__ import annotations
from typing import Callable, Optional
from dataclasses import dataclass
import base64, hashlib
from enum import Enum, auto
from gridTableTools import generateHtmlTableWithSpans, setLoggers as setGridTableLoggers
from regexMatches import *
# TODO use a verbosity level instead
# Verbosity switches. Both are off by default.
verbose = veryVerbose = False

# Logging callables. All of them default to the built-in print function
# and can be replaced via setLoggers().
printInfo = printDebug = printError = print
def setLoggers(info:Callable = print, debug:Callable = print, error:Callable= print) -> None:
    """ Replace the logging callables of this module and of the grid table tools.

        Args:
            info: Callable used for informational messages.
            debug: Callable used for debug messages.
            error: Callable used for error messages.
    """
    global printInfo, printDebug, printError
    printInfo, printDebug, printError = info, debug, error

    # Hand the same loggers on to the grid table tools
    setGridTableLoggers(info, debug, error)
def _shortHash(value:str, length:int) -> str:
""" Generate a short hash of a string value.
Args:
value: The value to hash.
length: The length of the hash.
Returns:
The hash.
"""
return base64.b64encode(
hashlib.sha256(
value.encode()
).digest()
).decode()[:length]
class LineType(Enum):
    """ Represents the type of a line in the markdown file.

        The values are numbered consecutively, starting with 1.
    """
    HEADING = 1             # a heading line
    TEXT = 2                # normal text
    CODEFENCESTART = 3      # opening line of a code fence
    CODE = 4                # line inside a code fence
    CODEFENCEEND = 5        # closing line of a code fence
    LIST = 6                # list line
    NOTE = 7                # note line, i.e. a line starting with '>'
    STANDALONEIMAGE = 8     # image on a line of its own
    TABLEHEADER = 9         # first line of a table
    TABLESEPARATOR = 10     # separator line of a table
    TABLEROW = 11           # row of a table
    TABLELASTROW = 12       # last line of a table
    RAWHTML = 13            # raw html, e.g. a converted grid table
@dataclass
class Line:
    """ A single line of the markdown file together with its line type.

        A default instance is an empty text line, i.e. a single newline.
    """
    text:str = '\n'
    lineType:LineType = LineType.TEXT


    def __str__(self) -> str:
        """ Return the text of the line. """
        return self.text


    def __repr__(self) -> str:
        """ Return the same representation as `__str__`. """
        return str(self)
@dataclass
class Clause:
    """ A clause of the markdown file: a heading level, a clause number, a title
        and the lines that belong to the clause.
    """
    _level:int
    _clauseNumber:str
    _title:str
    _lines:list[Line]


    @property
    def level(self) -> int:
        """ The level of the clause (read-only). """
        return self._level


    @property
    def clauseNumber(self) -> str:
        """ The clause number. An empty clause number is reported as '0'. """
        return self._clauseNumber or '0'


    @clauseNumber.setter
    def clauseNumber(self, value:str) -> None:
        """ Assign a new clause number. """
        self._clauseNumber = value


    @property
    def title(self) -> str:
        """ The title of the clause. """
        return self._title


    @title.setter
    def title(self, value:str) -> None:
        """ Assign a new title. """
        self._title = value


    @property
    def lines(self) -> list[Line]:
        """ The lines of the clause. """
        return self._lines


    @lines.setter
    def lines(self, value:list[Line]) -> None:
        """ Replace the lines of the clause. """
        self._lines = value


    @property
    def linesCount(self) -> int:
        """ The number of lines in the clause.

            Returns:
                The number of lines in the clause.
        """
        return len(self.lines)


    def append(self, line:Line) -> None:
        """ Add a single line at the end of the clause.

            Args:
                line: The line to append.
        """
        self.lines.append(line)


    def extend(self, clause:Clause) -> None:
        """ Add all lines of another clause at the end of this clause.

            Args:
                clause: The clause whose lines are added.
        """
        self.lines.extend(clause.lines)


    def asStringList(self, paddings:int = 0) -> list[str]:
        """ Return the texts of the clause's lines.

            Args:
                paddings: The number of empty lines to add before the clause.

            Returns:
                The padding lines followed by the text of each line.
        """
        padding = [ '\n' ] * max(paddings, 0)
        texts = [ entry.text for entry in self.lines ]
        return padding + texts


    def __len__(self) -> int:
        """ Return the number of characters in the clause. Leading and trailing
            whitespace of each line is not counted, so empty lines and lines
            that contain only whitespace do not contribute.

            Returns:
                The number of characters in the clause.
        """
        return sum(len(entry.text.strip()) for entry in self.lines)


    def __str__(self) -> str:
        """ Return the concatenated lines of the clause. """
        return ''.join(str(entry) for entry in self.lines)


    def __repr__(self) -> str:
        """ Return the same representation as `__str__`. """
        return str(self)
class Footnote:
    """ A footnote of the markdown file, consisting of its id and its line. """

    def __init__(self, id:str, line:Line) -> None:
        """ Initialize the footnote.

            Args:
                id: The id of the footnote.
                line: The line of the footnote.
        """
        # The id of the footnote
        self.id = id
        # The line of the footnote
        self.line = line


    def __str__(self) -> str:
        """ Return the text of the footnote's line. """
        return self.line.text


    def __repr__(self) -> str:
        """ Return the same representation as `__str__`. """
        return str(self)
class Document:
    """ Represents the document object: a list of clauses and a list of footnotes. """

    # The clauses of the document
    clauses:list[Clause]
    # The footnotes of the document
    footnotes:list[Footnote]


    def __init__(self, clauses:list[Clause], footnotes:Optional[list[Footnote]] = None) -> None:
        """ Constructor.

            Args:
                clauses: The clauses of the document.
                footnotes: The footnotes of the document. If omitted, the document
                    gets its own new, empty list.
        """
        self.clauses = clauses
        # Never share one default list between instances
        self.footnotes = footnotes if footnotes is not None else []


    def splitMarkdownDocument(self,
                              ignoreTitles:Optional[list[str]] = None,
                              splitLevel:int = 1,
                              ignoreUntilFirstHeading:bool = False) -> None:
        """ Split the clauses at a certain level. This is used to create the separate
            markdown files for MkDocs.

            After the split, the clauses are stored in the document object.

            Args:
                ignoreTitles: A list of titles that should be ignored. They are not included in the output.
                splitLevel: The level at which the clauses should be split.
                ignoreUntilFirstHeading: Ignore all clauses until the first heading.
        """
        result:list[Clause] = []

        # Titles are compared case-insensitively
        ignored = { t.casefold() for t in (ignoreTitles or []) }

        for clause in self.clauses:
            # Check if the current clause should be ignored
            if clause.title.casefold() in ignored:
                continue

            # Add a new output clause if the current clause's level is
            # equal or less than the split level. Also do this if there is no
            # output clause yet, e.g. because the preceding clauses were ignored;
            # otherwise there would be nothing to add the lines to.
            if clause.level <= splitLevel or not result:
                result.append(Clause(clause.level, clause.clauseNumber, clause.title, []))

            # Add the lines to the output clause
            result[-1].extend(clause)

        # Remove the leading clauses that have no title. The check for an
        # empty result covers documents without any heading.
        if ignoreUntilFirstHeading:
            while result and not result[0].title:
                result.pop(0)

        self.clauses = result


    def insertFootnotes(self) -> None:
        """ Insert footnotes into the clauses.

            Every footnote that is referenced in a clause is added once at the
            end of that clause, in the order of its first reference.

            After the insertion, the clauses are stored in the document object.
        """
        printInfo('Adding footnotes to clauses')

        for clause in self.clauses:
            foundFootnotes:list[Footnote] = []
            for line in clause.lines:
                # ATTN: Only footnotes in normal text lines are checked
                if line.lineType != LineType.TEXT:
                    continue
                # A line may contain more than one footnote reference
                for reference in MatchInlineFootnote.finditer(line.text):
                    # Find the footnote in the list of footnotes
                    for f in self.footnotes:
                        if f.id == reference.group(1) and f not in foundFootnotes:
                            foundFootnotes.append(f)

            # Insert the footnotes at the end of the clause
            if foundFootnotes:
                clause.append(Line('\n', LineType.TEXT))
                for f in foundFootnotes:
                    clause.append(f.line)


    def _rewriteLinks(self, pattern, linkTargets:dict[str, Clause], foldCase:bool, kind:str) -> None:
        """ Rewrite the targets of all links that match a pattern and point to a known anchor.

            Only the target inside a matched link is replaced. The rest of the
            line, including other occurrences of the same text, is not touched.

            Args:
                pattern: Compiled regular expression for the links. Its first group is the link target.
                linkTargets: Mapping of anchors (including the leading '#') to clauses.
                foldCase: Whether the link target is case-folded before it is looked up.
                kind: The kind of link, used for debug output only.
        """
        for clause in self.clauses:

            def replaceTarget(m) -> str:
                target = m.group(1)
                key = target.casefold() if foldCase else target
                if key not in linkTargets:
                    return m.group(0)
                if veryVerbose:
                    printDebug(f'Updated {kind} link "{target}" in clause "{clause.title}"')
                # Positions of the target relative to the start of the match
                whole = m.group(0)
                start = m.start(1) - m.start(0)
                end = m.end(1) - m.start(0)
                return f'{whole[:start]}../{linkTargets[key].clauseNumber}/#{target[1:]}{whole[end:]}'

            for line in clause.lines:
                line.text = pattern.sub(replaceTarget, line.text)


    def updateLinks(self) -> None:
        """ Update the links in the clauses to the new structure. This is done by
            creating a dictionary of all links and their targets and then replacing
            the links in the clauses.

            After the update, the clauses are stored in the document object.
        """
        printInfo('Updating links in clauses')

        # Build the link target dictionary. Mapping anchor -> clause
        linkTargets:dict[str, Clause] = {}

        # Find all Markdown headers in the clauses and convert them to anchor format
        for clause in self.clauses:
            for line in clause.lines:
                if (m := matchHeader.match(line.text)):
                    # convert the header to anchor format and add it to the dictionary
                    # Remove special characters
                    # TODO move perhaps to an own function
                    anchor = m.groups()[1].strip().casefold().replace(' ', '-')
                    for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'):
                        anchor = anchor.replace(c, '')
                    # remove html tags from the anchor
                    anchor = re.sub(matchHtmlTag, '', anchor)

                    linkTargets[f'#{anchor}'] = clause
                    if veryVerbose:
                        printDebug(f'Added Markdown anchor "{anchor}"')

        # Find all HTML anchors in the clauses and add them to the dictionary
        for clause in self.clauses:
            for line in clause.lines:
                for a in matchHtmlAnchorLink.findall(line.text):
                    linkTargets[f'#{a}'] = clause
                    if veryVerbose:
                        printDebug(f'Found HTML anchor "{a}" in clause "{clause.title}"')

        # Replace the html links
        self._rewriteLinks(matchHtmlLink, linkTargets, False, 'HTML')

        # Replace the markdown links. The old link targets are looked up in their
        # converted (lower case) versions that point to the output files.
        self._rewriteLinks(markdownLink, linkTargets, True, 'Markdown')


    def updateNotes(self) -> None:
        """ Update the notes in the clauses to the mkDocs notes version.

            Consecutive note lines are combined into one `!!! note` block that is
            surrounded by empty lines.

            After the update, the clauses are stored in the document object.
        """
        printInfo('Updating notes in clauses')

        for clause in self.clauses:
            lines:list[Line] = []
            inNote = False
            for line in clause.lines:
                if line.lineType == LineType.NOTE:
                    if not inNote:
                        # Start a new note block
                        lines.append(Line('\n', LineType.TEXT))
                        lines.append(Line('!!! note\n', LineType.NOTE))
                        inNote = True
                    # The note's text is added without the note marker
                    lines.append(Line(f"\t{re.sub(matchNoteStart, '', line.text)}", LineType.NOTE))
                    if verbose:
                        printDebug(f'Converted note in clause "{clause.title}"')
                else:
                    if inNote:
                        # Close the note block with an empty line
                        lines.append(Line('\n', LineType.TEXT))
                        inNote = False
                    lines.append(line)
            clause.lines = lines


    def __str__(self) -> str:
        """ Return the document as a string. """
        return '\n'.join([ str(c) for c in [ *self.clauses, *self.footnotes ] ])


    def __repr__(self) -> str:
        """ Return the document as a string. """
        return self.__str__()
def analyseMarkdown(filename:Optional[str]=None, inLines:Optional[list[str]]=None) -> Document:
    """ Analyse the markdown file and split it into clauses.

        Either the filename or the inLines must be provided, but not both.

        Args:
            filename: The name of the markdown file.
            inLines: The lines of the markdown file. An empty list is accepted.

        Raises:
            ValueError: If neither or both of filename and inLines are provided.

        Returns:
            The document object.
    """

    # Collects the lines of the grid table that is currently being read
    gridTable:str = ''

    def processGridTable() -> None:
        """ Process a grid table and convert it to an html table.

            This function adds the html table to the output clauses and
            clears the gridTable variable. If the conversion fails then the
            error is reported and an empty raw html line is added instead.
        """
        nonlocal gridTable

        htmltable:str = ''
        try:
            htmltable = generateHtmlTableWithSpans(gridTable)
            printDebug(htmltable)
        except Exception as e:
            printError(f"Error: {e}")
        outClauses[-1].append(Line(htmltable, LineType.RAWHTML))
        gridTable = ''

    printInfo(f'Analyzing "{filename}"')

    # Read the file.
    # Note: We use utf-8 and replace errors to avoid problems with special or unknown characters.
    if filename and not inLines:
        with open(filename, 'r', encoding = 'utf-8', errors = 'replace') as file:
            inLines = file.readlines()
    elif not filename and inLines is not None:
        # The lines were provided by the caller. An empty list is valid input.
        pass
    else:
        raise ValueError('Either the filename or the lines must be provided.')

    # The list of clauses. The first clause contains the text before the first heading.
    outClauses:list[Clause] = [Clause(0, '', '', [])]
    footnotes:list[Footnote] = []

    # Go through the lines and detect headers and codefences
    inCodefence = False
    inTable = False
    tableHasSeparator = False
    inGridTable = False
    for line in inLines:

        # Detect and handle codefences
        # For the moment we support only codefences that start and end
        # with 3 backticks. This is the most common way to define codefences.
        # Note, that longer codefences are allowed by the markdown specification.
        if matchCodefenceStart.match(line) and not inCodefence:
            inCodefence = True
            outClauses[-1].append(Line(line, LineType.CODEFENCESTART))
            continue
        if matchCodefenceEnd.match(line):
            inCodefence = False
            outClauses[-1].append(Line(line, LineType.CODEFENCEEND))
            continue
        if inCodefence:
            outClauses[-1].append(Line(line, LineType.CODE))
            continue

        # Detect and handle tables
        if matchTable.match(line) and not inTable and not inGridTable:
            inTable = True
            outClauses[-1].append(Line(line, LineType.TABLEHEADER))
            continue
        if inTable:
            if matchTableSeparator.match(line) and not tableHasSeparator:
                outClauses[-1].append(Line(line, LineType.TABLESEPARATOR))
                tableHasSeparator = True
                continue
            elif matchTable.match(line):
                outClauses[-1].append(Line(line, LineType.TABLEROW))
                continue
            else:
                inTable = False
                tableHasSeparator = False
                # Mark the previous line as the last row in the table
                outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
                # continue with other matches

        # Detect grid tables and convert them to html table
        if matchGridTable.match(line) and not inGridTable:
            inGridTable = True
            gridTable += line
            continue
        if inGridTable:
            if matchGridTableHeaderSeparator.match(line) or matchGridTableBodySeparator.match(line):
                gridTable += line
                continue
            elif matchTable.match(line):
                gridTable += line
                continue
            else:
                inGridTable = False
                processGridTable()
                # continue with other matches

        # Detect notes
        # Notes are lines that start with a '>'.
        if matchNote.match(line):
            outClauses[-1].append(Line(line, LineType.NOTE))
            continue

        # Detect footnotes
        # Footnotes are lines that start with '[^<id>]:'. They are collected
        # separately and are not added to a clause here.
        if (_fn := matchFootnote.match(line)):
            footnotes.append(Footnote(_fn.groups()[0], Line(line, LineType.TEXT)))
            continue

        # Detect images on a single line
        if matchStandAloneImage.match(line):
            outClauses[-1].append(Line(line, LineType.STANDALONEIMAGE))
            continue

        # Detect headers
        _lineType = LineType.TEXT
        if (m := matchHeader.match(line)):
            # Add a new clause. Html tags are removed from the title. Headings
            # without a number get a short hash of the title as clause number.
            clauseTitle = m.groups()[1].strip()
            clauseTitle = re.sub(matchHtmlTag, '', clauseTitle)
            headerNumber = matchHeaderNumber.search(clauseTitle)
            outClauses.append(Clause(len(m.groups()[0]), # level
                                     headerNumber.group() if headerNumber else _shortHash(clauseTitle, 6),
                                     clauseTitle,
                                     []))
            _lineType = LineType.HEADING

        # Just add the line to the current clause as text
        outClauses[-1].append(Line(line, _lineType))

    # Process still unfinished cases

    # A table at the very end of the input has no following line that would
    # close it. Mark its last line here. While inTable is set, the last added
    # line is always a table line.
    if inTable:
        outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW

    # Same for a grid table at the very end of the input
    if gridTable:
        processGridTable()

    return Document(outClauses, footnotes)
def main() -> None:
    """ Main function for processing markdown files from the command line.

        The input file is analysed, split, its footnotes, links and notes are
        updated, and the resulting clauses are printed.
    """
    global verbose, veryVerbose

    import argparse

    # Define the command line interface
    cli = argparse.ArgumentParser(description='Markdown-Dateien verarbeiten, um Gittertabellen zu konvertieren und andere Formatierungen zu handhaben')
    cli.add_argument('eingabe', help='Eingabe-Markdown-Datei')
    cli.add_argument('-v', '--verbose', action='store_true', help='Ausführliche Ausgabe aktivieren')
    cli.add_argument('-vv', '--sehr-verbose', action='store_true', help='Sehr ausführliche Ausgabe aktivieren')
    cli.add_argument('-i', '--ignoriere-titel', nargs='+', default=[], help='Liste der zu ignorierenden Titel')
    cli.add_argument('-s', '--teilungs-ebene', type=int, default=1, help='Ebene, auf der das Dokument geteilt werden soll (Standard: 1)')
    cli.add_argument('-f', '--ignoriere-erste', action='store_true', help='Inhalt bis zur ersten Überschrift ignorieren')
    options = cli.parse_args()

    # Set the verbosity levels
    verbose = options.verbose
    veryVerbose = options.sehr_verbose

    # Process the markdown file
    document = analyseMarkdown(options.eingabe)

    # Split the document
    document.splitMarkdownDocument(ignoreTitles=options.ignoriere_titel,
                                   splitLevel=options.teilungs_ebene,
                                   ignoreUntilFirstHeading=options.ignoriere_erste)

    # Update the elements of the document
    document.insertFootnotes()
    document.updateLinks()
    document.updateNotes()

    # Print the processed document: a heading for every clause, followed by its lines
    for clause in document.clauses:
        print(f"\n{'#' * clause.level} {clause.title}")
        for entry in clause.lines:
            print(entry.text, end='')


if __name__ == '__main__':
    main()
#
# regexMatches.py
#
# (c) 2025 by Andreas Kraft & Miguel Angel Reina Ortega
# License: BSD 3-Clause License. See the LICENSE file for further details.
#
""" This module contains the regular expressions used in the markdown processing.
"""
import re
# Regular expressions

# A list item that is indented by exactly two whitespace characters
match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
# A footnote definition, e.g. "[^1]: text". The group is the footnote's id.
matchFootnote = re.compile(r'\[\^([^\]]*)\]:', re.IGNORECASE)
# An html anchor, e.g. '<a name="x">..</a>'. The group is the anchor's name.
matchHtmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)
# An html link, e.g. '<a href="#x">..</a>'. The group is the link's target.
matchHtmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
# Any html tag
matchHtmlTag = re.compile(r'<[^>]*>', re.IGNORECASE)
# A footnote reference, e.g. "[^1]". The group is the footnote's id.
MatchInlineFootnote = re.compile(r'\[\^([^\]]*)\]', re.IGNORECASE)
# A markdown link to an anchor, e.g. "[text](#x)", but not an image. The group
# is the link's target. The lookbehind excludes images without consuming a
# character, so links at the start of a line are matched as well.
markdownLink = re.compile(r'(?<!!)\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
# Start and end of a code fence with three backticks
matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE)
matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE)
# The first line of a grid table, e.g. "+---+---+". Any amount of trailing
# whitespace is allowed, including none (last line of a file without newline).
matchGridTable = re.compile(r'^\s*\+-.*\+\s*$', re.IGNORECASE)
# Separator lines of grid tables
matchGridTableBodySeparator = re.compile(r'.*\+([:-]+\+)+.*$', re.IGNORECASE)
matchGridTableBodySeparatorLine = re.compile(r'[-:]+$', re.IGNORECASE)
matchGridTableHeaderSeparator = re.compile(r'.*\+([=:]+\+)+.*$', re.IGNORECASE)
matchGridTableSeparator = re.compile(r'\s*\+([-:=]+\+)+\s*$', re.IGNORECASE)
# A markdown heading. The groups are the '#' characters and the heading's text.
matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE)
# A clause number in a heading's text, e.g. "1", "1.2.3", or "A.1"
matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE)
# A list item. The named groups are the list "marker" and the item's "content".
matchListInContent = re.compile(r'^(?:\s*(?P<marker>[-*+]|\s*\d+\.))\s+(?P<content>.+)$', re.IGNORECASE)
# A note line, i.e. a line that starts with '>'
matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE)
# The start of a note line, including an optional "note" and ":"
matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE)
# An image at the start of a line. The group is the image's path.
matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECASE)
# A line of a pipe table, and the separator line of a pipe table
matchTable = re.compile(r'^\s*\|.*\|\s*$', re.IGNORECASE)
matchTableSeparator = re.compile(r'^\s*\|([-: ]+\|)+\s*$', re.IGNORECASE)
# Bold and italic text. The named group "text" is the emphasized text.
matchBold = re.compile(r'(^|\s)(\*\*|__)(?P<text>.+?)\2(?!\w)')
matchItalic = re.compile(r'(^|\s)(\*|_)(?P<text>.+?)\2(?!\w)')
\ No newline at end of file
This diff is collapsed.