From 4299b86171693b1493f3f6fd3e006b7db731a1e1 Mon Sep 17 00:00:00 2001 From: ankraft <an.kraft@gmail.com> Date: Wed, 23 Oct 2024 22:53:21 +0200 Subject: [PATCH] Added table separator correction. the length of the separators is used in pandoc to calculate column widths. this is not what we need, and could be wrong in some cases. --- generateChangemarks/pandocFilter.py | 39 ++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/generateChangemarks/pandocFilter.py b/generateChangemarks/pandocFilter.py index 23e6a25..2550dc3 100644 --- a/generateChangemarks/pandocFilter.py +++ b/generateChangemarks/pandocFilter.py @@ -8,7 +8,7 @@ # import argparse, os, re, sys -from rich import print +from rich import print, inspect from rich.progress import Progress, TextColumn, TimeElapsedColumn def readMDFile(progress:Progress, document:str) -> list[str]: @@ -39,6 +39,16 @@ def writeMDFile(progress:Progress, mdLines:list[str], document:str, outDirectory progress.stop_task(_taskID) +_inCodeBlock = False +def checkInCodeBlock(line:str) -> bool: + global _inCodeBlock + + # A line starting with ``` opens or closes a fenced code block: toggle the module-level state + if line.strip().startswith('```'): + _inCodeBlock = not _inCodeBlock + return _inCodeBlock + + def correctTOC(progress:Progress, mdLines:list[str], tocSection:str = 'Contents') -> list[str]: """ Correct the TOC to be compatible with pandoc. """ @@ -176,6 +186,31 @@ def replaceLineBreaks(progress: Progress, mdLines: list[str]) -> list[str]: return _lines +def correctTableSeparators(progress: Progress, mdLines: list[str]) -> list[str]: + """ Replace all table header separators with very simple ones. This is + needed because pandoc calculates the column width based on the separators. 
+ That is not always wanted. Lines inside fenced code blocks are left untouched. + """ + _taskID = progress.add_task('[blue]Correcting table separators', total=0) + tableSeparatorRegex = re.compile(r'^(\s*)(\|[-\|]*\|)\s*$') + + _lines: list[str] = [] + for line in mdLines: + if checkInCodeBlock(line): + _lines.append(line) + continue + + match = tableSeparatorRegex.match(line) + if match: + # Keep the indentation, collapse every run of dashes to a single dash + _lines.append(f'{match.group(1)}{re.sub("-+", "-", match.group(2))}\n') + else: + _lines.append(line) + + progress.stop_task(_taskID) + return _lines + + def process(document:str, outDirectory:str) -> None: with Progress(TextColumn('{task.description}'), TimeElapsedColumn()) as progress: mdLines = readMDFile(progress, document) @@ -184,8 +219,10 @@ def process(document:str, outDirectory:str) -> None: mdLines = replaceFigureCaptions(progress, mdLines) mdLines = replaceFiguresPathSvgToPng(progress, mdLines) mdLines = replaceLineBreaks(progress, mdLines) + mdLines = correctTableSeparators(progress, mdLines) writeMDFile(progress, mdLines, document, outDirectory) + def main(args=None): # Parse command line arguments parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) -- GitLab