Skip to content
Snippets Groups Projects
Commit 4299b861 authored by Andreas Kraft's avatar Andreas Kraft
Browse files

Added table separator correction.

Pandoc uses the length of the table separators to calculate column widths. This is not what we need, and it could produce wrong widths in some cases.
parent a7e7b458
No related branches found
No related tags found
No related merge requests found
Pipeline #1361 passed
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
# #
import argparse, os, re, sys import argparse, os, re, sys
from rich import print from rich import print, inspect
from rich.progress import Progress, TextColumn, TimeElapsedColumn from rich.progress import Progress, TextColumn, TimeElapsedColumn
def readMDFile(progress:Progress, document:str) -> list[str]: def readMDFile(progress:Progress, document:str) -> list[str]:
...@@ -39,6 +39,16 @@ def writeMDFile(progress:Progress, mdLines:list[str], document:str, outDirectory ...@@ -39,6 +39,16 @@ def writeMDFile(progress:Progress, mdLines:list[str], document:str, outDirectory
progress.stop_task(_taskID) progress.stop_task(_taskID)
# Module-level state shared by all calls to checkInCodeBlock(): True while the
# most recently checked line lies inside a fenced code block.
_inCodeBlock = False


def checkInCodeBlock(line:str) -> bool:
	""" Track whether the given line is inside a fenced (```) code block.

		The function is stateful: it must be called once for every line of
		a document, in order. Each fence line toggles the module-level
		`_inCodeBlock` flag.

		Args:
			line: The current markdown line.

		Return:
			The state *after* processing the line: True for an opening fence
			and for the lines inside the block, False for the closing fence
			and for lines outside of a block.
	"""
	global _inCodeBlock

	stripped = line.strip()
	if stripped.startswith('```'):
		# Neither an opening fence (backticks + info string) nor a closing
		# fence (backticks only) contains further backticks after the leading
		# run. A line such as "```code``` text" is an inline code span and
		# must not toggle the state.
		if '`' not in stripped.lstrip('`'):
			_inCodeBlock = not _inCodeBlock
	return _inCodeBlock
def correctTOC(progress:Progress, mdLines:list[str], tocSection:str = 'Contents') -> list[str]: def correctTOC(progress:Progress, mdLines:list[str], tocSection:str = 'Contents') -> list[str]:
""" Correct the TOC to be compatible with pandoc. """ Correct the TOC to be compatible with pandoc.
""" """
...@@ -176,6 +186,31 @@ def replaceLineBreaks(progress: Progress, mdLines: list[str]) -> list[str]: ...@@ -176,6 +186,31 @@ def replaceLineBreaks(progress: Progress, mdLines: list[str]) -> list[str]:
return _lines return _lines
def correctTableSeparators(progress: Progress, mdLines: list[str]) -> list[str]:
	""" Replace all table header separators with very simple ones. 

		This is needed because pandoc calculates the column width based on
		the separators, and that is not always wanted. Every run of dashes
		in a separator line is collapsed to a single dash, e.g.
		`|-----|---|` becomes `|-|-|`.

		Only separator lines that consist solely of `|` and `-` characters
		(with optional leading and trailing whitespace) are changed. Lines
		for which `checkInCodeBlock()` reports True are copied unchanged.

		Args:
			progress: The progress display on which the task is registered.
			mdLines: The lines of the markdown document.

		Return:
			A new list with the corrected lines.
	"""
	_taskID = progress.add_task('[blue]Correcting table separators', total=0)

	# Raw strings, so that `\s` and `\|` reach the regex engine as written and
	# are not treated as (invalid) string escape sequences.
	tableSeparatorRegex = re.compile(r'^(\s*)(\|[-\|]*\|)\s*$')
	dashRunRegex = re.compile(r'-+')

	_lines: list[str] = []
	for line in mdLines:
		if checkInCodeBlock(line):
			_lines.append(line)
			continue
		match = tableSeparatorRegex.match(line)
		if match:
			# Keep the indentation (group 1), simplify the separator (group 2).
			# Trailing whitespace is dropped by the pattern, so the line is
			# always terminated with a single newline.
			_lines.append(f'{match.group(1)}{dashRunRegex.sub("-", match.group(2))}\n')
		else:
			_lines.append(line)

	progress.stop_task(_taskID)
	return _lines
def process(document:str, outDirectory:str) -> None: def process(document:str, outDirectory:str) -> None:
with Progress(TextColumn('{task.description}'), TimeElapsedColumn()) as progress: with Progress(TextColumn('{task.description}'), TimeElapsedColumn()) as progress:
mdLines = readMDFile(progress, document) mdLines = readMDFile(progress, document)
...@@ -184,8 +219,10 @@ def process(document:str, outDirectory:str) -> None: ...@@ -184,8 +219,10 @@ def process(document:str, outDirectory:str) -> None:
mdLines = replaceFigureCaptions(progress, mdLines) mdLines = replaceFigureCaptions(progress, mdLines)
mdLines = replaceFiguresPathSvgToPng(progress, mdLines) mdLines = replaceFiguresPathSvgToPng(progress, mdLines)
mdLines = replaceLineBreaks(progress, mdLines) mdLines = replaceLineBreaks(progress, mdLines)
mdLines = correctTableSeparators(progress, mdLines)
writeMDFile(progress, mdLines, document, outDirectory) writeMDFile(progress, mdLines, document, outDirectory)
def main(args=None): def main(args=None):
# Parse command line arguments # Parse command line arguments
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment