From 4299b86171693b1493f3f6fd3e006b7db731a1e1 Mon Sep 17 00:00:00 2001
From: ankraft <an.kraft@gmail.com>
Date: Wed, 23 Oct 2024 22:53:21 +0200
Subject: [PATCH] Added table separator correction.

Pandoc uses the length of the table separators to calculate column widths. This is not what we need, and the resulting widths could be wrong in some cases.
---
 generateChangemarks/pandocFilter.py | 39 ++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/generateChangemarks/pandocFilter.py b/generateChangemarks/pandocFilter.py
index 23e6a25..2550dc3 100644
--- a/generateChangemarks/pandocFilter.py
+++ b/generateChangemarks/pandocFilter.py
@@ -8,7 +8,7 @@
 #
 
 import argparse, os, re, sys
-from rich import print
+from rich import print, inspect
 from rich.progress import Progress, TextColumn, TimeElapsedColumn
 
 def readMDFile(progress:Progress, document:str) -> list[str]:
@@ -39,6 +39,16 @@ def writeMDFile(progress:Progress, mdLines:list[str], document:str, outDirectory
 	progress.stop_task(_taskID)
 
 
+_inCodeBlock = False	# True while the lines fed so far have an unclosed ``` fence
+def checkInCodeBlock(line:str) -> bool:
+	"""	Feed one line of the document; return the fence state after that line. """
+	global _inCodeBlock
+	# A line starting with ``` toggles the state: opening fence -> True, closing fence -> False
+	if line.strip().startswith('```'):
+		_inCodeBlock = not _inCodeBlock
+	return _inCodeBlock
+
+
 def correctTOC(progress:Progress, mdLines:list[str], tocSection:str = 'Contents') -> list[str]:
 	"""	Correct the TOC to be compatible with pandoc.
 	"""
@@ -176,6 +186,31 @@ def replaceLineBreaks(progress: Progress, mdLines: list[str]) -> list[str]:
 	return _lines
 
 
+def correctTableSeparators(progress: Progress, mdLines: list[str]) -> list[str]:
+	"""	Shorten every pipe-table separator row to one dash per column, because
+		pandoc calculates the column widths based on the separator lengths.
+		Alignment colons and inner blanks are kept; rows inside fenced code
+		blocks are left alone. Returns the new list of lines.
+	"""
+	_taskID = progress.add_task('[blue]Correcting table separators', total=0)
+	# Raw string (no invalid escapes); a separator row = pipes, dashes, colons, blanks, >= 1 dash
+	tableSeparatorRegex = re.compile(r'^(\s*)(\|[-:| \t]*-[-:| \t]*\|)\s*$')
+	dashRunRegex = re.compile('-+')
+
+	_lines: list[str] = []
+	for line in mdLines:
+		# checkInCodeBlock() must see every line to keep its fence state right.
+		match = None if checkInCodeBlock(line) else tableSeparatorRegex.match(line)
+		if match:
+			# Keep the indentation, collapse each run of dashes to a single dash
+			_lines.append(f'{match.group(1)}{dashRunRegex.sub("-", match.group(2))}\n')
+		else:
+			_lines.append(line)
+
+	progress.stop_task(_taskID)
+	return _lines
+
+
 def process(document:str, outDirectory:str) -> None:
 	with Progress(TextColumn('{task.description}'),  TimeElapsedColumn()) as progress:
 		mdLines = readMDFile(progress, document)
@@ -184,8 +219,10 @@ def process(document:str, outDirectory:str) -> None:
 		mdLines = replaceFigureCaptions(progress, mdLines)
 		mdLines = replaceFiguresPathSvgToPng(progress, mdLines)
 		mdLines = replaceLineBreaks(progress, mdLines)
+		mdLines = correctTableSeparators(progress, mdLines)
 		writeMDFile(progress, mdLines, document, outDirectory)
 
+
 def main(args=None):
 	# Parse command line arguments
 	parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-- 
GitLab