From 78b6c6f45904a50a3c475c376f8202c64f5c795d Mon Sep 17 00:00:00 2001
From: ankraft <an.kraft@gmail.com>
Date: Fri, 26 Apr 2024 12:44:03 +0200
Subject: [PATCH] Detecting images and tables in the markdown parser

---
 toMkdocs/toMkdocs.py | 39 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/toMkdocs/toMkdocs.py b/toMkdocs/toMkdocs.py
index aaace80..9277e2b 100644
--- a/toMkdocs/toMkdocs.py
+++ b/toMkdocs/toMkdocs.py
@@ -24,6 +24,11 @@ class LineType(Enum):
 	CODEFENCEEND = auto()
 	LIST = auto()
 	NOTE = auto()
+	STANDALONEIMAGE = auto()
+	TABLEHEADER = auto()
+	TABLESEPARATOR = auto()
+	TABLEROW = auto()
+	TABLELASTROW = auto()
 
 
 @dataclass
@@ -136,6 +141,9 @@ _matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE)
 _matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE)
 _matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE)
 _matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE)
+_matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECASE)
+_matchTable = re.compile(r'^\s*\|.*\|\s*$', re.IGNORECASE)  # table row: '|...|' plus any (or no) trailing whitespace
+_matchTableSeparator = re.compile(r'^\s*\|([-: ]+\|)+\s*$', re.IGNORECASE)
 _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
 _markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
 _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
@@ -186,6 +194,8 @@ def analyseMarkdown(filename:str) -> list[Clause]:
 
 	# Go through the lines and detect headers and codefences
 	inCodefence = False
+	inTable = False
+	tableHasSeparator = False
 	for line in inLines:
 
 		# Detect and handle codefences
@@ -204,13 +214,38 @@ def analyseMarkdown(filename:str) -> list[Clause]:
 		if inCodefence:
 			outClauses[-1].append(Line(line, LineType.CODE))
 			continue
-	
+
+		# Detect and handle tables
+		if _matchTable.match(line) and not inTable:
+			inTable = True
+			outClauses[-1].append(Line(line, LineType.TABLEHEADER))
+			continue
+		if inTable:
+			if _matchTableSeparator.match(line) and not tableHasSeparator:
+				outClauses[-1].append(Line(line, LineType.TABLESEPARATOR))
+				tableHasSeparator = True
+				continue
+			elif _matchTable.match(line):
+				outClauses[-1].append(Line(line, LineType.TABLEROW))
+				continue
+			else:
+				inTable = False
+				tableHasSeparator = False
+				# Mark the previous line as the last row in the table
+				outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
+				# continue with other matches
+
 		# Detect notes
   		# Notes are lines that start with a '>'.
 		if _matchNote.match(line):
 			outClauses[-1].append(Line(line, LineType.NOTE))
 			continue
-  
+
+		# Detect images on a single line
+		if (m := _matchStandAloneImage.match(line)):
+			outClauses[-1].append(Line(line, LineType.STANDALONEIMAGE))
+			continue
+
 		# Detect headers
 		_lineType = LineType.TEXT
 		if (m := _matchHeader.match(line)):
-- 
GitLab