Detecting images and tables in the markdown parser

78b6c6f4 · Andreas Kraft · Miguel Angel Reina Ortega · f2a6c36b · 78b6c6f4
Commit 78b6c6f4, authored 1 year ago by Andreas Kraft; committed by Miguel Angel Reina Ortega 1 year ago.
--- a/toMkdocs/toMkdocs.py
+++ b/toMkdocs/toMkdocs.py
@@ -24,6 +24,11 @@ class LineType(Enum):
 	CODEFENCEEND = auto()
 	LIST = auto()
 	NOTE = auto()
+	STANDALONEIMAGE = auto()
+	TABLEHEADER = auto()
+	TABLESEPARATOR = auto()
+	TABLEROW = auto()
+	TABLELASTROW = auto()
 @dataclass
@@ -136,6 +141,9 @@ _matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE)
 _matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE)
 _matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE)
 _matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE)
+_matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECASE)
+_matchTable = re.compile(r'^\s*\|.*\|\s*$', re.IGNORECASE)	# `\s*`: accept zero or more trailing blanks, consistent with _matchTableSeparator
+_matchTableSeparator = re.compile(r'^\s*\|([-: ]+\|)+\s*$', re.IGNORECASE)
 _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
 _markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
 _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
@@ -186,6 +194,8 @@ def analyseMarkdown(filename:str) -> list[Clause]:
 	# Go through the lines and detect headers and codefences
 	inCodefence = False
+	inTable = False
+	tableHasSeparator = False
 	for line in inLines:
 		# Detect and handle codefences
@@ -205,12 +215,37 @@ def analyseMarkdown(filename:str) -> list[Clause]:
 			outClauses[-1].append(Line(line, LineType.CODE))
 			continue
+		# Detect and handle tables
+		if _matchTable.match(line) and not inTable:
+			inTable = True
+			outClauses[-1].append(Line(line, LineType.TABLEHEADER))
+			continue
+		if inTable:
+			if _matchTableSeparator.match(line) and not tableHasSeparator:
+				outClauses[-1].append(Line(line, LineType.TABLESEPARATOR))
+				tableHasSeparator = True
+				continue
+			elif _matchTable.match(line):
+				outClauses[-1].append(Line(line, LineType.TABLEROW))
+				continue
+			else:
+				inTable = False
+				tableHasSeparator = False
+				# Mark the previous line as the last row in the table
+				outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
+				# continue with other matches
 		# Detect notes
  		# Notes are lines that start with a '>'.
 		if _matchNote.match(line):
 			outClauses[-1].append(Line(line, LineType.NOTE))
 			continue
+		# Detect images on a single line
+		if (m := _matchStandAloneImage.match(line)):
+			outClauses[-1].append(Line(line, LineType.STANDALONEIMAGE))
+			continue
 		# Detect headers
 		_lineType = LineType.TEXT
 		if (m := _matchHeader.match(line)):