Skip to content
Snippets Groups Projects
Commit 78b6c6f4 authored by Andreas Kraft's avatar Andreas Kraft Committed by Miguel Angel Reina Ortega
Browse files

Detecting images and tables in the markdown parser

parent f2a6c36b
No related branches found
No related tags found
No related merge requests found
...@@ -24,6 +24,11 @@ class LineType(Enum): ...@@ -24,6 +24,11 @@ class LineType(Enum):
CODEFENCEEND = auto() CODEFENCEEND = auto()
LIST = auto() LIST = auto()
NOTE = auto() NOTE = auto()
STANDALONEIMAGE = auto()
TABLEHEADER = auto()
TABLESEPARATOR = auto()
TABLEROW = auto()
TABLELASTROW = auto()
@dataclass @dataclass
...@@ -136,6 +141,9 @@ _matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE) ...@@ -136,6 +141,9 @@ _matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE)
_matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE) _matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE)
_matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE) _matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE)
_matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE) _matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE)
_matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECASE)
_matchTable = re.compile(r'^\s*\|.*\|\s$', re.IGNORECASE)
_matchTableSeparator = re.compile(r'^\s*\|([-: ]+\|)+\s*$', re.IGNORECASE)
_match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE) _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
_markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE) _markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE) _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
...@@ -186,6 +194,8 @@ def analyseMarkdown(filename:str) -> list[Clause]: ...@@ -186,6 +194,8 @@ def analyseMarkdown(filename:str) -> list[Clause]:
# Go through the lines and detect headers and codefences # Go through the lines and detect headers and codefences
inCodefence = False inCodefence = False
inTable = False
tableHasSeparator = False
for line in inLines: for line in inLines:
# Detect and handle codefences # Detect and handle codefences
...@@ -205,12 +215,37 @@ def analyseMarkdown(filename:str) -> list[Clause]: ...@@ -205,12 +215,37 @@ def analyseMarkdown(filename:str) -> list[Clause]:
outClauses[-1].append(Line(line, LineType.CODE)) outClauses[-1].append(Line(line, LineType.CODE))
continue continue
# Detect and handle tables
if _matchTable.match(line) and not inTable:
inTable = True
outClauses[-1].append(Line(line, LineType.TABLEHEADER))
continue
if inTable:
if _matchTableSeparator.match(line) and not tableHasSeparator:
outClauses[-1].append(Line(line, LineType.TABLESEPARATOR))
tableHasSeparator = True
continue
elif _matchTable.match(line):
outClauses[-1].append(Line(line, LineType.TABLEROW))
continue
else:
inTable = False
tableHasSeparator = False
# Mark the previous line as the last row in the table
outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
# continue with other matches
# Detect notes # Detect notes
# Notes are lines that start with a '>'. # Notes are lines that start with a '>'.
if _matchNote.match(line): if _matchNote.match(line):
outClauses[-1].append(Line(line, LineType.NOTE)) outClauses[-1].append(Line(line, LineType.NOTE))
continue continue
# Detect images on a single line
if (m := _matchStandAloneImage.match(line)):
outClauses[-1].append(Line(line, LineType.STANDALONEIMAGE))
continue
# Detect headers # Detect headers
_lineType = LineType.TEXT _lineType = LineType.TEXT
if (m := _matchHeader.match(line)): if (m := _matchHeader.match(line)):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment