From 9caf2dbda63e89197bbd7a5d569719cbe22552a8 Mon Sep 17 00:00:00 2001
From: ankraft <an.kraft@gmail.com>
Date: Thu, 18 Apr 2024 23:00:26 +0200
Subject: [PATCH] Removing html tags from clause titles and anchors

Header lines can contain inline HTML tags. Remove every tag from the
clause title and from the anchor that is generated for the header.

Removing a tag can leave residue behind, so trim it as well:
- in the clause title, whitespace that used to separate the tag from the
  text (the title is stripped *before* the tags are removed);
- in the anchor, the dashes that the spaces next to the tag were already
  converted into (spaces become '-' *before* the tags are removed).
  Without this a header like "5.1 Intro <a name="i"></a>" is registered
  as "#51-intro-" and links to "#51-intro" are not resolved.
---
Reviewer note: this patch was recovered from a copy whose whitespace had
been collapsed. Line breaks follow the diff/Python syntax, but the tab
indentation and the position of the blank context lines are assumptions.
Verify them against toMkdocs/toMkdocs.py (blob 1ee9eeb) before applying.

 toMkdocs/toMkdocs.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/toMkdocs/toMkdocs.py b/toMkdocs/toMkdocs.py
index 1ee9eeb..8ad53dc 100644
--- a/toMkdocs/toMkdocs.py
+++ b/toMkdocs/toMkdocs.py
@@ -98,6 +98,7 @@ _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
 _markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
 _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
 _htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)
+_htmlTag = re.compile(r'<[^>]*>', re.IGNORECASE)
 
 _matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE)
 
@@ -172,6 +173,7 @@ def analyseMarkdown(filename:str) -> list[Clause]:
 		if (m := _matchHeader.match(line)):
 			# Add a new clause
 			clauseTitle = m.groups()[1].strip()
+			clauseTitle = _htmlTag.sub('', clauseTitle).strip()	# strip again: a removed tag may expose whitespace
 			headerNumber = _matchHeaderNumber.search(clauseTitle)
 			outClauses.append(Clause(len(m.groups()[0]), # level
 				headerNumber.group() if headerNumber else shortHash(clauseTitle, 6),
@@ -278,12 +280,16 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]:
 		# Find all headers in the clause
 		for line in clause.lines:
 			if (m := _matchHeader.match(line.text)):
+				# convert the header to anchor format and add it to the dictionary
 				# Remove special characters
 				# TODO move perhaps to an own function
 				anchor = m.groups()[1].strip().casefold().replace(' ', '-')
 				for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'):
 					anchor = anchor.replace(c, '')
 
+				# remove html tags from the anchor, and the dashes left over from the spaces around them
+				anchor = _htmlTag.sub('', anchor).strip('-')
+
 				linkTargets[f'#{anchor}'] = clause
 				if veryVerbose:
 					print(f'[dim]Added Markdown anchor "{anchor}"')
-- 
GitLab