Skip to content
Snippets Groups Projects
Commit 9caf2dbd authored by Andreas Kraft
Browse files

Removing html tags from clause titles and anchors

parent 001d4d9e
No related branches found
No related tags found
No related merge requests found
...@@ -98,6 +98,7 @@ _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE) ...@@ -98,6 +98,7 @@ _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
_markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE) _markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE) _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
_htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE) _htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)
_htmlTag = re.compile(r'<[^>]*>', re.IGNORECASE)
_matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE) _matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE)
...@@ -172,6 +173,7 @@ def analyseMarkdown(filename:str) -> list[Clause]: ...@@ -172,6 +173,7 @@ def analyseMarkdown(filename:str) -> list[Clause]:
if (m := _matchHeader.match(line)): if (m := _matchHeader.match(line)):
# Add a new clause # Add a new clause
clauseTitle = m.groups()[1].strip() clauseTitle = m.groups()[1].strip()
clauseTitle = re.sub(_htmlTag, '', clauseTitle)
headerNumber = _matchHeaderNumber.search(clauseTitle) headerNumber = _matchHeaderNumber.search(clauseTitle)
outClauses.append(Clause(len(m.groups()[0]), # level outClauses.append(Clause(len(m.groups()[0]), # level
headerNumber.group() if headerNumber else shortHash(clauseTitle, 6), headerNumber.group() if headerNumber else shortHash(clauseTitle, 6),
...@@ -278,12 +280,16 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]: ...@@ -278,12 +280,16 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]:
# Find all headers in the clause # Find all headers in the clause
for line in clause.lines: for line in clause.lines:
if (m := _matchHeader.match(line.text)): if (m := _matchHeader.match(line.text)):
# convert the header to anchor format and add it to the dictionary # convert the header to anchor format and add it to the dictionary
# Remove special characters # Remove special characters
# TODO move perhaps to an own function # TODO move perhaps to an own function
anchor = m.groups()[1].strip().casefold().replace(' ', '-') anchor = m.groups()[1].strip().casefold().replace(' ', '-')
for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'): for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'):
anchor = anchor.replace(c, '') anchor = anchor.replace(c, '')
# remove html tags from the anchor
anchor = re.sub(_htmlTag, '', anchor)
linkTargets[f'#{anchor}'] = clause linkTargets[f'#{anchor}'] = clause
if veryVerbose: if veryVerbose:
print(f'[dim]Added Markdown anchor "{anchor}"') print(f'[dim]Added Markdown anchor "{anchor}"')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment