Removing HTML tags from clause titles and anchors

9caf2dbd · Andreas Kraft · 001d4d9e · 9caf2dbd
Commit 9caf2dbd authored 1 year ago by Andreas Kraft
--- a/toMkdocs/toMkdocs.py
+++ b/toMkdocs/toMkdocs.py
@@ -98,6 +98,7 @@ _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
 _markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
 _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
 _htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)
+_htmlTag = re.compile(r'<[^>]*>', re.IGNORECASE)
 _matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE)
@@ -172,6 +173,7 @@ def analyseMarkdown(filename:str) -> list[Clause]:
 		if (m := _matchHeader.match(line)):
 			# Add a new clause
 			clauseTitle = m.groups()[1].strip()
+			clauseTitle = re.sub(_htmlTag, '', clauseTitle)
 			headerNumber = _matchHeaderNumber.search(clauseTitle)
 			outClauses.append(Clause(len(m.groups()[0]), # level
 						  		   headerNumber.group() if headerNumber else shortHash(clauseTitle, 6),
@@ -278,12 +280,16 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]:
 		# Find all headers in the clause
 		for line in clause.lines:
 			if (m := _matchHeader.match(line.text)):
 				# convert the header to anchor format and add it to the dictionary
 				# Remove special characters
 				# TODO move perhaps to an own function
 				anchor = m.groups()[1].strip().casefold().replace(' ', '-')
 				for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'):
 					anchor = anchor.replace(c, '')
+				# remove html tags from the anchor
+				anchor = re.sub(_htmlTag, '', anchor)
 				linkTargets[f'#{anchor}'] = clause
 				if veryVerbose:
 					print(f'[dim]Added Markdown anchor "{anchor}"')