From 9caf2dbda63e89197bbd7a5d569719cbe22552a8 Mon Sep 17 00:00:00 2001
From: ankraft <an.kraft@gmail.com>
Date: Thu, 18 Apr 2024 23:00:26 +0200
Subject: [PATCH] Remove HTML tags from clause titles and anchors

---
 toMkdocs/toMkdocs.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/toMkdocs/toMkdocs.py b/toMkdocs/toMkdocs.py
index 1ee9eeb..8ad53dc 100644
--- a/toMkdocs/toMkdocs.py
+++ b/toMkdocs/toMkdocs.py
@@ -98,6 +98,7 @@ _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
 _markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
 _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
 _htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)
+_htmlTag = re.compile(r'<[^>]*>', re.IGNORECASE)
 _matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE)
 
 
@@ -172,6 +173,7 @@ def analyseMarkdown(filename:str) -> list[Clause]:
 		if (m := _matchHeader.match(line)):
 			# Add a new clause
 			clauseTitle = m.groups()[1].strip()
+			clauseTitle = re.sub(_htmlTag, '', clauseTitle)
 			headerNumber = _matchHeaderNumber.search(clauseTitle)
 			outClauses.append(Clause(len(m.groups()[0]), # level
 						  		   headerNumber.group() if headerNumber else shortHash(clauseTitle, 6),
@@ -278,12 +280,16 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]:
 		# Find all headers in the clause
 		for line in clause.lines:
 			if (m := _matchHeader.match(line.text)):
+				
 				# convert the header to anchor format and add it to the dictionary
 				# Remove special characters
 				# TODO move perhaps to an own function
 				anchor = m.groups()[1].strip().casefold().replace(' ', '-')
 				for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'):
 					anchor = anchor.replace(c, '')
+				# Remove HTML tags from the anchor
+				anchor = re.sub(_htmlTag, '', anchor)
+
 				linkTargets[f'#{anchor}'] = clause
 				if veryVerbose:
 					print(f'[dim]Added Markdown anchor "{anchor}"')
-- 
GitLab