From b65475dff2d842c40e57dc6d6e68bd088b863a2d Mon Sep 17 00:00:00 2001
From: ankraft <an.kraft@gmail.com>
Date: Tue, 1 Oct 2024 11:48:46 +0200
Subject: [PATCH] Added support for footnotes (move them to the correct clauses
 if detected somewhere in the document). Also refactored functions into a new
 Document class

---
 toMkdocs/toMkdocs.py | 499 +++++++++++++++++++++++--------------------
 1 file changed, 268 insertions(+), 231 deletions(-)

diff --git a/toMkdocs/toMkdocs.py b/toMkdocs/toMkdocs.py
index 54a79bd..1db1167 100644
--- a/toMkdocs/toMkdocs.py
+++ b/toMkdocs/toMkdocs.py
@@ -91,6 +91,253 @@ class Clause:
 		return sum([ len(l.text) for l in self.lines ])
 
 
+class Footnote:
+	"""	Represents a footnote definition in the markdown file: its id and its line. """
+	def __init__(self, id:str, line:Line) -> None:
+		self.id = id
+		self.line = line
+
+
+class Document:
+	"""	Represents the document object: the clauses and the footnote definitions. """
+	clauses:list[Clause]		# instance attribute, always assigned in __init__
+	footnotes:list[Footnote]	# instance attribute, always assigned in __init__
+
+	def __init__(self, clauses:list[Clause], footnotes:list[Footnote]) -> None:
+		self.clauses = clauses
+		self.footnotes = footnotes
+
+
+	def splitMarkdownDocument(self, 
+							  ignoreTitles:list[str] = [], 
+							  splitLevel:int = 1,
+							  ignoreUntilFirstHeading:bool = True) -> None:
+		"""	Split the clauses at a certain level. This is used to create the separate
+			markdown files for MkDocs.
+
+			After the split, the clauses are stored in the document object.
+
+			Args:
+				ignoreTitles: A list of titles that should be ignored. They are not included in the output.
+				splitLevel: The level at which the clauses should be split.
+				ignoreUntilFirstHeading: Drop the leading output clauses that have no title.
+			
+		"""
+		result:list[Clause] = [Clause(0, '', '', [])]
+
+		ignoreTitles = [ t.casefold() for t in ignoreTitles ]	# compare the titles case-insensitively
+
+		for clause in self.clauses:
+			# Check if the current clause should be ignored
+			if clause.title.casefold() in ignoreTitles:
+				continue
+
+			# Add a new output clause if the current clause's level is 
+			# equal or less than the split level
+			if clause.level <= splitLevel:
+				result.append(Clause(clause.level, clause.clauseNumber, clause.title, []))
+			
+			# Add the lines to the output clause
+			result[-1].extend(clause)
+		
+		# Remove the leading clauses that have no title. The check for an
+		# empty list avoids an IndexError when no titled clause is left at
+		# all (e.g. a document without headings, or all of them ignored).
+		if ignoreUntilFirstHeading:
+			while result and len(result[0].title) == 0:
+				result.pop(0)
+		
+		self.clauses = result
+
+
+	def insertFootnotes(self) -> None:
+		"""	Insert footnotes into the clauses.
+
+			Each footnote that is referenced in a clause is appended once to the end of that clause.
+		"""
+		print(f'[green]Adding footnotes to clauses')
+
+		for clause in self.clauses:
+			foundFootnotes:list[Footnote] = []
+			for line in clause.lines:
+				# ATTN: Only footnotes in normal text lines are checked
+				if line.lineType != LineType.TEXT:
+					continue
+				# A line may reference more than one footnote, so look at all matches
+				for fn in _inlineFootnote.finditer(line.text):
+					for f in self.footnotes:
+						if f.id == fn.group(1) and f not in foundFootnotes:
+							foundFootnotes.append(f)
+
+			# Insert the footnotes at the end of the clause
+			if len(foundFootnotes) > 0:
+				clause.append(Line('\n', LineType.TEXT))
+				for f in foundFootnotes:
+					clause.append(f.line)
+
+
+	def updateLinks(self) -> None:
+		"""	Update the links in the clauses to the new structure. This is done by
+			creating a dictionary of all links and their targets and then replacing
+			the links in the clauses.
+
+			Only the target inside a matched link is rewritten. A target that occurs
+			more than once in a line, or that is a prefix of another target, is
+			therefore not rewritten twice.
+
+			After the update, the clauses are stored in the document object.
+		"""
+		print(f'[green]Updating links in clauses')
+
+		# Build the link target dictionary. Mapping anchor -> clause
+		linkTargets:dict[str, Clause] = {}
+
+		# Find all Markdown headers in the clauses and convert them to anchor format
+		for clause in self.clauses:
+			# Find all headers in the clause
+			for line in clause.lines:
+				if (m := _matchHeader.match(line.text)):
+					# convert the header to anchor format and add it to the dictionary
+					# Remove special characters
+					# TODO move perhaps to an own function
+					anchor = m.groups()[1].strip().casefold().replace(' ', '-')
+					for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'):
+						anchor = anchor.replace(c, '')
+					# remove html tags from the anchor
+					anchor = re.sub(_htmlTag, '', anchor)
+
+					linkTargets[f'#{anchor}'] = clause
+					if veryVerbose:
+						print(f'[dim]Added Markdown anchor "{anchor}"')
+
+		# Find all HTML anchors in the clauses and add them to the dictionary
+		for clause in self.clauses:
+			for line in clause.lines:
+				for a in _htmlAnchorLink.findall(line.text):
+					linkTargets[f'#{a}'] = clause
+					if veryVerbose:
+						print(f'[dim]Found HTML anchor "{a}" in clause "{clause.title}"')
+
+		def retarget(pattern:re.Pattern, kind:str, caseless:bool, clause:Clause, text:str) -> str:
+			"""	Return `text` with the target of every known link prefixed by its clause file. """
+			def replace(m:re.Match) -> str:
+				g = m.lastindex or 0	# the group that holds the target, or the whole match
+				lnk = m.group(g)
+				# Markdown targets are looked up in lower case, the anchor itself keeps its case
+				target = linkTargets.get(lnk.casefold() if caseless else lnk)
+				if target is None:
+					return m.group(0)
+				if veryVerbose:
+					print(f'[dim]Updated {kind} link "{lnk}" in clause "{clause.title}"')
+				start, end = m.start(g) - m.start(0), m.end(g) - m.start(0)
+				return f'{m.group(0)[:start]}../{target.clauseNumber}/#{lnk[1:]}{m.group(0)[end:]}'
+			return pattern.sub(replace, text)
+
+		# Replace the html links first, then the markdown links
+		for clause in self.clauses:
+			for line in clause.lines:
+				line.text = retarget(_htmlLink, 'HTML', False, clause, line.text)
+				line.text = retarget(_markdownLink, 'Markdown', True, clause, line.text)
+
+
+	def updateNotes(self) -> None:
+		"""	Convert the note lines in the clauses to the MkDocs note format.
+
+			After the update, the clauses are stored in the document object.
+		"""
+		print(f'[green]Updating notes in clauses')
+
+		for clause in self.clauses:
+			converted:list[Line] = []
+			insideNote = False
+			for line in clause.lines:
+				isNote = line.lineType == LineType.NOTE
+				if isNote and not insideNote:	# a note block starts: empty line and marker
+					converted.append(Line('\n', LineType.TEXT))
+					converted.append(Line('!!! note\n', LineType.NOTE))
+				elif insideNote and not isNote:	# a note block ended: empty line
+					converted.append(Line('\n', LineType.TEXT))
+				insideNote = isNote
+				if isNote:
+					converted.append(Line(f"\t{re.sub(_matchNoteStart, '', line.text)}", LineType.NOTE))
+					if verbose:
+						print(f'[dim]Converted note in clause "{clause.title}"')
+				else:
+					converted.append(line)
+			clause.lines = converted
+
+
+	def prepareForMkdocs(self) -> None:
+		"""	Prepare the clauses for MkDocs: remove the heading line of each clause and
+			the empty lines that follow it, then repair 2-space list indentions.
+
+			After the preparation, the clauses are stored in the document object.
+		"""
+
+		for clause in self.clauses:
+			# The heading is the first line in the clause. It is removed
+			# because MkDocs repeats the heading when displaying the page.
+			if clause.linesCount == 0:
+				continue
+			del clause.lines[0]
+			# Also, remove the first empty lines if they exist
+			while clause.linesCount > 0 and not clause.lines[0].text.strip():
+				del clause.lines[0]
+		
+		# Repair wrong markdown for indented lines.
+		# Add 2 spaces to existing 2-space indentions
+		for clause in self.clauses:
+			for line in clause.lines:
+				if _match2spaceListIndention.match(line.text):
+					line.text = '  ' + line.text
+
+
+	def writeClausesMkDocs(self, filename:str, navTitle:str) -> None:
+		"""	Write every clause to its own markdown file and create the "_nav.yml"
+			navigation file in the directory of the original markdown file.
+
+			Args:
+				filename: The name of the original markdown file.
+				navTitle: The title of the navigation entry. This is used to determine the directories.
+		"""
+
+		print(f'[green]Writing clauses to files')
+		baseDirectory = os.path.dirname(filename)
+		# create directory first
+		os.makedirs(f'{baseDirectory}/{navTitle}', exist_ok = True)
+
+		# Write the files
+		for clause in self.clauses:
+			# write to single files, even empty ones
+			if verbose:
+				print(f'[dim]Writing "{clause.clauseNumber}.md" - "{clause.title}"')
+			with open(f'{baseDirectory}/{navTitle}/{clause.clauseNumber}.md', 'w') as file:
+				# Add one empty line before the clause. This is done to avoid
+				# a bug in MkDocs that does not display the first line of a clause
+				# if it contains a colon. It does not matter otherwise if the line
+				# is empty or not.
+				file.writelines(clause.asStringList(1))
+
+		# write nav.yml file
+		print(f'[green]Writing "_nav.yml"')
+		with open(f'{baseDirectory}/_nav.yml', 'w') as file:
+			if veryVerbose:
+				print(f'[dim]Writing navigation file')
+			file.write(f'  - {navTitle}:\n')
+			for i, clause in enumerate(self.clauses):
+				# TODO handle if the next clause is more than one level deeper
+				indent = '  ' * clause.level
+				_title = clause.title.replace("'", '"')
+				hasSubClauses = i+1 < len(self.clauses) and self.clauses[i+1].level > clause.level
+				if not hasSubClauses:
+					file.write(f"  {indent}- '{_title}': '{navTitle}/{clause.clauseNumber}.md'\n")
+					continue
+				file.write(f"  {indent}- '{_title}':\n")
+				if len(clause) > 0:
+					file.write(f"  {'  '*self.clauses[i+1].level}- 'Introduction': '{navTitle}/{clause.clauseNumber}.md'\n")
+
+
+
 _matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE)
 _matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE)
 _matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE)
@@ -102,6 +349,8 @@ _htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
 _htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)
 _htmlTag = re.compile(r'<[^>]*>', re.IGNORECASE)
 _matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE)
+_footnote = re.compile(r'\[\^([^\]]*)\]:', re.IGNORECASE)	# footnote definition "[^id]:", group 1 is the id
+_inlineFootnote = re.compile(r'\[\^([^\]]*)\]', re.IGNORECASE)	# footnote reference "[^id]", group 1 is the id
 
 
 # TODO handle multiple nav levels (left bar) better (make conifgurable)
@@ -124,14 +373,14 @@ def shortHash(value:str, length:int) -> str:
 			 ).decode()[:length]
 
 
-def analyseMarkdown(filename:str) -> list[Clause]:
+def analyseMarkdown(filename:str) -> Document:
 	"""	Analyse the markdown file and split it into clauses.
 
 		Args:
 			filename: The name of the markdown file.
 
 		Returns:
-			The list of clauses.
+			The document object.
 	"""
 
 	print(f'[green]Analyzing "{filename}"')
@@ -142,6 +391,7 @@ def analyseMarkdown(filename:str) -> list[Clause]:
 		inLines = file.readlines()
 	
 	outClauses:list[Clause] = [Clause(0, '', '', [])]
+	footnotes:list[Footnote] = []
 
 	# Go through the lines and detect headers and codefences
 	inCodefence = False
@@ -169,6 +419,12 @@ def analyseMarkdown(filename:str) -> list[Clause]:
 		if _matchNote.match(line):
 			outClauses[-1].append(Line(line, LineType.NOTE))
 			continue
+
+		# Detect footnotes
+		# Footnote definitions are lines that start with '[^id]:'
+		if (_fn := _footnote.match(line)):
+			footnotes.append(Footnote(_fn.groups()[0], Line(line, LineType.TEXT)))
+			continue
   
 		# Detect headers
 		_lineType = LineType.TEXT
@@ -186,227 +442,7 @@ def analyseMarkdown(filename:str) -> list[Clause]:
 		# Just add the line to the current clause as text
 		outClauses[-1].append(Line(line, _lineType))
 
-	return outClauses
-
-
-def splitMarkdownDocument(clauses:list[Clause], 
-						  ignoreTitles:list[str] = [], 
-						  splitLevel:int = 1,
-						  ignoreUntilFirstHeading:bool = True) -> list[Clause]:
-	"""	Split the clauses at a certain level. This is used to create the separate
-		markdown files for MkDocs.
-
-		Args:
-			clauses: The list of clauses.
-			ignoreTitles: A list of titles that should be ignored. They are not included in the output.
-			splitLevel: The level at which the clauses should be split.
-			ignoreUntilFirstHeader: Ignore all clauses until the first heading.
-		
-		Returns:
-			The list of clauses.
-	"""
-	outClauses:list[Clause] = [Clause(0, '', '', [])]
-
-	for clause in clauses:
-		level = clause.level
-
-		# Check if the current clause should be ignored
-		if clause.title.casefold() in ignoreTitles:
-			continue
-
-		# Add a new output clause if the current clause's level is 
-  		# equal or less than the split level
-		if clause.level <= splitLevel:
-			outClauses.append(Clause(level, clause.clauseNumber, clause.title, []))
-		
-		# Add the lines to the output clause
-		outClauses[-1].extend(clause)
-	
-	# Remove the first clause if it has no title
-	if ignoreUntilFirstHeading:
-		while len(outClauses[0].title) == 0:
-			outClauses.pop(0)
-
-	return outClauses
-
-
-def prepareForMkdocs(clauses:list[Clause]) -> list[Clause]:
-	"""	Prepare the clauses for MkDocs. This includes removing the heading
-		from the clauses and marking the clauses that are only for navigation.
-
-		Args:
-			clauses: The list of clauses.
-		
-		Returns:
-			The list of clauses.
-	"""
-
-	# Remove the heading from the lines. The heading is the first line
-	# in the clause. This is done because MkDocs repeats the heading when
-	# displaying the page.
-	for clause in clauses:
-		if clause.linesCount > 0:
-			clause.lines.pop(0)
-			# Also, remove the first empty lines if they exist
-			while clause.linesCount > 0 and clause.lines[0].text.strip() == '':
-				clause.lines.pop(0)
-	
-	# Repair wrong markdown for indented lines.
-	# Add 2 spaces to existing 2-space indentions
-	for clause in clauses:
-		for i, line in enumerate(clause.lines):
-			if _match2spaceListIndention.match(line.text):
-				clause.lines[i].text = '  ' + line.text
-	
-	return clauses
-
-
-def updateLinks(clauses:list[Clause]) -> list[Clause]:
-	"""	Update the links in the clauses to the new structure. This is done by
-		creating a dictionary of all links and their targets and then replacing
-		the links in the clauses.
-
-		Args:
-			clauses: The list of clauses.
-		
-		Returns:
-			The list of clauses.
-	"""
-	print(f'[green]Updating links in clauses')
-
-	# Build the link target dictionary. Mapping anchor -> clause
-	linkTargets:dict[str, Clause] = {}
-
-	# Find all Markdown headers in the clauses and convert them to anchor format
-	for i, clause in enumerate(clauses):
-		# Find all headers in the clause
-		for line in clause.lines:
-			if (m := _matchHeader.match(line.text)):
-				
-				# convert the header to anchor format and add it to the dictionary
-				# Remove special characters
-				# TODO move perhaps to an own function
-				anchor = m.groups()[1].strip().casefold().replace(' ', '-')
-				for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'):
-					anchor = anchor.replace(c, '')
-				# remove html tags from the anchor
-				anchor = re.sub(_htmlTag, '', anchor)
-
-				linkTargets[f'#{anchor}'] = clause
-				if veryVerbose:
-					print(f'[dim]Added Markdown anchor "{anchor}"')
-
-	# Find all HTML anchors in the clauses and add them to the dictionary
-	for i, clause in enumerate(clauses):
-		for line in clause.lines:
-			if (anchors := _htmlAnchorLink.findall(line.text)):
-				for a in anchors:
-					linkTargets[f'#{a}'] = clause
-					if veryVerbose:
-						print(f'[dim]Found HTML anchor "{a}" in clause "{clause.title}"')
-
-	# Replace the html links
-	for clause in clauses:
-		for i, line in enumerate(clause.lines):
-			if (links := _htmlLink.findall(line.text)):
-				for lnk in links:
-					if lnk in linkTargets:
-						line.text = clause.lines[i].text = line.text.replace(lnk, f'../{linkTargets[lnk].clauseNumber}/#{lnk[1:]}')	# Update the current line as well
-				if veryVerbose:
-					print(f'[dim]Updated HTML link "{lnk}" in clause "{clause.title}"')
-
-	# Replace the markdown links
-	for clause in clauses:
-		for i, line in enumerate(clause.lines):
-			if (links := _markdownLink.findall(line.text)):
-				# Replace the old link targets with converted 
-				# (lower case) versions that point to the output files
-				for lnk in links:
-					_lnk =lnk.casefold()
-					if _lnk in linkTargets:
-						line.text = clause.lines[i].text = line.text.replace(lnk, f'../{linkTargets[_lnk].clauseNumber}/#{lnk[1:]}')	# Update the current line as well
-				if veryVerbose:
-					print(f'[dim]Updated Markdown link "{lnk}" in clause "{clause.title}"')
-
-	return clauses
-
-
-def updateNotes(clauses:list[Clause]) -> list[Clause]:
-	"""	Update the notes in the clauses to the mkDocs notes version.
-
-		Args:
-			clauses: The list of clauses.
-		
-		Returns:
-			The list of clauses.
-	"""
-	print(f'[green]Updating notes in clauses')
-
-	for clause in clauses:
-		lines:list[Line] = []
-		inNote = False
-		for line in clause.lines:
-			if line.lineType == LineType.NOTE:
-				if not inNote:
-					lines.append(Line('\n', LineType.TEXT))
-					lines.append(Line('!!! note\n', LineType.NOTE))
-					inNote = True
-				lines.append(Line(f"\t{re.sub(_matchNoteStart, '', line.text)}", LineType.NOTE))
-				if verbose:
-					print(f'[dim]Converted note in clause "{clause.title}"')
-			else:
-				if inNote:
-					lines.append(Line('\n', LineType.TEXT))
-				inNote = False
-				lines.append(line)
-		clause.lines = lines
-	return clauses
-
-
-def writeClauses(outClauses:list[Clause], filename:str, navTitle:str) -> None:
-	"""	Write the clauses to separate files and create a navigation file.
-
-		Args:
-			outClauses: The list of clauses.
-			filename: The name of the original markdown file.
-			navTitle: The title of the navigation entry. This is used to determine the directories.
-	"""
-
-	print(f'[green]Writing clauses to files')
-	# create directory first
-	os.makedirs(f'{os.path.dirname(filename)}/{navTitle}', exist_ok = True)
-
-	# Write the files
-	for i, f in enumerate(outClauses):
-		# write to single files, even empty ones
-		if verbose:
-			print(f'[dim]Writing "{f.clauseNumber}.md" - "{f.title}"')
-		with open(f'{os.path.dirname(filename)}/{navTitle}/{f.clauseNumber}.md', 'w') as file:
-			# Add one empty line before the clause. This is done to avoid
-			# a bug in MkDocs that does not display the first line of a clause
-			# if it contains a colon. It does not matter otherwise if the line
-			# is empty or not.
-			file.writelines(f.asStringList(1))	
-
-	
-	# write nav.yml file
-	print(f'[green]Writing "_nav.yml"')
-	with open(f'{os.path.dirname(filename)}/_nav.yml', 'w') as file:
-		if veryVerbose:
-			print(f'[dim]Writing navigation file')
-		file.write(f'  - {navTitle}:\n')
-		for i, f in enumerate(outClauses):
-
-			# TODO handle if the next clause is more than one level deeper
-   
-			_title = f.title.replace("'", '"')
-			nextClause = outClauses[i+1] if i+1 < len(outClauses) else None
-			if nextClause is None or nextClause.level <= f.level:
-				file.write(f"  {'  '*f.level}- '{_title}': '{navTitle}/{f.clauseNumber}.md'\n")
-			else:
-				file.write(f"  {'  '*f.level}- '{_title}':\n")
-				if len(f) > 0:
-					file.write(f"  {'  '*nextClause.level}- 'Introduction': '{navTitle}/{f.clauseNumber}.md'\n")
+	return Document(outClauses, footnotes)
 
 
 def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None:
@@ -429,24 +465,25 @@ def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') ->
 	
 def processDocument(args:argparse.Namespace) -> None:
 	global verbose, veryVerbose
-	document = os.path.abspath(args.document)
+	inDocumentFilename = os.path.abspath(args.document)
 	veryVerbose = args.very_verbose
 	verbose = args.verbose
 	if veryVerbose:
 		verbose = True
 
 	# Analyse the markdown file
-	clauses = analyseMarkdown(document)
-	clauses = splitMarkdownDocument(clauses, [ t.casefold() for t in args.ignore_clause ], args.split_level)
-	clauses = updateLinks(clauses)
-	clauses = updateNotes(clauses)
-	clauses = prepareForMkdocs(clauses)
+	document = analyseMarkdown(inDocumentFilename)
+	document.splitMarkdownDocument(args.ignore_clause, args.split_level)
+	document.insertFootnotes()
+	document.updateLinks()
+	document.updateNotes()
+	document.prepareForMkdocs()
 
 	# Write the clauses to files
-	writeClauses(clauses, document, args.title)
+	document.writeClausesMkDocs(inDocumentFilename, args.title)
 
 	# Copy the media files
-	copyMediaFiles(document, args.title, args.media_directory)
+	copyMediaFiles(inDocumentFilename, args.title, args.media_directory)
 
 
 if __name__ == '__main__':
-- 
GitLab