Added html and markdown internal links and anchors. Added configurable...

Added html and markdown internal links and anchors. Added configurable filename length. Renamed -i to -ic to avoid confusion

Added html and markdown internal links and anchors. Added configurable...
ddee5f58 · Andreas Kraft · 0ab633db · ddee5f58
Commit ddee5f58 authored 1 year ago by Andreas Kraft
--- a/toMkdocs/toMkdocs.py
+++ b/toMkdocs/toMkdocs.py
@@ -7,6 +7,7 @@
 #	directory structure.
 #

+from typing import Tuple
 import argparse, re, os, shutil
 from dataclasses import dataclass
 from rich import print
@@ -19,10 +20,15 @@ class Clause:
 	lines:list[str]
 	onlyNav:bool = False

+fnLength = 4

 _matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE)
 _matchCodefence = re.compile(r'\s*```\s?.*', re.IGNORECASE)
 _match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
+_markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#.*)\)', re.IGNORECASE)
+_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
+_htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)
+

 # TODO handle multiple nav levels (left bar) better (make configurable)
 # TODO Update links in the markdown files to the new structure
@@ -37,7 +43,7 @@ def analyseMarkdown(filename:str) -> list[Clause]:
 			The list of clauses.
 	"""

-	print(f'[gray]Analyzing file "{filename}"')
+	print(f'[green]Analyzing "{filename}"')

 	with open(filename, 'r') as file:
 		inLines = file.readlines()
@@ -165,12 +171,13 @@ def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None:
 			continue
 	
 		# write to single files
-		with open(f'{os.path.dirname(filename)}/{navTitle}/{i}.md', 'w') as file:
+		print(f'[green]Writing "{i:0{fnLength}}.md" - "{f.title}"')
+		with open(f'{os.path.dirname(filename)}/{navTitle}/{i:0{fnLength}}.md', 'w') as file:
 			file.writelines(f.lines)
-			print(f'[green]File "{i}.md" written - "{f.title}"')

 	
 	# write nav.yml file
+	print(f'[green]Writing "_nav.yml"')
 	with open(f'{os.path.dirname(filename)}/_nav.yml', 'w') as file:
 		file.write(f'  - {navTitle}:\n')
 		for i, f in enumerate(outLines):
@@ -179,16 +186,13 @@ def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None:
 			else:
 				if len(f.lines) == 0:
 					continue
-				file.write(f"  {'  '*f.level}- '{f.title}': '{navTitle}/{i}.md'\n")
-	print(f'[green]File "_nav.yml" written')
-
+				file.write(f"  {'  '*f.level}- '{f.title}': '{navTitle}/{i:0{fnLength}}.md'\n")

-_markdownLink = re.compile(r'\[.*\]\((.*)\)', re.IGNORECASE)
-_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
-_anchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)

 def updateLinks(clauses:list[Clause]) -> list[Clause]:
-	"""	Update the links in the clauses to the new structure.
+	"""	Update the links in the clauses to the new structure. This is done by
+		creating a dictionary of all links and their targets and then replacing
+		the links in the clauses.

 		Args:
 			clauses: The list of clauses.
@@ -196,45 +200,48 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]:
 		Returns:
 			The list of clauses.
 	"""
+	print(f'[green]Updating links in clauses')

-	# Build the link target dictionary
-	linkTargets = {}
-
+	# Build the link target dictionary. Mapping anchor -> (clause index, clause)
+	linkTargets:dict[str, Tuple[int, str]] = {}

+	# Find all Markdown headers in the clauses and convert them to anchor format
+	for i, clause in enumerate(clauses):
+		# Find all headers in the clause
+		for line in clause.lines:
+			if (m := _matchHeader.match(line)):
+				# convert the header to anchor format and add it to the dictionary
+				# TODO move perhaps to an own function
+				anchor = m.groups()[1].strip().casefold().replace(' ', '-').replace('.', '')
+				linkTargets[f'#{anchor}'] = (i, clause)
+
+	# Find all HTML anchors in the clauses and add them to the dictionary
+	for i, clause in enumerate(clauses):
+		for line in clause.lines:
+			if (anchors := _htmlAnchorLink.findall(line)):
+				for a in anchors:
+					linkTargets[f'#{a}'] = (i, clause)
+
+	# Replace the html links
 	for clause in clauses:
 		for i, line in enumerate(clause.lines):
-			if (lnk := _anchorLink.findall(line)):
-				linkTargets[lnk[0]] = clause
-
-			# # Check if the line contains a link
-			# if not (lnk := _markdownLink.search(line)) and not (lnk := _htmlLink.search(line)) and not (lnk := _anchorLink.search(line)):
-			# 	continue
-			# print(lnk)
-			# print(lnk.groups()[0])
-			
-
-
-
-			# Update links in the markdown file
-			for title in [ c.title for c in clauses ]:
-				if title in line:
-					clause.lines[i] = line.replace(title, f'{title}/{title.casefold()}.md')
+			if (links := _htmlLink.findall(line)):
+				for lnk in links:
+					width = 4
+					if lnk in linkTargets:
+						clause.lines[i] = line.replace(lnk, f'../{linkTargets[lnk][0]:0{width}}/#{lnk[1:]}')

-	# Create a dictionary with the titles and the corresponding clause
-	clauseDict = {}
-	for clause in clauses:
-		clauseDict[clause.title] = clause
-
-	# Go through the clauses and update the links
+	# Replace the markdown links
 	for clause in clauses:
 		for i, line in enumerate(clause.lines):
-			# Update links in the markdown file
-			for title in clauseDict.keys():
-				if title in line:
-					clause.lines[i] = line.replace(title, f'{title}/{title.casefold()}.md')
+			if (links := _markdownLink.findall(line)):
+				for lnk in links:
+					if lnk in linkTargets:
+						clause.lines[i] = line.replace(lnk, f'../{linkTargets[lnk][0]:0{fnLength}}/#{lnk[1:]}')

 	return clauses

+
 def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None:
 	"""	Copy media files from the source directory to the target directory.

@@ -247,19 +254,27 @@ def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') ->
 	targetDirectory = f'{os.path.dirname(filename)}/{navTitle}/{mediaDirectory}'

 	if os.path.exists(sourceDirectory):
+		print(f'[green]Copying media files from "{sourceDirectory}" to "{targetDirectory}"')
 		shutil.copytree(sourceDirectory, targetDirectory, dirs_exist_ok = True)
-		print(f'[green]Copied media files from "{sourceDirectory}" to "{targetDirectory}"')
 	else:
 		print(f'[red]Media directory "{sourceDirectory}" does not exist')

 	
 def processDocument(args:argparse.Namespace) -> None:
+	global fnLength
 	document = os.path.abspath(args.document)
+	fnLength = args.filename_length
+
+	# Analyse the markdown file
 	clauses = analyseMarkdown(document)
 	clauses = splitMarkdownDocument(clauses, [ t.casefold() for t in args.ignore_clause ], args.split_level)
-	# clauses = updateLinks(clauses)
+	clauses = updateLinks(clauses)
 	clauses = prepareForMkdocs(clauses)
+
+	# Write the clauses to files
 	writeClauses(clauses, document, args.title)
+
+	# Copy the media files
 	copyMediaFiles(document, args.title, args.media_directory)


@@ -267,14 +282,11 @@ if __name__ == '__main__':
 	parser = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter)

 	parser.add_argument('--title', '-t', metavar = 'title', required = True, help = 'mkdocs navigation tile')
-	parser.add_argument('--ignore-clause', '-i', metavar = 'clause', nargs = '+', default = [ 'Contents', 'History' ], help = 'ignore headers in the markdown document')
+	parser.add_argument('--ignore-clause', '-ic', metavar = 'clause', nargs = '+', default = [ 'Contents', 'History' ], help = 'ignore headers in the markdown document')
 	parser.add_argument('--split-level', '-sl', metavar = 'level', type = int, default = 2, help = 'split clauses on which level')
 	parser.add_argument('--media-directory', '-md', metavar = 'media-directory', default = 'media', help = 'directory name where media files are stored')
+	parser.add_argument('--filename-length', '-fl', metavar = 'length', default = 4, help = 'length of the filename with leading zeros')
 	parser.add_argument('document', type = str, help = 'a oneM2M markdown specification document to process')
-
 	args = parser.parse_args()
-
 	processDocument(args)
 	
-	
-