Skip to content
Snippets Groups Projects
Commit ddee5f58 authored by Andreas Kraft's avatar Andreas Kraft
Browse files

Added html and markdown internal links and anchors. Added configurable...

Added html and markdown internal links and anchors. Added configurable filename length. Renamed -i to -ic to avoid confusion
parent 0ab633db
Branches
No related tags found
No related merge requests found
......@@ -7,6 +7,7 @@
# directory structure.
#
from typing import Tuple
import argparse, re, os, shutil
from dataclasses import dataclass
from rich import print
......@@ -19,10 +20,15 @@ class Clause:
lines:list[str]
onlyNav:bool = False
fnLength = 4
_matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE)
_matchCodefence = re.compile(r'\s*```\s?.*', re.IGNORECASE)
_match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
_markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#.*)\)', re.IGNORECASE)
_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
_htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)
# TODO handle multiple nav levels (left bar) better (make configurable)
# TODO Update links in the markdown files to the new structure
......@@ -37,7 +43,7 @@ def analyseMarkdown(filename:str) -> list[Clause]:
The list of clauses.
"""
print(f'[gray]Analyzing file "{filename}"')
print(f'[green]Analyzing "{filename}"')
with open(filename, 'r') as file:
inLines = file.readlines()
......@@ -165,12 +171,13 @@ def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None:
continue
# write to single files
with open(f'{os.path.dirname(filename)}/{navTitle}/{i}.md', 'w') as file:
print(f'[green]Writing "{i:0{fnLength}}.md" - "{f.title}"')
with open(f'{os.path.dirname(filename)}/{navTitle}/{i:0{fnLength}}.md', 'w') as file:
file.writelines(f.lines)
print(f'[green]File "{i}.md" written - "{f.title}"')
# write nav.yml file
print(f'[green]Writing "_nav.yml"')
with open(f'{os.path.dirname(filename)}/_nav.yml', 'w') as file:
file.write(f' - {navTitle}:\n')
for i, f in enumerate(outLines):
......@@ -179,16 +186,13 @@ def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None:
else:
if len(f.lines) == 0:
continue
file.write(f" {' '*f.level}- '{f.title}': '{navTitle}/{i}.md'\n")
print(f'[green]File "_nav.yml" written')
file.write(f" {' '*f.level}- '{f.title}': '{navTitle}/{i:0{fnLength}}.md'\n")
_markdownLink = re.compile(r'\[.*\]\((.*)\)', re.IGNORECASE)
_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
_anchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)
def updateLinks(clauses:list[Clause]) -> list[Clause]:
""" Update the links in the clauses to the new structure.
""" Update the links in the clauses to the new structure. This is done by
creating a dictionary of all links and their targets and then replacing
the links in the clauses.
Args:
clauses: The list of clauses.
......@@ -196,45 +200,48 @@ def updateLinks(clauses:list[Clause]) -> list[Clause]:
Returns:
The list of clauses.
"""
print(f'[green]Updating links in clauses')
# Build the link target dictionary
linkTargets = {}
# Build the link target dictionary. Mapping anchor -> (clause index, clause)
linkTargets:dict[str, Tuple[int, str]] = {}
# Find all Markdown headers in the clauses and convert them to anchor format
for i, clause in enumerate(clauses):
# Find all headers in the clause
for line in clause.lines:
if (m := _matchHeader.match(line)):
# convert the header to anchor format and add it to the dictionary
# TODO perhaps move this into its own function
anchor = m.groups()[1].strip().casefold().replace(' ', '-').replace('.', '')
linkTargets[f'#{anchor}'] = (i, clause)
# Find all HTML anchors in the clauses and add them to the dictionary
for i, clause in enumerate(clauses):
for line in clause.lines:
if (anchors := _htmlAnchorLink.findall(line)):
for a in anchors:
linkTargets[f'#{a}'] = (i, clause)
# Replace the html links
for clause in clauses:
for i, line in enumerate(clause.lines):
if (lnk := _anchorLink.findall(line)):
linkTargets[lnk[0]] = clause
# # Check if the line contains a link
# if not (lnk := _markdownLink.search(line)) and not (lnk := _htmlLink.search(line)) and not (lnk := _anchorLink.search(line)):
# continue
# print(lnk)
# print(lnk.groups()[0])
# Update links in the markdown file
for title in [ c.title for c in clauses ]:
if title in line:
clause.lines[i] = line.replace(title, f'{title}/{title.casefold()}.md')
if (links := _htmlLink.findall(line)):
for lnk in links:
width = 4
if lnk in linkTargets:
clause.lines[i] = line.replace(lnk, f'../{linkTargets[lnk][0]:0{width}}/#{lnk[1:]}')
# Create a dictionary with the titles and the corresponding clause
clauseDict = {}
for clause in clauses:
clauseDict[clause.title] = clause
# Go through the clauses and update the links
# Replace the markdown links
for clause in clauses:
for i, line in enumerate(clause.lines):
# Update links in the markdown file
for title in clauseDict.keys():
if title in line:
clause.lines[i] = line.replace(title, f'{title}/{title.casefold()}.md')
if (links := _markdownLink.findall(line)):
for lnk in links:
if lnk in linkTargets:
clause.lines[i] = line.replace(lnk, f'../{linkTargets[lnk][0]:0{fnLength}}/#{lnk[1:]}')
return clauses
def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None:
""" Copy media files from the source directory to the target directory.
......@@ -247,19 +254,27 @@ def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') ->
targetDirectory = f'{os.path.dirname(filename)}/{navTitle}/{mediaDirectory}'
if os.path.exists(sourceDirectory):
print(f'[green]Copying media files from "{sourceDirectory}" to "{targetDirectory}"')
shutil.copytree(sourceDirectory, targetDirectory, dirs_exist_ok = True)
print(f'[green]Copied media files from "{sourceDirectory}" to "{targetDirectory}"')
else:
print(f'[red]Media directory "{sourceDirectory}" does not exist')
def processDocument(args:argparse.Namespace) -> None:
global fnLength
document = os.path.abspath(args.document)
fnLength = args.filename_length
# Analyse the markdown file
clauses = analyseMarkdown(document)
clauses = splitMarkdownDocument(clauses, [ t.casefold() for t in args.ignore_clause ], args.split_level)
# clauses = updateLinks(clauses)
clauses = updateLinks(clauses)
clauses = prepareForMkdocs(clauses)
# Write the clauses to files
writeClauses(clauses, document, args.title)
# Copy the media files
copyMediaFiles(document, args.title, args.media_directory)
......@@ -267,14 +282,11 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--title', '-t', metavar = 'title', required = True, help = 'mkdocs navigation tile')
parser.add_argument('--ignore-clause', '-i', metavar = 'clause', nargs = '+', default = [ 'Contents', 'History' ], help = 'ignore headers in the markdown document')
parser.add_argument('--ignore-clause', '-ic', metavar = 'clause', nargs = '+', default = [ 'Contents', 'History' ], help = 'ignore headers in the markdown document')
parser.add_argument('--split-level', '-sl', metavar = 'level', type = int, default = 2, help = 'split clauses on which level')
parser.add_argument('--media-directory', '-md', metavar = 'media-directory', default = 'media', help = 'directory name where media files are stored')
parser.add_argument('--filename-length', '-fl', metavar = 'length', default = 4, help = 'length of the filename with leading zeros')
parser.add_argument('document', type = str, help = 'a oneM2M markdown specification document to process')
args = parser.parse_args()
processDocument(args)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment