Skip to content
Snippets Groups Projects
Commit c47b9d6a authored by Andreas Kraft's avatar Andreas Kraft
Browse files

Changed generated file names to section numbers or a short hash. Added verbose...

Changed generated file names to section numbers or a short hash. Added verbose and veryVerbose CLAs. Added converting of notes to MkDocs admonitions
parent b52168fa
No related branches found
No related tags found
No related merge requests found
......@@ -7,31 +7,81 @@
# directory structure.
#
from typing import Tuple
import argparse, re, os, shutil
from enum import Enum, auto
import argparse, re, os, shutil, hashlib, base64
from dataclasses import dataclass
from rich import print
# Module-wide output switches. Both start disabled; `processDocument` rebinds
# them from the parsed command line arguments.
verbose = False         # enables the per-item '[dim]' progress messages
veryVerbose = False     # enables the additional per-anchor / per-link messages
class LineType(Enum):
    """ Represents the type of a line in the markdown file. """
    # A line that matched the heading pattern and opened a new clause
    HEADING = auto()
    # Default type for every line that is not classified otherwise
    TEXT = auto()
    # The line that opens a fenced code block
    CODEFENCESTART = auto()
    # A line inside a fenced code block
    CODE = auto()
    # The line that closes a fenced code block
    CODEFENCEEND = auto()
    # NOTE(review): not assigned anywhere in the visible code - presumably
    # reserved for list items; confirm before relying on it
    LIST = auto()
    # A block quote line ('>'); later converted to an admonition
    NOTE = auto()
@dataclass
class Line:
    """ Represents a line in the markdown file.

        Attributes:
            text: The text of the line as it was read or generated.
            lineType: The classification of the line. Defaults to `LineType.TEXT`.
    """
    text:str
    lineType:LineType = LineType.TEXT
@dataclass
class Clause:
    """ Represents a clause in the markdown file.

        Attributes:
            level: The heading level of the clause (number of '#' characters, 0 for the preamble).
            clauseNumber: The section number taken from the heading, or a short hash of the title.
            title: The heading text of the clause.
            lines: The lines that belong to the clause.
            onlyNav: Whether the clause only appears in the navigation and gets no file of its own.
    """
    level:int
    clauseNumber:str
    title:str
    lines:list[Line]
    onlyNav:bool = False


    def asStringList(self) -> list[str]:
        """ Return the clause as a list of strings.

            Returns:
                The clause's lines as a list of strings.
        """
        return [ line.text for line in self.lines ]


# Width of the zero-padded index in generated file names. This is a module
# level setting (it is declared `global` and rebound in `processDocument`),
# not an attribute of `Clause`.
fnLength = 4
_matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE)
_matchCodefence = re.compile(r'\s*```\s?.*', re.IGNORECASE)
_matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE)
_matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE)
_matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE)
_matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE)
_match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
_markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#.*)\)', re.IGNORECASE)
_markdownLink = re.compile(r'[^!]\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
_htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)
_matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE)
# TODO handle multiple nav levels (left bar) better (make configurable)
# TODO Update links in the markdown files to the new structure
def shortHash(value:str, length:int) -> str:
    """ Generate a short hash of a string value.

        The hash is the SHA-256 digest of the value, Base64 encoded with the
        URL- and filename-safe alphabet and cut to the requested length.

        Args:
            value: The value to hash.
            length: The length of the hash.

        Returns:
            The hash.
    """
    digest = hashlib.sha256(value.encode()).digest()
    # The result is used as a file name and as part of link targets. The
    # standard Base64 alphabet contains '/' and '+', which would create
    # unintended sub-directories or break URLs, so use '-' and '_' instead.
    return base64.urlsafe_b64encode(digest).decode()[:length]
def analyseMarkdown(filename:str) -> list[Clause]:
    """ Analyse the markdown file and split it into clauses.

        Every heading opens a new clause. All lines are classified (heading,
        text, code fence, code, note) and appended to the clause that is
        currently open. Lines before the first heading go to an initial
        level 0 clause.

        Args:
            filename: The name of the markdown file.

        Returns:
            The list of clauses.
    """
    with open(filename, 'r') as file:
        inLines = file.readlines()

    outLines:list[Clause] = [Clause(0, '', '', [])]

    # Go through the lines and detect headers and codefences
    inCodefence = False
    for line in inLines:

        # Detect codefences.
        # The start and the end pattern both match a bare "```" line, so the
        # decision which one applies depends on whether a fence is open.
        if inCodefence:
            if _matchCodefenceEnd.match(line):
                inCodefence = False
                outLines[-1].lines.append(Line(line, LineType.CODEFENCEEND))
            else:
                # Everything inside a fence is code, even if it looks like
                # a heading or a note
                outLines[-1].lines.append(Line(line, LineType.CODE))
            continue
        if _matchCodefenceStart.match(line):
            inCodefence = True
            outLines[-1].lines.append(Line(line, LineType.CODEFENCESTART))
            continue

        # Detect notes
        if _matchNote.match(line):
            outLines[-1].lines.append(Line(line, LineType.NOTE))
            continue

        # Detect headers
        _lineType = LineType.TEXT
        if (m := _matchHeader.match(line)):
            clauseTitle = m.group(2).strip()
            headerNumber = _matchHeaderNumber.search(clauseTitle)
            outLines.append(Clause(len(m.group(1)),   # level
                                   # Headings without a section number get a short hash instead
                                   headerNumber.group() if headerNumber else shortHash(clauseTitle, 6),
                                   clauseTitle,
                                   []))
            _lineType = LineType.HEADING

        # The heading line itself becomes the first line of the new clause
        outLines[-1].lines.append(Line(line, _lineType))

    return outLines
......@@ -88,7 +156,7 @@ def splitMarkdownDocument(clauses:list[Clause],
Returns:
The list of clauses.
"""
outLines:list[Clause] = [Clause(0, '', [])]
outLines:list[Clause] = [Clause(0, '', '', [])]
for clause in clauses:
level = clause.level
......@@ -100,7 +168,7 @@ def splitMarkdownDocument(clauses:list[Clause],
# Add a new output clause if the current clause's level is
# equal or less than the split level
if clause.level <= splitLevel:
outLines.append(Clause(level, clause.title, []))
outLines.append(Clause(level, clause.clauseNumber, clause.title, []))
# Add the lines to the output clause
outLines[-1].lines.extend(clause.lines)
......@@ -131,64 +199,26 @@ def prepareForMkdocs(clauses:list[Clause]) -> list[Clause]:
if len(clause.lines) > 0:
clause.lines.pop(0)
# Also, remove the first empty lines if they exist
while len(clause.lines) > 0 and clause.lines[0].strip() == '':
while len(clause.lines) > 0 and clause.lines[0].text.strip() == '':
clause.lines.pop(0)
# Mark the whole clause if it is the first AND NOT only clause
# for a parent clause. Then it is usually empty except the heading.
# We still need it for navigation, so we mark it as onlyNav
for clause in clauses:
if len(''.join(clause.lines).strip()) == 0 and clause.level > 0:
if len(''.join(clause.asStringList()).strip()) == 0 and clause.level > 0:
clause.onlyNav = True
# Repair wrong markdown for indented lines.
# Add 2 spaces to existing 2-space indentions
for clause in clauses:
for i, line in enumerate(clause.lines):
if _match2spaceListIndention.match(line):
clause.lines[i] = ' ' + line
if _match2spaceListIndention.match(line.text):
clause.lines[i].text = ' ' + line.text
return clauses
def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None:
    """ Write the clauses to separate files and create a navigation file.

        NOTE(review): a second `writeClauses` is defined further down in this
        file. Being the later definition, that one is bound to the name when
        the function is called; this version names the files after the
        zero-padded index of the clause instead of its clause number.

        Args:
            outLines: The list of clauses.
            filename: The name of the original markdown file.
            navTitle: The title of the navigation entry. This is used to determine the directories.
    """
    # Write the files
    # create directory first
    os.makedirs(f'{os.path.dirname(filename)}/{navTitle}', exist_ok = True)
    for i, f in enumerate(outLines):
        if len(f.lines) == 0 or f.onlyNav: # ignore empty clauses or clauses that are only for navigation
            print(f'[green]Navigation only - "{f.title}"')
            continue
        # write to single files
        # The file name is the clause's index, zero-padded to `fnLength` digits
        print(f'[green]Writing "{i:0{fnLength}}.md" - "{f.title}"')
        with open(f'{os.path.dirname(filename)}/{navTitle}/{i:0{fnLength}}.md', 'w') as file:
            # The clause's lines are passed to `writelines` as they are
            file.writelines(f.lines)
    # write nav.yml file
    print(f'[green]Writing "_nav.yml"')
    with open(f'{os.path.dirname(filename)}/_nav.yml', 'w') as file:
        file.write(f' - {navTitle}:\n')
        for i, f in enumerate(outLines):
            if f.onlyNav:
                # Navigation-only clauses get an entry without a target file
                file.write(f" {' '*f.level}- '{f.title}':\n")
                #file.write(f"{' '*f.level}- '{f.title}':\n")
            else:
                if len(f.lines) == 0:
                    continue
                # The entry is indented according to the clause level
                file.write(f" {' '*f.level}- '{f.title}': '{navTitle}/{i:0{fnLength}}.md'\n")
                #file.write(f"{' '*f.level}- '{f.title}': '{navTitle}/{i:0{fnLength}}.md'\n")
def _rewriteLinkTargets(pattern:re.Pattern, text:str, linkTargets:dict[str, Clause], foldCase:bool = False) -> tuple[str, list[str]]:
    """ Rewrite the in-document link targets in a single line of text.

        Only the span captured by the first group of the pattern is replaced,
        and every match is replaced exactly once. A plain `str.replace()` per
        found link would rewrite a target again if the same link occurs twice
        in a line, and would also hit longer anchors that merely start with
        the same text.

        Args:
            pattern: The compiled link pattern. Group 1 must capture the "#anchor" target.
            text: The text of the line.
            linkTargets: Mapping anchor -> clause that contains the anchor.
            foldCase: Whether to casefold the target before looking it up.

        Returns:
            A tuple of the updated text and the list of original targets that were rewritten.
    """
    rewritten:list[str] = []

    def _substitute(m:re.Match) -> str:
        lnk = m.group(1)
        clause = linkTargets.get(lnk.casefold() if foldCase else lnk)
        if clause is None:
            return m.group(0)   # unknown target, leave the link as it is
        rewritten.append(lnk)
        # Splice the new target into the matched text at the group's position
        whole, offset = m.group(0), m.start(0)
        return f'{whole[:m.start(1) - offset]}../{clause.clauseNumber}/#{lnk[1:]}{whole[m.end(1) - offset:]}'

    return pattern.sub(_substitute, text), rewritten


def updateLinks(clauses:list[Clause]) -> list[Clause]:
    """ Update the links in the clauses to the new structure. This is done by
        creating a dictionary of all links and their targets and then replacing
        the links in the clauses.

        Link targets are Markdown headings (converted to anchor format) and
        HTML `<a name="...">` anchors. A link to a known target `#anchor` is
        rewritten to `../<clause number>/#anchor`. Links to unknown targets
        are left untouched.

        Args:
            clauses: The list of clauses. The lines are updated in place.

        Returns:
            The list of clauses.
    """
    print(f'[green]Updating links in clauses')

    # Build the link target dictionary. Mapping anchor -> clause
    linkTargets:dict[str, Clause] = {}

    # Find all Markdown headers in the clauses and convert them to anchor format
    for clause in clauses:
        for line in clause.lines:
            if (m := _matchHeader.match(line.text)):
                # convert the header to anchor format and add it to the dictionary
                # Remove special characters
                # TODO move perhaps to an own function
                anchor = m.groups()[1].strip().casefold().replace(' ', '-')
                for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'):
                    anchor = anchor.replace(c, '')
                linkTargets[f'#{anchor}'] = clause
                if veryVerbose:
                    print(f'[dim]Added Markdown anchor "{anchor}"')

    # Find all HTML anchors in the clauses and add them to the dictionary
    for clause in clauses:
        for line in clause.lines:
            for a in _htmlAnchorLink.findall(line.text):
                linkTargets[f'#{a}'] = clause
                if veryVerbose:
                    print(f'[dim]Found HTML anchor "{a}" in clause "{clause.title}"')

    # Replace the html links
    for clause in clauses:
        for line in clause.lines:
            line.text, updated = _rewriteLinkTargets(_htmlLink, line.text, linkTargets)
            if veryVerbose:
                for lnk in updated:
                    print(f'[dim]Updated HTML link "{lnk}" in clause "{clause.title}"')

    # Replace the markdown links.
    # The targets are looked up case-insensitively, because the heading
    # anchors in the dictionary are casefolded.
    for clause in clauses:
        for line in clause.lines:
            line.text, updated = _rewriteLinkTargets(_markdownLink, line.text, linkTargets, foldCase = True)
            if veryVerbose:
                for lnk in updated:
                    print(f'[dim]Updated Markdown link "{lnk}" in clause "{clause.title}"')

    return clauses
def updateNotes(clauses:list[Clause]) -> list[Clause]:
    """ Update the notes in the clauses to the mkDocs notes version.

        Consecutive note lines (block quote lines) are converted to a single
        `!!! note` admonition. The quote marker and an optional "Note:" label
        are removed and the remaining text is indented with a tab. A blank
        line is inserted before and after the admonition.

        Args:
            clauses: The list of clauses. The `lines` of each clause are replaced.

        Returns:
            The list of clauses.
    """
    print(f'[green]Updating notes in clauses')

    for clause in clauses:
        lines:list[Line] = []
        inNote = False
        for line in clause.lines:
            if line.lineType != LineType.NOTE:
                if inNote:
                    # Close the admonition with a blank line
                    lines.append(Line('\n', LineType.TEXT))
                    inNote = False
                lines.append(line)
                continue

            if not inNote:
                # First line of a note: open the admonition
                lines.append(Line('\n', LineType.TEXT))
                lines.append(Line('!!! note\n', LineType.NOTE))
                inNote = True

            content = _matchNoteStart.sub('', line.text)
            # Removing the quote marker must not remove the end of the line,
            # otherwise an empty quote line (">") would be merged with the
            # line that follows it.
            if line.text.endswith('\n') and not content.endswith('\n'):
                content += '\n'
            lines.append(Line(f"\t{content}", LineType.NOTE))
            if verbose:
                print(f'[dim]Converted note in clause "{clause.title}"')

        clause.lines = lines
    return clauses
def writeClauses(outLines:list[Clause], filename:str, navTitle:str) -> None:
    """ Write the clauses to separate files and create a navigation file.

        Every clause that has content is written to `<clause number>.md` in a
        directory named after the navigation title, next to the original
        document. The navigation file `_nav.yml` is written to the directory
        of the original document.

        Args:
            outLines: The list of clauses.
            filename: The name of the original markdown file.
            navTitle: The title of the navigation entry. This is used to determine the directories.
    """
    print(f'[green]Writing clauses to files')

    # Build the paths with os.path.join(): for a file name without a
    # directory part this yields a path relative to the current directory
    # instead of one in the file system's root.
    baseDirectory = os.path.dirname(filename)
    clauseDirectory = os.path.join(baseDirectory, navTitle)

    # Write the files
    # create directory first
    os.makedirs(clauseDirectory, exist_ok = True)
    for f in outLines:
        if len(f.lines) == 0 or f.onlyNav: # ignore empty clauses or clauses that are only for navigation
            if verbose:
                print(f'[dim]Navigation only - "{f.title}"')
            continue

        # write to single files
        if verbose:
            print(f'[dim]Writing "{f.clauseNumber}.md" - "{f.title}"')
        with open(os.path.join(clauseDirectory, f'{f.clauseNumber}.md'), 'w') as file:
            file.writelines(f.asStringList())

    # write nav.yml file
    print(f'[green]Writing "_nav.yml"')
    with open(os.path.join(baseDirectory, '_nav.yml'), 'w') as file:
        if veryVerbose:
            print(f'[dim]Writing navigation file')
        # NOTE(review): the indentation inside the literals below is kept
        # exactly as found - confirm that it yields the intended nesting.
        file.write(f' - {navTitle}:\n')
        for f in outLines:
            # The title is written as a single-quoted YAML scalar, in which a
            # single quote has to be doubled.
            title = f.title.replace("'", "''")
            if f.onlyNav:
                file.write(f" {' '*f.level}- '{title}':\n")
            else:
                if len(f.lines) == 0:
                    continue
                file.write(f" {' '*f.level}- '{title}': '{navTitle}/{f.clauseNumber}.md'\n")
def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None:
""" Copy media files from the source directory to the target directory.
......@@ -262,14 +378,18 @@ def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') ->
def processDocument(args:argparse.Namespace) -> None:
global fnLength
global verbose, veryVerbose
document = os.path.abspath(args.document)
fnLength = args.filename_length
veryVerbose = args.very_verbose
verbose = args.verbose
if veryVerbose:
verbose = True
# Analyse the markdown file
clauses = analyseMarkdown(document)
clauses = splitMarkdownDocument(clauses, [ t.casefold() for t in args.ignore_clause ], args.split_level)
clauses = updateLinks(clauses)
clauses = updateNotes(clauses)
clauses = prepareForMkdocs(clauses)
# Write the clauses to files
......@@ -282,11 +402,12 @@ def processDocument(args:argparse.Namespace) -> None:
# Script entry point: parse the command line arguments and process the document
if __name__ == '__main__':
    parser = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter)

    # Output control
    parser.add_argument('--verbose', '-v', action = 'store_true', help = 'verbose output during processing')
    parser.add_argument('--very-verbose', '-vv', action = 'store_true', help = 'very verbose output during processing')

    # Conversion settings
    parser.add_argument('--title', '-t', metavar = 'title', required = True, help = 'mkdocs navigation title')
    parser.add_argument('--ignore-clause', '-ic', metavar = 'clause', nargs = '+', default = [ 'Contents', 'History' ], help = 'ignore headers in the markdown document')
    parser.add_argument('--split-level', '-sl', metavar = 'level', type = int, default = 2, help = 'split clauses on which level')
    parser.add_argument('--media-directory', '-md', metavar = 'media-directory', default = 'media', help = 'directory name where media files are stored')
    # Parsed as an integer like --split-level, so that a non-numeric value
    # is rejected by the argument parser right away
    parser.add_argument('--filename-length', '-fl', metavar = 'length', type = int, default = 4, help = 'length of the filename with leading zeros')

    parser.add_argument('document', type = str, help = 'a oneM2M markdown specification document to process')
    args = parser.parse_args()

    processDocument(args)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment