class Footnote:
    """ Represents a footnote definition of the markdown file.

        Attributes:
            id: The identifier of the footnote.
            line: The line object that holds the footnote definition.
    """

    def __init__(self, id:str, line:'Line') -> None:
        self.id = id
        self.line = line


class Document:
    """ Represents the document object.

        A document holds the clauses of a markdown file together with the
        footnote definitions that belong to it. The methods transform the
        clauses step by step and store the result back in the document
        object.

        Attributes:
            clauses: The current list of clauses.
            footnotes: The footnote definitions of the document.
    """
    # Project types are quoted (forward references), so they are not
    # evaluated when the class is defined.
    clauses:'list[Clause]' = []
    footnotes:'list[Footnote]' = []

    def __init__(self, clauses:'list[Clause]', footnotes:'list[Footnote]') -> None:
        self.clauses = clauses
        self.footnotes = footnotes


    def splitMarkdownDocument(self,
                              ignoreTitles:list[str] = [],
                              splitLevel:int = 1,
                              ignoreUntilFirstHeading:bool = True) -> None:
        """ Split the clauses at a certain level. This is used to create the separate
            markdown files for MkDocs.

            After the split, the clauses are stored in the document object.

            Args:
                ignoreTitles: A list of titles that should be ignored. They are not
                    included in the output. The comparison is case-insensitive.
                splitLevel: The level at which the clauses should be split.
                ignoreUntilFirstHeading: Remove all leading output clauses that
                    have no title.
        """
        # The (shared) default list is never mutated, a new set is built instead.
        ignored = { t.casefold() for t in ignoreTitles }

        result:list[Clause] = [Clause(0, '', '', [])]
        for clause in self.clauses:

            # Check if the current clause should be ignored
            if clause.title.casefold() in ignored:
                continue

            # Add a new output clause if the current clause's level is
            # equal or less than the split level
            if clause.level <= splitLevel:
                result.append(Clause(clause.level, clause.clauseNumber, clause.title, []))

            # Add the lines to the output clause
            result[-1].extend(clause)

        # Remove the leading clauses that have no title. The emptiness check
        # is required: if no clause has a title (e.g. a document without any
        # heading) the list runs empty and result[0] would raise an IndexError.
        if ignoreUntilFirstHeading:
            while result and len(result[0].title) == 0:
                result.pop(0)

        self.clauses = result


    def insertFootnotes(self) -> None:
        """ Insert footnotes into the clauses.

            Every footnote definition that is referenced in a clause is
            appended once to the end of that clause, in the order of the first
            reference.

            After the insertion, the clauses are stored in the document object.
        """
        print('[green]Adding footnotes to clauses')

        for clause in self.clauses:
            foundFootnotes:list[Footnote] = []
            for line in clause.lines:
                # ATTN: Only footnotes in normal text lines are checked
                if line.lineType != LineType.TEXT:
                    continue

                # A line may contain more than one footnote reference, so
                # all matches are evaluated, not only the first one.
                for reference in _inlineFootnote.finditer(line.text):
                    referencedId = reference.group(1)
                    for footnote in self.footnotes:
                        # Add each definition only once per clause, even if
                        # it is referenced multiple times.
                        if footnote.id == referencedId and footnote not in foundFootnotes:
                            foundFootnotes.append(footnote)

            # Insert the footnotes at the end of the clause
            if foundFootnotes:
                clause.append(Line('\n', LineType.TEXT))
                for footnote in foundFootnotes:
                    clause.append(footnote.line)


    @staticmethod
    def _headerAnchor(title:str) -> str:
        """ Convert the title of a markdown header to its anchor format.

            Args:
                title: The title text of the header.

            Returns:
                The casefolded title with spaces replaced by '-', and with
                special characters and html tags removed.
        """
        anchor = title.strip().casefold().replace(' ', '-')
        for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'):
            anchor = anchor.replace(c, '')
        # remove html tags from the anchor
        return re.sub(_htmlTag, '', anchor)


    @staticmethod
    def _rewriteLinks(pattern:re.Pattern,
                      line:'Line',
                      linkTargets:'dict[str, Clause]',
                      foldCase:bool) -> list[str]:
        """ Rewrite the link targets in a single line to the new structure.

            Only the target inside each match of `pattern` is replaced, not
            every occurrence of the same text in the line. This way a link
            that occurs twice in a line is not rewritten twice, and a link
            does not change a longer link that starts with the same text.

            Args:
                pattern: The compiled link pattern. The link target is the
                    first group, or the whole match if there is no group.
                line: The line to update. Its `text` is updated in place.
                linkTargets: Mapping of anchor (with leading '#') -> clause.
                foldCase: Casefold the link target before the lookup.

            Returns:
                The list of original link targets that were rewritten.
        """
        group = 1 if pattern.groups else 0
        updated:list[str] = []

        def _replace(match:re.Match) -> str:
            whole = match.group(0)
            lnk = match.group(group)
            if not lnk:
                return whole
            target = linkTargets.get(lnk.casefold() if foldCase else lnk)
            if target is None:
                return whole	# unknown target, leave the link untouched
            updated.append(lnk)
            # Positions of the link target relative to the whole match
            start = match.start(group) - match.start(0)
            end = match.end(group) - match.start(0)
            return f'{whole[:start]}../{target.clauseNumber}/#{lnk[1:]}{whole[end:]}'

        line.text = pattern.sub(_replace, line.text)
        return updated


    def updateLinks(self) -> None:
        """ Update the links in the clauses to the new structure. This is done by
            creating a dictionary of all links and their targets and then replacing
            the links in the clauses.

            After the update, the clauses are stored in the document object.
        """
        print('[green]Updating links in clauses')

        # Build the link target dictionary. Mapping anchor -> clause
        linkTargets:dict[str, Clause] = {}

        # Find all Markdown headers in the clauses and convert them to anchor format
        for clause in self.clauses:
            for line in clause.lines:
                if (m := _matchHeader.match(line.text)):
                    anchor = self._headerAnchor(m.group(2))
                    linkTargets[f'#{anchor}'] = clause
                    if veryVerbose:
                        print(f'[dim]Added Markdown anchor "{anchor}"')

        # Find all HTML anchors in the clauses and add them to the dictionary.
        # This is a separate pass, so that HTML anchors take precedence over
        # header anchors of the same name in any clause.
        for clause in self.clauses:
            for line in clause.lines:
                for a in _htmlAnchorLink.findall(line.text):
                    linkTargets[f'#{a}'] = clause
                    if veryVerbose:
                        print(f'[dim]Found HTML anchor "{a}" in clause "{clause.title}"')

        # Replace the html links
        for clause in self.clauses:
            for line in clause.lines:
                for lnk in self._rewriteLinks(_htmlLink, line, linkTargets, False):
                    if veryVerbose:
                        print(f'[dim]Updated HTML link "{lnk}" in clause "{clause.title}"')

        # Replace the markdown links. The old link targets are looked up in
        # their converted (lower case) version.
        for clause in self.clauses:
            for line in clause.lines:
                for lnk in self._rewriteLinks(_markdownLink, line, linkTargets, True):
                    if veryVerbose:
                        print(f'[dim]Updated Markdown link "{lnk}" in clause "{clause.title}"')


    def updateNotes(self) -> None:
        """ Update the notes in the clauses to the mkDocs notes version.

            Consecutive note lines are grouped under a single '!!! note'
            line, which is surrounded by empty lines. The start marker of
            each note line is removed and the line is indented with a tab.

            After the update, the clauses are stored in the document object.
        """
        print('[green]Updating notes in clauses')

        for clause in self.clauses:
            lines:list[Line] = []
            inNote = False
            for line in clause.lines:
                if line.lineType == LineType.NOTE:
                    if not inNote:
                        # First line of a note: open the note block
                        lines.append(Line('\n', LineType.TEXT))
                        lines.append(Line('!!! note\n', LineType.NOTE))
                        inNote = True
                    lines.append(Line(f"\t{re.sub(_matchNoteStart, '', line.text)}", LineType.NOTE))
                    if verbose:
                        print(f'[dim]Converted note in clause "{clause.title}"')
                else:
                    if inNote:
                        # First line after a note: close the note block
                        lines.append(Line('\n', LineType.TEXT))
                        inNote = False
                    lines.append(line)
            clause.lines = lines


    def prepareForMkdocs(self) -> None:
        """ Prepare the clauses for MkDocs. This includes removing the heading
            from the clauses and repairing the indention of list lines that
            are indented with 2 spaces.

            After the preparation, the clauses are stored in the document object.
        """

        # Remove the heading from the lines. The heading is the first line
        # in the clause. This is done because MkDocs repeats the heading when
        # displaying the page.
        for clause in self.clauses:
            if clause.linesCount > 0:
                clause.lines.pop(0)
                # Also, remove the first empty lines if they exist
                while clause.linesCount > 0 and clause.lines[0].text.strip() == '':
                    clause.lines.pop(0)

        # Repair wrong markdown for indented lines.
        # Add 2 spaces to existing 2-space indentions, so that they become
        # 4-space indentions.
        for clause in self.clauses:
            for line in clause.lines:
                if _match2spaceListIndention.match(line.text):
                    line.text = '  ' + line.text


    def writeClausesMkDocs(self, filename:str, navTitle:str) -> None:
        """ Write the clauses to separate files and create a navigation file.

            The clause files are written to the sub-directory `navTitle` next
            to the original markdown file, the navigation file "_nav.yml" is
            written next to the original markdown file.

            Args:
                filename: The name of the original markdown file.
                navTitle: The title of the navigation entry. This is used to determine the directories.
        """

        print('[green]Writing clauses to files')

        # os.path.join is used instead of string formatting, because an empty
        # directory name (a filename without a directory part) would otherwise
        # lead to a path in the root directory.
        baseDirectory = os.path.dirname(filename)
        clauseDirectory = os.path.join(baseDirectory, navTitle)

        # create directory first
        os.makedirs(clauseDirectory, exist_ok = True)

        # Write the files
        for clause in self.clauses:
            # write to single files, even empty ones
            if verbose:
                print(f'[dim]Writing "{clause.clauseNumber}.md" - "{clause.title}"')
            with open(os.path.join(clauseDirectory, f'{clause.clauseNumber}.md'), 'w') as file:
                # Add one empty line before the clause. This is done to avoid
                # a bug in MkDocs that does not display the first line of a clause
                # if it contains a colon. It does not matter otherwise if the line
                # is empty or not.
                file.writelines(clause.asStringList(1))

        # write nav.yml file
        print('[green]Writing "_nav.yml"')
        with open(os.path.join(baseDirectory, '_nav.yml'), 'w') as file:
            if veryVerbose:
                print('[dim]Writing navigation file')

            # NOTE(review): the indentation inside the literals below is
            # significant for the YAML nesting - verify the widths against
            # the consumer of "_nav.yml".
            file.write(f' - {navTitle}:\n')
            for i, clause in enumerate(self.clauses):

                # TODO handle if the next clause is more than one level deeper

                # Single quotes would end the quoted YAML string
                _title = clause.title.replace("'", '"')
                nextClause = self.clauses[i+1] if i+1 < len(self.clauses) else None
                if nextClause is None or nextClause.level <= clause.level:
                    # No sub-clauses follow: the entry links directly to the file
                    file.write(f" {' '*clause.level}- '{_title}': '{navTitle}/{clause.clauseNumber}.md'\n")
                else:
                    # Sub-clauses follow: the entry becomes a section, and its
                    # own content (if any) is linked as 'Introduction'
                    file.write(f" {' '*clause.level}- '{_title}':\n")
                    if len(clause) > 0:
                        file.write(f" {' '*nextClause.level}- 'Introduction': '{navTitle}/{clause.clauseNumber}.md'\n")
re.IGNORECASE) +_footnote = re.compile(r'\[\^([^\]]*)\]:', re.IGNORECASE) +_inlineFootnote = re.compile(r'\[\^([^\]]*)\]', re.IGNORECASE) # TODO handle multiple nav levels (left bar) better (make conifgurable) @@ -124,14 +373,14 @@ def shortHash(value:str, length:int) -> str: ).decode()[:length] -def analyseMarkdown(filename:str) -> list[Clause]: +def analyseMarkdown(filename:str) -> Document: """ Analyse the markdown file and split it into clauses. Args: filename: The name of the markdown file. Returns: - The list of clauses. + The document object. """ print(f'[green]Analyzing "{filename}"') @@ -142,6 +391,7 @@ def analyseMarkdown(filename:str) -> list[Clause]: inLines = file.readlines() outClauses:list[Clause] = [Clause(0, '', '', [])] + footnotes:list[Footnote] = [] # Go through the lines and detect headers and codefences inCodefence = False @@ -169,6 +419,12 @@ def analyseMarkdown(filename:str) -> list[Clause]: if _matchNote.match(line): outClauses[-1].append(Line(line, LineType.NOTE)) continue + + # Detect footnotes + # Footnotes are lines that start with a '^' + if (_fn := _footnote.match(line)): + footnotes.append(Footnote(_fn.groups()[0], Line(line, LineType.TEXT))) + continue # Detect headers _lineType = LineType.TEXT @@ -186,227 +442,7 @@ def analyseMarkdown(filename:str) -> list[Clause]: # Just add the line to the current clause as text outClauses[-1].append(Line(line, _lineType)) - return outClauses - - -def splitMarkdownDocument(clauses:list[Clause], - ignoreTitles:list[str] = [], - splitLevel:int = 1, - ignoreUntilFirstHeading:bool = True) -> list[Clause]: - """ Split the clauses at a certain level. This is used to create the separate - markdown files for MkDocs. - - Args: - clauses: The list of clauses. - ignoreTitles: A list of titles that should be ignored. They are not included in the output. - splitLevel: The level at which the clauses should be split. - ignoreUntilFirstHeader: Ignore all clauses until the first heading. 
- - Returns: - The list of clauses. - """ - outClauses:list[Clause] = [Clause(0, '', '', [])] - - for clause in clauses: - level = clause.level - - # Check if the current clause should be ignored - if clause.title.casefold() in ignoreTitles: - continue - - # Add a new output clause if the current clause's level is - # equal or less than the split level - if clause.level <= splitLevel: - outClauses.append(Clause(level, clause.clauseNumber, clause.title, [])) - - # Add the lines to the output clause - outClauses[-1].extend(clause) - - # Remove the first clause if it has no title - if ignoreUntilFirstHeading: - while len(outClauses[0].title) == 0: - outClauses.pop(0) - - return outClauses - - -def prepareForMkdocs(clauses:list[Clause]) -> list[Clause]: - """ Prepare the clauses for MkDocs. This includes removing the heading - from the clauses and marking the clauses that are only for navigation. - - Args: - clauses: The list of clauses. - - Returns: - The list of clauses. - """ - - # Remove the heading from the lines. The heading is the first line - # in the clause. This is done because MkDocs repeats the heading when - # displaying the page. - for clause in clauses: - if clause.linesCount > 0: - clause.lines.pop(0) - # Also, remove the first empty lines if they exist - while clause.linesCount > 0 and clause.lines[0].text.strip() == '': - clause.lines.pop(0) - - # Repair wrong markdown for indented lines. - # Add 2 spaces to existing 2-space indentions - for clause in clauses: - for i, line in enumerate(clause.lines): - if _match2spaceListIndention.match(line.text): - clause.lines[i].text = ' ' + line.text - - return clauses - - -def updateLinks(clauses:list[Clause]) -> list[Clause]: - """ Update the links in the clauses to the new structure. This is done by - creating a dictionary of all links and their targets and then replacing - the links in the clauses. - - Args: - clauses: The list of clauses. - - Returns: - The list of clauses. 
- """ - print(f'[green]Updating links in clauses') - - # Build the link target dictionary. Mapping anchor -> clause - linkTargets:dict[str, Clause] = {} - - # Find all Markdown headers in the clauses and convert them to anchor format - for i, clause in enumerate(clauses): - # Find all headers in the clause - for line in clause.lines: - if (m := _matchHeader.match(line.text)): - - # convert the header to anchor format and add it to the dictionary - # Remove special characters - # TODO move perhaps to an own function - anchor = m.groups()[1].strip().casefold().replace(' ', '-') - for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'): - anchor = anchor.replace(c, '') - # remove html tags from the anchor - anchor = re.sub(_htmlTag, '', anchor) - - linkTargets[f'#{anchor}'] = clause - if veryVerbose: - print(f'[dim]Added Markdown anchor "{anchor}"') - - # Find all HTML anchors in the clauses and add them to the dictionary - for i, clause in enumerate(clauses): - for line in clause.lines: - if (anchors := _htmlAnchorLink.findall(line.text)): - for a in anchors: - linkTargets[f'#{a}'] = clause - if veryVerbose: - print(f'[dim]Found HTML anchor "{a}" in clause "{clause.title}"') - - # Replace the html links - for clause in clauses: - for i, line in enumerate(clause.lines): - if (links := _htmlLink.findall(line.text)): - for lnk in links: - if lnk in linkTargets: - line.text = clause.lines[i].text = line.text.replace(lnk, f'../{linkTargets[lnk].clauseNumber}/#{lnk[1:]}') # Update the current line as well - if veryVerbose: - print(f'[dim]Updated HTML link "{lnk}" in clause "{clause.title}"') - - # Replace the markdown links - for clause in clauses: - for i, line in enumerate(clause.lines): - if (links := _markdownLink.findall(line.text)): - # Replace the old link targets with converted - # (lower case) versions that point to the output files - for lnk in links: - _lnk =lnk.casefold() - if _lnk in linkTargets: - line.text = clause.lines[i].text = line.text.replace(lnk, 
f'../{linkTargets[_lnk].clauseNumber}/#{lnk[1:]}') # Update the current line as well - if veryVerbose: - print(f'[dim]Updated Markdown link "{lnk}" in clause "{clause.title}"') - - return clauses - - -def updateNotes(clauses:list[Clause]) -> list[Clause]: - """ Update the notes in the clauses to the mkDocs notes version. - - Args: - clauses: The list of clauses. - - Returns: - The list of clauses. - """ - print(f'[green]Updating notes in clauses') - - for clause in clauses: - lines:list[Line] = [] - inNote = False - for line in clause.lines: - if line.lineType == LineType.NOTE: - if not inNote: - lines.append(Line('\n', LineType.TEXT)) - lines.append(Line('!!! note\n', LineType.NOTE)) - inNote = True - lines.append(Line(f"\t{re.sub(_matchNoteStart, '', line.text)}", LineType.NOTE)) - if verbose: - print(f'[dim]Converted note in clause "{clause.title}"') - else: - if inNote: - lines.append(Line('\n', LineType.TEXT)) - inNote = False - lines.append(line) - clause.lines = lines - return clauses - - -def writeClauses(outClauses:list[Clause], filename:str, navTitle:str) -> None: - """ Write the clauses to separate files and create a navigation file. - - Args: - outClauses: The list of clauses. - filename: The name of the original markdown file. - navTitle: The title of the navigation entry. This is used to determine the directories. - """ - - print(f'[green]Writing clauses to files') - # create directory first - os.makedirs(f'{os.path.dirname(filename)}/{navTitle}', exist_ok = True) - - # Write the files - for i, f in enumerate(outClauses): - # write to single files, even empty ones - if verbose: - print(f'[dim]Writing "{f.clauseNumber}.md" - "{f.title}"') - with open(f'{os.path.dirname(filename)}/{navTitle}/{f.clauseNumber}.md', 'w') as file: - # Add one empty line before the clause. This is done to avoid - # a bug in MkDocs that does not display the first line of a clause - # if it contains a colon. It does not matter otherwise if the line - # is empty or not. 
def processDocument(args:argparse.Namespace) -> None:
    """ Convert a single markdown document to the MkDocs structure.

        The document is analysed, split into clauses, and footnotes, links
        and notes are adapted. Afterwards the clauses and the navigation file
        are written, and the media files are copied.

        The global verbosity flags are set from the arguments.

        Args:
            args: The parsed command line arguments. The attributes
                `document`, `very_verbose`, `verbose`, `ignore_clause`,
                `split_level`, `title`, and `media_directory` are read.
    """
    global verbose, veryVerbose

    sourcePath = os.path.abspath(args.document)

    # "very verbose" always implies "verbose"
    veryVerbose = args.very_verbose
    verbose = True if veryVerbose else args.verbose

    # Analyse the markdown file and transform the document step by step
    doc = analyseMarkdown(sourcePath)
    doc.splitMarkdownDocument(args.ignore_clause, args.split_level)
    doc.insertFootnotes()
    doc.updateLinks()
    doc.updateNotes()
    doc.prepareForMkdocs()

    # Write the clauses to files
    doc.writeClausesMkDocs(sourcePath, args.title)

    # Copy the media files
    copyMediaFiles(sourcePath, args.title, args.media_directory)