class Footnote:
    """ Represents a footnote in the markdown file.

        Attributes:
            id: The footnote identifier, i.e. the text between `[^` and `]`.
            line: The line that holds the footnote definition.
    """

    def __init__(self, id:str, line:Line) -> None:
        self.id = id
        self.line = line


class Document:
    """ Represents the document object.

        A document holds the clauses of a markdown file and the footnote
        definitions found in it. All processing methods work in place: they
        replace or modify `self.clauses` and return nothing.
    """

    # Declared for type checkers only. The values are always set per instance
    # in __init__, so no mutable list is shared through the class.
    clauses:list[Clause]
    footnotes:list[Footnote]

    def __init__(self, clauses:list[Clause], footnotes:list[Footnote]) -> None:
        self.clauses = clauses
        self.footnotes = footnotes


    def splitMarkdownDocument(self,
                              ignoreTitles:list[str] = [],
                              splitLevel:int = 1,
                              ignoreUntilFirstHeading:bool = False) -> None:
        """ Split the clauses at a certain level. This is used to create the separate
            markdown files for MkDocs.

            After the split, the clauses are stored in the document object.

            Args:
                ignoreTitles: A list of titles that should be ignored. They are not included
                    in the output. The comparison is case-insensitive.
                splitLevel: The level at which the clauses should be split.
                ignoreUntilFirstHeading: Remove the leading clauses that have no title.
        """
        # The default list is never mutated, only a case-folded copy of it is used.
        ignored = { t.casefold() for t in ignoreTitles }
        result:list[Clause] = []

        for clause in self.clauses:

            # Check if the current clause should be ignored
            if clause.title.casefold() in ignored:
                continue

            # Add a new output clause if the current clause's level is equal or
            # less than the split level. Also do this when there is no output
            # clause yet, because otherwise result[-1] would raise an IndexError.
            if clause.level <= splitLevel or not result:
                result.append(Clause(clause.level, clause.clauseNumber, clause.title, []))

            # Add the lines to the output clause
            result[-1].extend(clause)

        # Remove the leading clauses without a title. The list may become
        # empty, e.g. for a document without any heading.
        if ignoreUntilFirstHeading:
            while result and not result[0].title:
                result.pop(0)

        self.clauses = result


    def insertFootnotes(self) -> None:
        """ Insert footnotes into the clauses.

            For every clause the footnote definitions that are referenced in its
            text lines are appended to the end of the clause, preceded by an empty
            line. Each definition is appended only once per clause, in the order
            of its first reference.

            After the insertion, the clauses are stored in the document object.
        """
        print('[green]Adding footnotes to clauses')

        # Mapping footnote id -> definitions with that id
        definitions:dict[str, list[Footnote]] = {}
        for footnote in self.footnotes:
            definitions.setdefault(footnote.id, []).append(footnote)

        for clause in self.clauses:
            referenced:dict[str, list[Footnote]] = {}
            for line in clause.lines:
                # ATTN: Only footnotes in normal text lines are checked
                if line.lineType != LineType.TEXT:
                    continue
                # A line may reference more than one footnote
                for match in _inlineFootnote.finditer(line.text):
                    footnoteId = match.group(1)
                    if footnoteId in definitions:
                        referenced.setdefault(footnoteId, definitions[footnoteId])

            # Insert the footnotes at the end of the clause
            if referenced:
                clause.append(Line('\n', LineType.TEXT))
                for footnotes in referenced.values():
                    for footnote in footnotes:
                        clause.append(footnote.line)


    @staticmethod
    def _headerToAnchor(header:str) -> str:
        """ Convert the text of a heading to the anchor format.

            The text is stripped and case-folded, spaces are replaced by dashes,
            and a fixed set of special characters as well as HTML tags are removed.

            Args:
                header: The heading text without the leading `#` characters.

            Returns:
                The anchor, without a leading `#`.
        """
        anchor = header.strip().casefold().replace(' ', '-')
        anchor = anchor.translate(str.maketrans('', '', '.()[]:,\'"'))
        # remove html tags from the anchor
        return re.sub(_htmlTag, '', anchor)


    @staticmethod
    def _replaceLinks(text:str, replacements:dict[str, str]) -> str:
        """ Replace every occurrence of the keys of `replacements` in `text`
            by the respective value.

            All replacements happen in a single pass, and longer keys are tried
            first. This way an already replaced link is never replaced again,
            and a link that is the prefix of another link does not damage it.

            Args:
                text: The text to update.
                replacements: Mapping of old link -> new link.

            Returns:
                The updated text.
        """
        if not replacements:
            return text
        pattern = '|'.join(re.escape(lnk) for lnk in sorted(replacements, key = len, reverse = True))
        return re.sub(pattern, lambda m: replacements[m.group(0)], text)


    def updateLinks(self) -> None:
        """ Update the links in the clauses to the new structure. This is done by
            creating a dictionary of all links and their targets and then replacing
            the links in the clauses.

            After the update, the clauses are stored in the document object.
        """
        print('[green]Updating links in clauses')

        # Build the link target dictionary. Mapping anchor -> clause
        linkTargets:dict[str, Clause] = {}

        # Find all Markdown headers in the clauses and convert them to anchor format
        for clause in self.clauses:
            for line in clause.lines:
                if (m := _matchHeader.match(line.text)):
                    anchor = self._headerToAnchor(m.groups()[1])
                    linkTargets[f'#{anchor}'] = clause
                    if veryVerbose:
                        print(f'[dim]Added Markdown anchor "{anchor}"')

        # Find all HTML anchors in the clauses and add them to the dictionary
        for clause in self.clauses:
            for line in clause.lines:
                for a in _htmlAnchorLink.findall(line.text):
                    linkTargets[f'#{a}'] = clause
                    if veryVerbose:
                        print(f'[dim]Found HTML anchor "{a}" in clause "{clause.title}"')

        # Replace the html links
        for clause in self.clauses:
            for line in clause.lines:
                replacements = { lnk: f'../{linkTargets[lnk].clauseNumber}/#{lnk[1:]}'
                                 for lnk in _htmlLink.findall(line.text)
                                 if lnk in linkTargets }
                if replacements:
                    line.text = self._replaceLinks(line.text, replacements)
                    if veryVerbose:
                        for lnk in replacements:
                            print(f'[dim]Updated HTML link "{lnk}" in clause "{clause.title}"')

        # Replace the markdown links
        for clause in self.clauses:
            for line in clause.lines:
                # Look up the link targets with the converted (lower case)
                # versions, but keep the original spelling in the new link
                replacements = { lnk: f'../{linkTargets[lnk.casefold()].clauseNumber}/#{lnk[1:]}'
                                 for lnk in _markdownLink.findall(line.text)
                                 if lnk.casefold() in linkTargets }
                if replacements:
                    line.text = self._replaceLinks(line.text, replacements)
                    if veryVerbose:
                        for lnk in replacements:
                            print(f'[dim]Updated Markdown link "{lnk}" in clause "{clause.title}"')


    def updateNotes(self) -> None:
        """ Update the notes in the clauses to the mkDocs notes version.

            A run of consecutive note lines is converted to a `!!! note` block.
            The note marker is removed from each line and the line is indented
            by a tab. An empty line is added before and after the block.

            After the update, the clauses are stored in the document object.
        """
        print('[green]Updating notes in clauses')

        for clause in self.clauses:
            lines:list[Line] = []
            inNote = False
            for line in clause.lines:
                if line.lineType == LineType.NOTE:
                    if not inNote:
                        # Start of a new note block
                        lines.append(Line('\n', LineType.TEXT))
                        lines.append(Line('!!! note\n', LineType.NOTE))
                        inNote = True
                    lines.append(Line(f"\t{re.sub(_matchNoteStart, '', line.text)}", LineType.NOTE))
                    if verbose:
                        print(f'[dim]Converted note in clause "{clause.title}"')
                else:
                    if inNote:
                        # End of the note block
                        lines.append(Line('\n', LineType.TEXT))
                        inNote = False
                    lines.append(line)
            clause.lines = lines


    def prepareForMkdocs(self, includeHangingParagraphs:bool = False) -> None:
        """ Prepare the clauses for MkDocs. This includes removing the heading
            from the clauses and handling hanging paragraphs.

            After the preparation, the clauses are stored in the document object.

            Args:
                includeHangingParagraphs: Include hanging paragraphs in the output.
        """

        # Remove the heading from the lines. The heading is (usually) the first line
        # in the clause. This is done because MkDocs repeats the heading when
        # displaying the page.
        for clause in self.clauses:
            # Only remove the first line if it really is a heading. A clause
            # without a heading, e.g. the text before the first heading, would
            # otherwise lose its first line.
            if clause.linesCount > 0 and clause.lines[0].lineType == LineType.HEADING:
                clause.lines.pop(0)
            # Also, remove the first empty lines if they exist
            while clause.linesCount > 0 and clause.lines[0].text.strip() == '':
                clause.lines.pop(0)

        # Detect and handle hanging paragraphs. This is extra text in a clause, which
        # has sub-clauses. This text is not allowed in oneM2M specifications.
        for i, clause in enumerate(self.clauses):
            if clause.level <= 0 or clause.linesCount == 0:
                continue
            # Check if there is a sub-clause in the next clause
            if i + 1 < len(self.clauses) and self.clauses[i+1].level > clause.level:
                print(f'[yellow]Hanging paragraph in clause "{clause.title}" {"(removed)" if not includeHangingParagraphs else "(kept)"}')
                if not includeHangingParagraphs:
                    clause.lines = []
                else:
                    # The line text carries its own newline, like all other lines.
                    # Without it the note would run into the following line.
                    clause.lines = [Line('<mark>Editor note: This is a hanging paragraph and it must be moved to its own clause</mark>\n'), Line()] + clause.lines

        # Repair wrong markdown for indented lines.
        # Add 2 spaces to existing 2-space indentations
        for clause in self.clauses:
            for line in clause.lines:
                if _match2spaceListIndention.match(line.text):
                    line.text = '  ' + line.text


    def writeClausesMkDocs(self, filename:str, navTitle:str, addNavTitle:bool = False) -> None:
        """ Write the clauses to separate files and create a navigation file.

            The clause files are written to the sub-directory `navTitle` of the
            directory of `filename`, the file `_nav.yml` is written to the
            directory of `filename`.

            Args:
                filename: The name of the original markdown file.
                navTitle: The title of the navigation entry. This is used to determine the directories.
                addNavTitle: Add the title as an extra navigation level to the navigation file.
        """

        print('[green]Writing clauses to files')
        baseDirectory = os.path.dirname(filename)

        # create directory first
        os.makedirs(f'{baseDirectory}/{navTitle}', exist_ok = True)

        # Write the files
        for f in self.clauses:
            # write to single files, even empty ones
            if verbose:
                print(f'[dim]Writing "{f.clauseNumber}.md" - "{f.title}"')
            with open(f'{baseDirectory}/{navTitle}/{f.clauseNumber}.md', 'w') as file:
                # Add one empty line before the clause. This is done to avoid
                # a bug in MkDocs that does not display the first line of a clause
                # if it contains a colon. It does not matter otherwise if the line
                # is empty or not.
                file.writelines(f.asStringList(1))

        # write nav.yml file
        print('[green]Writing "_nav.yml"')
        # NOTE(review): the whitespace inside the indentation literals below is
        # reproduced as visible in the reviewed source - confirm the intended width.
        indentation = ' ' if addNavTitle else ''	# TODO make number of spaces configurable
        with open(f'{baseDirectory}/_nav.yml', 'w') as file:
            if veryVerbose:
                print('[dim]Writing navigation file')
            if addNavTitle:
                file.write(f'{indentation}- {navTitle}:\n')
            for i, f in enumerate(self.clauses):

                # Clauses without a title get no navigation entry
                if not f.title:
                    continue

                # TODO handle if the next clause is more than one level deeper

                _title = f.title.replace("'", '"')
                nextClause = self.clauses[i+1] if i+1 < len(self.clauses) else None
                if nextClause is None or nextClause.level <= f.level:
                    file.write(f"{indentation}{' '*f.level}- '{_title}': '{navTitle}/{f.clauseNumber}.md'\n")
                else:
                    # The clause has sub-clauses, so it becomes a navigation section
                    file.write(f"{indentation}{' '*f.level}- '{_title}':\n")
                    if len(f) > 0:
                        file.write(f"{indentation}{' '*nextClause.level}- 'Hanging paragraph': '{navTitle}/{f.clauseNumber}.md'\n")


# Regular expressions that are visible in this part of the reviewed source.
# NOTE(review): further expressions that are used above, e.g. `_markdownLink`
# and `_match2spaceListIndention`, are defined elsewhere in the file.
_matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE)
_matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE)
_matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE)
_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
_htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)
_htmlTag = re.compile(r'<[^>]*>', re.IGNORECASE)
_matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE)
_footnote = re.compile(r'\[\^([^\]]*)\]:', re.IGNORECASE)
_inlineFootnote = re.compile(r'\[\^([^\]]*)\]', re.IGNORECASE)
Returns: - The list of clauses. + The document object. """ print(f'[green]Analyzing "{filename}"') @@ -194,6 +467,7 @@ def analyseMarkdown(filename:str) -> list[Clause]: # The list of clauses. The first clause contains the text before the first heading. outClauses:list[Clause] = [Clause(0, '', '', [])] + footnotes:list[Footnote] = [] # Go through the lines and detect headers and codefences inCodefence = False @@ -244,10 +518,16 @@ def analyseMarkdown(filename:str) -> list[Clause]: outClauses[-1].append(Line(line, LineType.NOTE)) continue + # Detect footnotes + # Footnotes are lines that start with a '^' + if (_fn := _footnote.match(line)): + footnotes.append(Footnote(_fn.groups()[0], Line(line, LineType.TEXT))) + continue + # Detect images on a single line if (m := _matchStandAloneImage.match(line)): outClauses[-1].append(Line(line, LineType.STANDALONEIMAGE)) - continue + continue # Detect headers _lineType = LineType.TEXT @@ -265,261 +545,7 @@ def analyseMarkdown(filename:str) -> list[Clause]: # Just add the line to the current clause as text outClauses[-1].append(Line(line, _lineType)) - return outClauses - - -def splitMarkdownDocument(clauses:list[Clause], - ignoreTitles:list[str] = [], - splitLevel:int = 1, - includeUntilFirstHeading:bool = False) -> list[Clause]: - """ Split the clauses at a certain level. This is used to create the separate - markdown files for MkDocs. - - Args: - clauses: The list of clauses. - ignoreTitles: A list of titles that should be ignored. They are not included in the output. - splitLevel: The level at which the clauses should be split. - includeUntilFirstHeader: Ignore all clauses until the first heading. - - Returns: - The list of clauses. 
- """ - outClauses:list[Clause] = [Clause(0, '', '', [])] - - for clause in clauses: - level = clause.level - - # Check if the current clause should be ignored - if clause.title.casefold() in ignoreTitles: - continue - - # Add a new output clause if the current clause's level is - # equal or less than the split level - if clause.level <= splitLevel: - outClauses.append(Clause(level, clause.clauseNumber, clause.title, [])) - - # Add the lines to the output clause - outClauses[-1].extend(clause) - - # Remove the first clauses if they contain no lines AND the title is empty - while outClauses[0].linesCount == 0 and not len(outClauses[0].title): - outClauses.pop(0) - - # Remove the first clause if it has no title - if not includeUntilFirstHeading: - while len(outClauses[0].title) == 0: - outClauses.pop(0) - - return outClauses - - -def prepareForMkdocs(clauses:list[Clause], includeHangingParagraphs:bool = False) -> list[Clause]: - """ Prepare the clauses for MkDocs. This includes removing the heading - from the clauses and marking the clauses that are only for navigation. - - Args: - clauses: The list of clauses. - includeHangingParagraphs: Include hanging paragraphs in the output. - - Returns: - The list of clauses. - """ - - # Remove the heading from the lines. The heading is (usually) the first line - # in the clause. This is done because MkDocs repeats the heading when - # displaying the page. - for clause in clauses: - if clause.linesCount > 0: - # Remove the first line from the clause if it is a heading - if clause.lines[0].lineType == LineType.HEADING: - clause.lines.pop(0) - # Also, remove the first empty lines if they exist - while clause.linesCount > 0 and clause.lines[0].text.strip() == '': - clause.lines.pop(0) - - # Detect and handle hanging paragraphs. This is extra text in a clause, which - # has sub-clauses. This text is not allowed in oneM2M specifications. 
- for i, clause in enumerate(clauses): - if clause.level > 0 and clause.linesCount > 0: - # Check if there is a sub-clause in the next clause - if i + 1 < len(clauses) and clauses[i+1].level > clause.level: - # This is a hanging paragraph. Remove the text from the current clause. - print(f'[yellow]Hanging paragraph in clause "{clause.title}" {"(removed)" if not includeHangingParagraphs else "(kept + warning)"}') - if not includeHangingParagraphs: - clauses[i].lines = [] - else: - clauses[i].lines = [Line('<mark>Editor note: This is a hanging paragraph and it must be moved to its own clause</mark>\n'), Line()] + clauses[i].lines - - # Repair wrong markdown for indented lines. - # Add 2 spaces to existing 2-space indentions - for clause in clauses: - for i, line in enumerate(clause.lines): - if _match2spaceListIndention.match(line.text): - clause.lines[i].text = ' ' + line.text - - return clauses - - -def updateLinks(clauses:list[Clause]) -> list[Clause]: - """ Update the links in the clauses to the new structure. This is done by - creating a dictionary of all links and their targets and then replacing - the links in the clauses. - - Args: - clauses: The list of clauses. - - Returns: - The list of clauses. - """ - print(f'[green]Updating links in clauses') - - # Build the link target dictionary. 
Mapping anchor -> clause - linkTargets:dict[str, Clause] = {} - - # Find all Markdown headers in the clauses and convert them to anchor format - for i, clause in enumerate(clauses): - # Find all headers in the clause - for line in clause.lines: - if (m := _matchHeader.match(line.text)): - - # convert the header to anchor format and add it to the dictionary - # Remove special characters - # TODO move perhaps to an own function - anchor = m.groups()[1].strip().casefold().replace(' ', '-') - for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'): - anchor = anchor.replace(c, '') - # remove html tags from the anchor - anchor = re.sub(_htmlTag, '', anchor) - - linkTargets[f'#{anchor}'] = clause - if veryVerbose: - print(f'[dim]Added Markdown anchor "{anchor}"') - - # Find all HTML anchors in the clauses and add them to the dictionary - for i, clause in enumerate(clauses): - for line in clause.lines: - if (anchors := _htmlAnchorLink.findall(line.text)): - for a in anchors: - linkTargets[f'#{a}'] = clause - if veryVerbose: - print(f'[dim]Found HTML anchor "{a}" in clause "{clause.title}"') - - # Replace the html links - for clause in clauses: - for i, line in enumerate(clause.lines): - if (links := _htmlLink.findall(line.text)): - for lnk in links: - if lnk in linkTargets: - line.text = clause.lines[i].text = line.text.replace(lnk, f'../{linkTargets[lnk].clauseNumber}/#{lnk[1:]}') # Update the current line as well - if veryVerbose: - print(f'[dim]Updated HTML link "{lnk}" in clause "{clause.title}"') - - # Replace the markdown links - for clause in clauses: - for i, line in enumerate(clause.lines): - if (links := _markdownLink.findall(line.text)): - # Replace the old link targets with converted - # (lower case) versions that point to the output files - for lnk in links: - _lnk =lnk.casefold() - if _lnk in linkTargets: - line.text = clause.lines[i].text = line.text.replace(lnk, f'../{linkTargets[_lnk].clauseNumber}/#{lnk[1:]}') # Update the current line as well - if 
veryVerbose: - print(f'[dim]Updated Markdown link "{lnk}" in clause "{clause.title}"') - - return clauses - - -def updateNotes(clauses:list[Clause]) -> list[Clause]: - """ Update the notes in the clauses to the mkDocs notes version. - - Args: - clauses: The list of clauses. - - Returns: - The list of clauses. - """ - print(f'[green]Updating notes in clauses') - - for clause in clauses: - lines:list[Line] = [] - inNote = False - for line in clause.lines: - if line.lineType == LineType.NOTE: - if not inNote: - lines.append(Line('\n', LineType.TEXT)) - lines.append(Line('!!! note\n', LineType.NOTE)) - inNote = True - lines.append(Line(f"\t{re.sub(_matchNoteStart, '', line.text)}", LineType.NOTE)) - if verbose: - print(f'[dim]Converted note in clause "{clause.title}"') - else: - if inNote: - lines.append(Line('\n', LineType.TEXT)) - inNote = False - lines.append(line) - clause.lines = lines - return clauses - - -def writeClauses(outClauses:list[Clause], filename:str, navTitle:str, addNavTitle:bool = False) -> None: - """ Write the clauses to separate files and create a navigation file. - - Args: - outClauses: The list of clauses. - filename: The name of the original markdown file. - navTitle: The title of the navigation entry. This is used to determine the directories. - addNavTitle: Add the title as an extra navigation level to the navigation file. - """ - - print(f'[green]Writing clauses to files') - # create directory first - os.makedirs(f'{os.path.dirname(filename)}/{navTitle}', exist_ok = True) - - # Write the files - for i, f in enumerate(outClauses): - # write to single files, even empty ones - if verbose: - print(f'[dim]Writing "{f.clauseNumber}.md" - "{f.title}"') - with open(f'{os.path.dirname(filename)}/{navTitle}/{f.clauseNumber}.md', 'w') as file: - # Add one empty line before the clause. This is done to avoid - # a bug in MkDocs that does not display the first line of a clause - # if it contains a colon. 
It does not matter otherwise if the line - # is empty or not. - file.writelines(f.asStringList(1)) - - - # write nav.yml file - print(f'[green]Writing "_nav.yml"') - indentation = ' ' if addNavTitle else '' # TODO make number of spaces configurable - with open(f'{os.path.dirname(filename)}/_nav.yml', 'w') as file: - if veryVerbose: - print(f'[dim]Writing navigation file') - if addNavTitle: - file.write(f'{indentation}- {navTitle}:\n') - for i, f in enumerate(outClauses): - - # TODO generate also the navigation for the first non-header clause - # if not f.title: - # if i == 0: - # file.write(f"{' '*(f.level+1)}- '': '{f.clauseNumber}.md'\n") - # continue - if not f.title: - continue - - - # TODO handle if the next clause is more than one level deeper - - _title = f.title.replace("'", '"') - nextClause = outClauses[i+1] if i+1 < len(outClauses) else None - if nextClause is None or nextClause.level <= f.level: - file.write(f"{indentation}{' '*f.level}- '{_title}': '{navTitle}/{f.clauseNumber}.md'\n") - else: - file.write(f"{indentation}{' '*f.level}- '{_title}':\n") - if len(f) > 0: - file.write(f"{indentation}{' '*nextClause.level}- '<mark>Hanging paragraph</mark>': '{navTitle}/{f.clauseNumber}.md'\n") - if verbose: - print(f'[dim]Added hanging paragraph to navigation for clause "{f.title}"') + return Document(outClauses, footnotes) def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None: @@ -542,27 +568,25 @@ def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> def processDocument(args:argparse.Namespace) -> None: global verbose, veryVerbose - document = os.path.abspath(args.document) + inDocumentFilename = os.path.abspath(args.document) veryVerbose = args.very_verbose verbose = args.verbose if veryVerbose: verbose = True # Analyse the markdown file - clauses = analyseMarkdown(document) - clauses = splitMarkdownDocument(clauses, - [ t.casefold() for t in args.ignore_clause ], - args.split_level, - 
args.include_title) - clauses = updateLinks(clauses) - clauses = updateNotes(clauses) - clauses = prepareForMkdocs(clauses, args.include_hanging_paragraphs) + document = analyseMarkdown(inDocumentFilename) + document.splitMarkdownDocument(args.ignore_clause, args.split_level) + document.insertFootnotes() + document.updateLinks() + document.updateNotes() + document.prepareForMkdocs(args.include_hanging_paragraphs) # Write the clauses to files - writeClauses(clauses, document, args.title, args.nav_add_title) + document.writeClausesMkDocs(inDocumentFilename, args.title, args.nav_add_title) # Copy the media files - copyMediaFiles(document, args.title, args.media_directory) + copyMediaFiles(inDocumentFilename, args.title, args.media_directory) if __name__ == '__main__':