From 9d36ce021abc5edc51f64eda3ffd95621473ff6e Mon Sep 17 00:00:00 2001 From: ankraft <an.kraft@gmail.com> Date: Wed, 14 Jun 2023 15:46:36 +0200 Subject: [PATCH] Added generateTOC script to generate and replace TOC sections in markdown files --- generateTOC/README.md | 32 ++++++++++++ generateTOC/generateTOC.py | 101 +++++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 generateTOC/README.md create mode 100644 generateTOC/generateTOC.py diff --git a/generateTOC/README.md b/generateTOC/README.md new file mode 100644 index 0000000..beae4c0 --- /dev/null +++ b/generateTOC/README.md @@ -0,0 +1,32 @@ +# Generate TOC for Markdown files + +The script will generate a TOC for a Markdown file, based on the headers in the file. + +It generates and prints the TOC to the console, and optionally also inserts it into the original file. +For the latter, it will first create a backup copy of the file and then replace any section named "# Contents" with the new table of contents. + + +## Prerequisites + +- Python 3.8 or higher + +## Usage + +```bash +$ python generateTOC.py <document path> +``` + +## Command Line Options + +``` +usage: generateTOC.py [-h] [--add-content] [--indent <indent>] document + +positional arguments: + document document to parse + +options: + -h, --help show this help message and exit + --add-content, -a add TOC to "# Contents" section in the document (default: False) + --indent <indent>, -i <indent> + indent spaces for each level (default: 4) +``` \ No newline at end of file diff --git a/generateTOC/generateTOC.py b/generateTOC/generateTOC.py new file mode 100644 index 0000000..36f2bb4 --- /dev/null +++ b/generateTOC/generateTOC.py @@ -0,0 +1,101 @@ +# +# generateTOC.py +# +# Script to generate the table of contents for a markdown file. +# +# (c) 2023 by Andreas Kraft +# License: BSD 3-Clause License. See the LICENSE file for further details. 
#

from __future__ import annotations
from typing import Tuple
import argparse
import os
import re

try:
    # rich is only used for console output; the script still works without it.
    from rich import print
except ImportError:
    pass


# Matches an HTML tag such as "<br>" or "<a name='x'>".
_HTML_TAG = re.compile('<[^<]+?>')

# Matches the marker that opens or closes a fenced code block.
_FENCE = re.compile(r'^(`{3,}|~{3,})')

# Characters that are rewritten when a heading is turned into a link target.
# Add more special characters to replace in markdown header links if necessary.
_LINK_TRANSLATION = str.maketrans({
    ' ': '-',
    '.': '',
    ';': '',
    '&': '%26',
    '(': '%28',
    ')': '%29',
    '>': '%3E',
    ':': '%3A',
})


def backupFile(filename:str) -> None:
    """ Backup a file by moving it to `<filename>.bak`.

        The file is moved, not copied, so it no longer exists under its
        original name afterwards. An existing backup is overwritten.
        Nothing happens if *filename* is not a regular file.

        Args:
            filename: The filename to backup.
    """
    if os.path.isfile(filename):
        # os.replace overwrites an existing backup on every platform, while
        # os.rename raises on Windows when the target already exists.
        os.replace(filename, filename + '.bak')


def _prepareTOClink(title:str) -> str:
    """ Convert a heading text to the link target used in the TOC.

        Args:
            title: The heading text without the leading '#' characters.

        Return:
            The lower-cased text with HTML tags removed and special
            characters replaced or percent-encoded.
    """
    return _HTML_TAG.sub('', title).lower().translate(_LINK_TRANSLATION)


def _markHeadings(lines:list[str]) -> list[bool]:
    """ Determine which lines of a document are heading lines.

        A heading line starts with '#' (ignoring surrounding whitespace) and
        is not located inside a fenced code block, where '#' usually starts a
        comment of the embedded language.

        Args:
            lines: The lines of the document.

        Return:
            A list with one flag per line, True for heading lines.
    """
    flags:list[bool] = []
    fence = None    # marker that opened the current code block, if any
    for line in lines:
        stripped = line.strip()
        marker = _FENCE.match(stripped)
        if fence is None:
            if marker:
                fence = marker.group(1)
                flags.append(False)
            else:
                flags.append(stripped.startswith('#'))
            continue

        # Inside a code block: only a bare fence of the same character that is
        # at least as long as the opening one closes the block.
        if marker and stripped == marker.group(1) \
                and stripped[0] == fence[0] and len(stripped) >= len(fence):
            fence = None
        flags.append(False)
    return flags


def _parseHeading(line:str) -> Tuple[str, int]:
    """ Split a heading line into its text and its nesting level.

        Args:
            line: A line starting with one or more '#' characters.

        Return:
            Tuple of heading text and level. The level is the number of
            leading '#' characters minus one.
    """
    stripped = line.strip()
    text = stripped.lstrip('#')
    return text.strip(), len(stripped) - len(text) - 1


def _buildTOC(headers:list[Tuple[str, int]], indent:int) -> str:
    """ Build the markdown text of the "# Contents" section.

        Args:
            headers: Heading texts and levels in document order.
            indent: Number of spaces to indent per level.

        Return:
            The section heading followed by one link line per heading.
    """
    entries = []
    for title, level in headers:
        # HTML tags are removed per heading. Removing them from the complete
        # TOC text could match from a '<' in one heading to a '>' in a later
        # heading and thereby delete the entries in between.
        text = _HTML_TAG.sub('', title)
        # The two trailing spaces force a markdown line break after the entry.
        entries.append(' ' * (level * indent) + f'[{text}](#{_prepareTOClink(title)})  \n')
    return '# Contents\n\n' + ''.join(entries)


def _replaceTOC(lines:list[str], headingFlags:list[bool], toc:str) -> list[str]:
    """ Replace the content of every "# Contents" section with a new TOC.

        Args:
            lines: The lines of the document.
            headingFlags: Flags marking the heading lines in *lines*.
            toc: The new table of contents.

        Return:
            The lines of the updated document.
    """
    result:list[str] = []
    inToc = False
    for line, isHeading in zip(lines, headingFlags):
        # Skip the old TOC, which ends at the next heading
        if inToc:
            if not isHeading:
                continue
            inToc = False

        if isHeading and line.strip() == '# Contents':
            inToc = True
            # The empty line separates the TOC from the following heading
            result.append(toc + '\n')
            continue

        result.append(line)
    return result


def processDocument(args:argparse.Namespace) -> None:
    """ Generate the table of contents for a markdown document.

        The TOC is printed to the console. If requested, the document is
        backed up and written again with the content of each "# Contents"
        section replaced by the new TOC.

        Args:
            args: Parsed command line arguments. The attributes `document`
                (path of the markdown file), `indent` (spaces per level) and
                `addContent` (also update the document) are read.
    """

    # Read the document
    with open(args.document, 'r', encoding='utf-8') as f:
        document = f.readlines()

    headingFlags = _markHeadings(document)
    headers = [ _parseHeading(line)
                for line, isHeading in zip(document, headingFlags)
                if isHeading ]

    # Accept the indent as a string as well, for callers that pass it unconverted
    toc = _buildTOC(headers, int(args.indent))

    # Write the TOC to the console
    # NOTE(review): rich's print may interpret some "[...]" sequences as console
    # markup. Verify that all headings are shown verbatim on the console. The
    # text written to the document is not affected.
    print(toc)

    # Add the TOC to the document
    # The TOC replaces the old TOC if it exists in the section "# Contents"
    if args.addContent:
        backupFile(args.document)
        with open(args.document, 'w', encoding='utf-8') as f:
            f.writelines(_replaceTOC(document, headingFlags, toc))


if __name__ == '__main__':

    # Parse command line arguments
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--add-content', '-a', action='store_true', dest='addContent', default=False, help='add TOC to "# Contents" section in the document')
    # type=int is required, otherwise a value from the command line is a string
    parser.add_argument('--indent', '-i', action='store', dest='indent', type=int, default=4, metavar='<indent>', help='indent spaces for each level')

    parser.add_argument('document', help='document to parse')
    args = parser.parse_args()

    processDocument(args)