Skip to content
Snippets Groups Projects
Commit 9d36ce02 authored by Andreas Kraft's avatar Andreas Kraft
Browse files

Added generateTOC script to generate and replace TOC sections in markdown files

parent 1ea0f0c9
No related branches found
No related tags found
No related merge requests found
# Generate TOC for Markdown files
The script will generate a TOC for a Markdown file, based on the headers in the file.
It generates and prints the TOC to the console, and optionally also inserts it into the original file.
For the latter, it will first create a backup copy of the file and then replace any section named "# Contents" with the new table of contents.
## Prerequisites
- Python 3.8 or higher
- The [rich](https://pypi.org/project/rich/) package (`pip install rich`)
## Usage
```bash
$ python generateTOC.py <document path>
```
## Command Line Options
```
usage: generateTOC.py [-h] [--add-content] [--indent <indent>] document
positional arguments:
document document to parse
options:
-h, --help show this help message and exit
--add-content, -a add TOC to "# Contents" section in the document (default: False)
--indent <indent>, -i <indent>
indent spaces for each level (default: 4)
```
\ No newline at end of file
#
# generateTOC.py
#
# Script to generate the table of contents for a markdown file.
#
# (c) 2023 by Andreas Kraft
# License: BSD 3-Clause License. See the LICENSE file for further details.
#
from __future__ import annotations
from typing import Tuple
import argparse, os, re
from rich import print
def backupFile(filename:str) -> None:
    """ Backup a file by renaming it to `<filename>.bak`.

        The original file no longer exists under its old name afterwards.
        Nothing happens if *filename* is not an existing regular file.
        A backup left over from a previous run is overwritten.

        Args:
            filename: The filename to backup.
    """
    if os.path.isfile(filename):
        # os.replace() overwrites an existing target on every platform, while
        # os.rename() raises FileExistsError on Windows when the backup
        # already exists.
        os.replace(filename, filename + '.bak')
def _prepareTOClink(title:str) -> str:
    """ Convert a header title into the anchor part of a markdown link.

        Args:
            title: The header text without the leading '#' characters.

        Return:
            The link target, without the leading '#'.
    """
    # Remove HTML tags
    title = re.sub('<[^<]+?>', '', title)
    # Add more special characters to replace in markdown header links if necessary
    return title.lower()\
                .replace(' ', '-')\
                .replace('.', '')\
                .replace(';', '')\
                .replace('&', '%26')\
                .replace('(', '%28')\
                .replace(')', '%29')\
                .replace('>', '%3E')\
                .replace(':', '%3A')


def _collectHeaders(document:list[str]) -> list[Tuple[str, int]]:
    """ Collect all headers of a markdown document.

        Lines inside fenced code blocks are ignored, so comments in embedded
        shell or Python snippets are not mistaken for headers.

        Args:
            document: The lines of the document.

        Return:
            List of tuples (title, level). The level is the number of '#' - 1.
    """
    headers:list[Tuple[str, int]] = []
    fence = None	# marker of the currently open code fence, if any
    for line in document:
        stripped = line.strip()
        if fence is not None:
            # Inside a fenced code block: only look for the closing fence
            if stripped.startswith(fence):
                fence = None
            continue
        if stripped.startswith(('```', '~~~')):
            fence = stripped[:3]
            continue
        if stripped.startswith('#'):
            title = stripped.lstrip('#')
            level = len(stripped) - len(title) - 1	# level is number of # - 1
            title = title.strip()
            if title:	# An empty header would only yield an empty link
                headers.append((title, level))
    return headers


def _buildTOC(headers:list[Tuple[str, int]], indent:int) -> str:
    """ Build the text of the "# Contents" section.

        Args:
            headers: List of tuples (title, level).
            indent: Number of non-breaking spaces per header level.

        Return:
            The table of contents, including the "# Contents" header line.
    """
    # Each entry ends with two spaces, which is a hard line break in markdown.
    # With fewer spaces the entries would be rendered as a single paragraph.
    entries = [ '&nbsp;' * (level * indent) + f'[{title}](#{_prepareTOClink(title)})  \n'
                for title, level in headers ]
    toc = '# Contents\n\n' + ''.join(entries)
    # Remove HTML tags from the titles
    return re.sub('<[^<]+?>', '', toc)


def _replaceTOC(document:list[str], toc:str) -> list[str]:
    """ Replace every "# Contents" section of a document with a new TOC.

        A section ends at the next line that starts with '#'.

        Args:
            document: The lines of the original document.
            toc: The new table of contents.

        Return:
            The lines of the new document.
    """
    result:list[str] = []
    inToc = False
    for line in document:
        stripped = line.strip()
        # Skip the old TOC
        if inToc:
            if not stripped.startswith('#'):
                continue
            inToc = False
        # Insert the new TOC instead of the old section header
        if stripped == '# Contents':
            inToc = True
            result.append(toc)
            continue
        # Keep the rest
        result.append(line)
    return result


def processDocument(args:argparse.Namespace) -> None:
    """ Generate the table of contents for a markdown document.

        The TOC is printed to the console. If requested, the document is
        backed up and rewritten with its "# Contents" section replaced by
        the new TOC.

        Args:
            args: The parsed command line arguments. The attributes
                `document` (path of the markdown file), `indent` (indent per
                header level) and `addContent` (also update the file) are read.
    """
    # Read the document. Use an explicit encoding so that the result does not
    # depend on the platform's default encoding.
    with open(args.document, 'r', encoding='utf-8') as f:
        document = f.readlines()

    # Prepare the table of contents.
    # The indent may be passed as a string, so it is converted explicitly.
    toc = _buildTOC(_collectHeaders(document), int(args.indent))

    # Write the TOC to the console
    # NOTE(review): `print` is imported from `rich` at the top of the file.
    # rich may interpret "[...]" as console markup - confirm that the entries
    # are shown verbatim for all header titles.
    print(toc)

    # Add the TOC to the document
    # The TOC replaces the old TOC if it exists in the section "# Contents"
    if args.addContent:
        backupFile(args.document)
        with open(args.document, 'w', encoding='utf-8') as f:
            f.writelines(_replaceTOC(document, toc))
if __name__ == '__main__':
    # Parse command line arguments
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # The section that is replaced is named "# Contents"
    parser.add_argument('--add-content', '-a', action='store_true', dest='addContent', default = False, help = 'add TOC to "# Contents" section in the document')
    # Parse the indent as an integer. It is used as a repetition count, and a
    # value given on the command line would otherwise arrive as a string.
    parser.add_argument('--indent', '-i', action='store', dest='indent', type = int, default = 4, metavar = '<indent>', help = 'indent spaces for each level')
    parser.add_argument('document', help = 'document to parse')
    args = parser.parse_args()

    # Generate the TOC and, if requested, update the document
    processDocument(args)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment