Added generateTOC script to generate and replace TOC sections in markdown files

9d36ce02 · Andreas Kraft · 1ea0f0c9 · 9d36ce02 · 9d36ce02
Commit 9d36ce02 authored 2 years ago by Andreas Kraft
--- a/generateTOC/README.md
+++ b/generateTOC/README.md
+# Generate TOC for Markdown files
+The script will generate a TOC for a Markdown file, based on the headers in the file.
+It generates the TOC and prints it to the console. With the `--add-content` option it also inserts the TOC into the original file.
+For the latter, it first creates a backup of the file (`<document>.bak`) and then replaces the content of any section named "# Contents" with the new table of contents.
+## Prerequisites
+- Python 3.8 or higher
+- The `rich` Python package (`pip install rich`)
+## Usage
+```bash
+$ python generateTOC.py <document path>
+```
+## Command Line Options
+```
+usage: generateTOC.py [-h] [--add-content] [--indent <indent>] document
+positional arguments:
+  document              document to parse
+options:
+  -h, --help            show this help message and exit
+  --add-content, -a     add TOC to "# Contents" section in the document (default: False)
+  --indent <indent>, -i <indent>
+                        indent spaces for each level (default: 4)
+```
\ No newline at end of file
--- a/generateTOC/generateTOC.py
+++ b/generateTOC/generateTOC.py
+#
+#	generateTOC.py
+#
+#	Script to generate the table of contents for a markdown file.
+#
+#	(c) 2023 by Andreas Kraft
+#	License: BSD 3-Clause License. See the LICENSE file for further details.
+#
+from __future__ import annotations
+from typing import Tuple
+import argparse, os, re
+from rich import print
+def backupFile(filename:str) -> None:
+	"""	Backup a file by renaming it to `<filename>.bak`.
+
+		After the call the original path no longer exists; the caller is
+		expected to write a new file in its place. An already existing backup
+		file is overwritten. Nothing happens if `filename` is not a regular file.
+
+		Args:
+			filename: The filename to backup.
+	"""
+	if os.path.isfile(filename):
+		# os.replace() overwrites an existing backup on every platform, whereas
+		# os.rename() raises FileExistsError on Windows if the target exists
+		# (e.g. when the script is run a second time on the same document).
+		os.replace(filename, filename + '.bak')
+def processDocument(args:argparse.Namespace) -> None:
+	"""	Generate the table of contents (TOC) for a markdown document.
+
+		The TOC is built from the header lines (lines starting with "#") of the
+		document and printed to the console. Lines inside fenced code blocks
+		(``` or ~~~) are not treated as headers. If `args.addContent` is set,
+		a backup of the document is created and the content of every
+		"# Contents" section is replaced with the new TOC.
+
+		Args:
+			args: The parsed command line arguments. The attributes `document`
+				(path of the markdown file), `indent` (number of "&nbsp;" per
+				header level) and `addContent` (write the TOC back to the file)
+				are used.
+
+		Raises:
+			ValueError: If `args.indent` cannot be converted to an integer.
+	"""
+	def prepareTOClink(line:str) -> str:
+		"""	Convert a header text to the anchor used in a markdown link.
+
+			Args:
+				line: The header text without the leading "#" characters.
+
+			Return:
+				The lower-cased header with HTML tags removed, spaces replaced by
+				"-", "." and ";" removed, and "&", "(", ")", ">" and ":"
+				percent-encoded.
+		"""
+		# Remove HTML tags
+		line = re.sub('<[^<]+?>', '', line)
+		# Add more special characters to replace in markdown header links if necessary
+		return line.lower()\
+				   .replace(' ', '-')\
+				   .replace('.', '')\
+				   .replace(';', '')\
+				   .replace('&', '%26')\
+				   .replace('(', '%28')\
+				   .replace(')', '%29')\
+				   .replace('>', '%3E')\
+				   .replace(':', '%3A')
+
+	# Command line values are strings unless the parser converts them, so make
+	# sure the indent is an integer before it is used as a multiplier.
+	indent = int(args.indent)
+
+	# Read the document. Markdown files are usually UTF-8 encoded, so do not
+	# rely on the platform's default encoding.
+	with open(args.document, 'r', encoding = 'utf-8') as f:
+		document = f.readlines()
+
+	# Collect the headers and remember on which lines they were found
+	headers:list[Tuple[str, int]] = []
+	headerLines:set[int] = set()
+	fence = ''	# marker of the currently open fenced code block, '' if none
+	for index, line in enumerate(document):
+		_l = line.strip()
+		if fence:
+			# Inside a fenced code block only the closing fence is of interest.
+			# A "#" here is a comment in the embedded code, not a header.
+			if _l.startswith(fence) and not _l.strip(fence[0]):
+				fence = ''
+			continue
+		if _l.startswith(('```', '~~~')):
+			fence = _l[:3]
+			continue
+		if _l.startswith('#'):
+			level = len(_l) - len(_l.lstrip('#')) - 1 # level is number of # - 1
+			headers.append((_l.lstrip('#').strip(), level))
+			headerLines.add(index)
+
+	# Prepare the table of contents. Each entry ends with two spaces to force
+	# a markdown line break.
+	entries = [ '&nbsp;' * (level * indent) + f'[{title}](#{prepareTOClink(title)})  \n'
+				for title, level in headers ]
+	to = re.sub('<[^<]+?>', '', '# Contents\n\n' + ''.join(entries))
+
+	# Write the TOC to the console
+	# NOTE(review): `print` is imported from `rich` at the top of the file, which
+	# presumably interprets "[...]" as console markup - confirm that all link
+	# texts are shown as expected.
+	print(to)
+
+	# Add the TOC to the document
+	# The TOC replaces the old TOC if it exists in the section "# Contents"
+	if args.addContent:
+		backupFile(args.document)
+		with open(args.document, 'w', encoding = 'utf-8') as f:
+			inToc = False
+			for index, line in enumerate(document):
+				isHeader = index in headerLines
+				# Skip the old TOC when writing. It ends at the next header.
+				if inToc:
+					if not isHeader:
+						continue
+					inToc = False
+				# Write the new TOC instead of the old section header
+				if isHeader and line.strip() == '# Contents':
+					inToc = True
+					f.write(to)
+					continue
+				# Write the rest
+				f.write(line)
+if __name__ == '__main__':
+	# Parse command line arguments
+	parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+	parser.add_argument('--add-content', '-a', action='store_true', dest='addContent', default = False,  help = 'add TOC to "# Contents" section in the document')
+	# type = int: without it a value given on the command line is passed on as
+	# a string and cannot be used to calculate the indentation of the TOC entries.
+	parser.add_argument('--indent', '-i', action='store', dest='indent', type = int, default = 4, metavar = '<indent>', help = 'indent spaces for each level')
+	parser.add_argument('document', help = 'document to parse')
+	args = parser.parse_args()
+	# Generate the TOC and, if requested, write it back to the document
+	processDocument(args)