From 9d36ce021abc5edc51f64eda3ffd95621473ff6e Mon Sep 17 00:00:00 2001
From: ankraft <an.kraft@gmail.com>
Date: Wed, 14 Jun 2023 15:46:36 +0200
Subject: [PATCH] Added generateTOC script to generate and replace TOC sections
 in markdown files

---
 generateTOC/README.md      |  32 ++++++++++++
 generateTOC/generateTOC.py | 101 +++++++++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)
 create mode 100644 generateTOC/README.md
 create mode 100644 generateTOC/generateTOC.py

diff --git a/generateTOC/README.md b/generateTOC/README.md
new file mode 100644
index 0000000..beae4c0
--- /dev/null
+++ b/generateTOC/README.md
@@ -0,0 +1,32 @@
+# Generate TOC for Markdown files
+
+The script will generate a TOC for a Markdown file, based on the headers in the file.
+
+It generates and prints the TOC to the console, and optionally also inserts it into the original file.
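+For the latter, it first creates a backup of the file (the original is renamed to `<document path>.bak`) and then replaces any section named "# Contents" with the new table of contents. If the document has no such section, its content is written back unchanged.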
+
+
+## Prerequisites
+
+- Python 3.8 or higher, with the [rich](https://pypi.org/project/rich/) package installed (`pip install rich`)
+
+## Usage
+
+```bash
+$ python generateTOC.py <document path>
+```
+
+## Command Line Options
+
+```
+usage: generateTOC.py [-h] [--add-content] [--indent <indent>] document
+
+positional arguments:
+  document              document to parse
+
+options:
+  -h, --help            show this help message and exit
+  --add-content, -a     add TOC to "# Contents" section in the document (default: False)
+  --indent <indent>, -i <indent>
+                        indent spaces for each level (default: 4)
+```
\ No newline at end of file
diff --git a/generateTOC/generateTOC.py b/generateTOC/generateTOC.py
new file mode 100644
index 0000000..36f2bb4
--- /dev/null
+++ b/generateTOC/generateTOC.py
@@ -0,0 +1,101 @@
+#
+#	generateTOC.py
+#
+#	Script to generate the table of contents for a markdown file.
+#
+#	(c) 2023 by Andreas Kraft
+#	License: BSD 3-Clause License. See the LICENSE file for further details.
+#
+
+from __future__ import annotations
+from typing import Tuple
+import argparse, os, re
+from rich import print
+
+
+def backupFile(filename:str) -> None:
+	"""	Backup a file by renaming it to `<filename>.bak`. An older backup is overwritten.
+
+		Args:
+			filename: The filename to backup. Nothing happens if this is not an existing file.
+	"""
+	if os.path.isfile(filename):
+		os.replace(filename, filename + '.bak')	# os.rename fails on Windows if the backup exists
+
+
+def processDocument(args:argparse.Namespace) -> None:
+	"""	Generate the TOC of a markdown document, print it, and optionally insert it into the file.
+
+		Args:
+			args: Uses `document` (path), `indent` (number of `&nbsp;` per level) and `addContent`.
+	"""
+	def prepareTOClink(line:str) -> str:
+		"""Prepare a link for the TOC"""
+		# Remove HTML tags
+		line = re.sub('<[^<]+?>', '', line)
+		# Add more special characters to replace in markdown header links if necessary
+		return line.lower()\
+				   .replace(' ', '-')\
+				   .replace('.', '')\
+				   .replace(';', '')\
+				   .replace('&', '%26')\
+				   .replace('(', '%28')\
+				   .replace(')', '%29')\
+				   .replace('>', '%3E')\
+				   .replace(':', '%3A')
+
+	# Read the document. It is read and written as UTF-8 instead of the locale's encoding.
+	with open(args.document, 'r', encoding = 'utf-8') as f:
+		document = f.readlines()
+
+	# Collect the headers as (title, level) tuples
+	headers:list[Tuple[str, int]] = []
+	inFence = False
+	for line in document:
+		_l = line.strip()
+		if _l.startswith(('```', '~~~')):	# '#' lines inside fenced code blocks are not headers
+			inFence = not inFence
+		elif not inFence and _l.startswith('#'):
+			level = len(_l) - len(_l.lstrip('#')) - 1 # level is number of # - 1
+			headers.append((_l.lstrip('#').strip(), level))
+
+	# Prepare the table of contents
+	indent = int(args.indent)	# the indent may arrive as a string from the command line
+	to = '# Contents\n\n'
+	for title, level in headers:
+		to += '&nbsp;' * (level * indent) + f'[{title}](#{prepareTOClink(title)})  \n'
+	to = re.sub('<[^<]+?>', '', to)
+
+	# Write the TOC to the console. NOTE(review): rich's print may treat "[...]" as markup - confirm
+	print(to)
+
+	# Add the TOC to the document. It replaces the old TOC if it exists in the section "# Contents"
+	if args.addContent:
+		backupFile(args.document)
+		with open(args.document, 'w', encoding = 'utf-8') as f:
+			inToc = False
+			for line in document:
+				if inToc:	# Skip the old TOC, ie. everything up to the next header
+					if not line.strip().startswith('#'):
+						continue
+					inToc = False
+				if line.strip() == '# Contents':	# Write the new TOC instead of the old header
+					inToc = True
+					f.write(to)
+					continue
+				f.write(line)	# Write the rest
+			
+
+
+if __name__ == '__main__':
+
+	# Parse command line arguments. The indent is converted to int, because it is used as a multiplier.
+	parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+	parser.add_argument('--add-content', '-a', action='store_true', dest='addContent', default = False,  help = 'add TOC to "# Contents" section in the document')
+	parser.add_argument('--indent', '-i', action='store', dest='indent', type = int, default = 4, metavar = '<indent>', help = 'indent spaces for each level')
+
+	parser.add_argument('document', help = 'document to parse')
+	args = parser.parse_args()
+
+	processDocument(args)
+
-- 
GitLab