#
#	generateTOC.py
#
#	Script to generate the table of contents for a markdown file.
#
#	(c) 2023 by Andreas Kraft
#	License: BSD 3-Clause License. See the LICENSE file for further details.
#

from __future__ import annotations
from typing import Tuple
import argparse, os, re, sys
from rich import print


# These tags are only honoured when the --toc-tags option is set. A line must consist
# of exactly one of these tags (ignoring surrounding whitespace) to be replaced.
tocTags:list[str] = ['[TOC]', '[CONTENT]', '<!--TOC-->']
"""	TOC tags to replace in the document with the TOC. """

def backupFile(filename:str) -> None:
	"""	Backup a file by renaming it to *filename* + '.bak'.

		Nothing happens if *filename* does not exist or is not a regular file.
		An already existing backup file is overwritten.

		Args:
			filename: The filename to backup.
	"""
	if os.path.isfile(filename):
		# os.replace() overwrites an existing backup on every platform, whereas
		# os.rename() raises FileExistsError on Windows if the target already exists.
		os.replace(filename, filename + '.bak')


def processDocument(args:argparse.Namespace) -> None:
	"""	Process the document and generate the TOC.

		All headlines (lines starting with '#') that are not inside a fenced code
		block are collected and turned into a "# Contents" section.

		If *args.addContent* or *args.outfile* is set then the document is written
		again, with the generated section replacing the first "# Contents" line (or,
		if *args.tocTags* is set, the first TOC tag line) and everything that follows
		it up to the next line starting with '#'. An existing output file is backed up
		first. Otherwise the document is only parsed and nothing is written.

		Args:
			args: The command line arguments. The attributes *document*, *levels*,
				*contents*, *indent*, *addContent*, *outfile* and *tocTags* are used.
	"""

	# Matches HTML tags, which are removed from the links and from the TOC
	htmlTag = re.compile(r'<[^<]+?>')
	# Matches the marker of a fenced code block and the remainder of that line
	codeFence = re.compile(r'(`{3,}|~{3,})(.*)')

	def prepareTOClink(line:str) -> str:
		"""	Prepare a link for the TOC.

			Args:
				line: The line to prepare. Opening brackets are expected to be
					escaped already, ie. as a backslash followed by '['.

			Returns:
				The prepared line.
		"""

		# Remove HTML tags
		line = htmlTag.sub('', line)

		# Add more special characters to replace in markdown header links if necessary
		return line.lower()\
			.replace(' ', '-')\
			.replace('.', '')\
			.replace(';', '')\
			.replace('&', '%26')\
			.replace('(', '%28')\
			.replace(')', '%29')\
			.replace('>', '%3E')\
			.replace('\\[', '')\
			.replace(']', '')\
			.replace(':', '%3A')


	# Read the document
	# Note: We use utf-8 and replace errors to avoid problems with special or unknown characters.
	with open(args.document, 'r', encoding='utf-8', errors='replace') as f:
		document = f.readlines()

	# Collect the headlines together with their (0-based) level
	headers:list[Tuple[str, int]] = []
	openFence = ''	# Marker that opened the current fenced code block, empty when outside
	for line in document:
		_l = line.strip()

		# Lines starting with '#' inside a fenced code block (eg. shell or python
		# comments) are not headlines, so keep track of the opening and closing fences.
		if (m := codeFence.match(_l)):
			marker, rest = m.groups()
			if openFence:
				# Only a fence of the same kind, at least as long as the opening one
				# and without any trailing text closes the code block.
				if marker[0] == openFence[0] and len(marker) >= len(openFence) and not rest:
					openFence = ''
				continue
			# A backtick fence must not contain further backticks in the same line.
			# Otherwise the line is inline code and not the start of a code block.
			if not (marker[0] == '`' and '`' in rest):
				openFence = marker
				continue
		if openFence or not _l.startswith('#'):
			continue

		headline = _l.lstrip('#')
		level = len(_l) - len(headline) - 1 # level is number of # - 1

		# Skip if level is too large
		if args.levels and level >= args.levels:
			continue

		# Skip the Contents headline if necessary
		if (headline := headline.strip()) == 'Contents' and not args.contents:
			continue
		headers.append((headline, level))

	# Prepare the table of contents
	entries:list[str] = []
	for headline, level in headers:
		# Escape opening brackets in the link text. prepareTOClink() removes the
		# escaped brackets again from the link target.
		text = headline.replace('[', '\\[')
		entries.append('&nbsp;' * (level * args.indent) + f'[{text}](#{prepareTOClink(text)})  \n')
	toc = htmlTag.sub('', '# Contents\n\n' + ''.join(entries)) + '\n'

	# Add the TOC to the document
	# The TOC replaces the old TOC if it exists in the section "# Contents"
	# An outfile explicitly enables the TOC to be added to the document
	if args.addContent or args.outfile:
		outDocument = args.document if not args.outfile else args.outfile
		backupFile(outDocument)
		tocDone = False
		with open(outDocument, 'w', encoding='utf-8', errors='replace') as f:
			inToc = False
			for line in document:
				# Skip the old TOC when writing. It ends at the next line starting with '#'.
				if inToc:
					if not line.strip().startswith('#'):
						continue
					f.write('\n') # Add a newline
					inToc = False
					tocDone = True

				# Write the new TOC in the right place. Only the first location is replaced.
				if not tocDone:
					if (args.tocTags and line.strip() in tocTags) or line.strip() == '# Contents':
						inToc = True
						f.write(toc)
						continue

				# Write the rest
				f.write(line)
			


def main(args=None):

	def nonNegativeInt(value:str) -> int:
		"""Check if a value is a non-negative integer.
		
			Args:
				value: The value to check.
			
			Returns:
				The value if it is a non-negative integer.

			Raises:
				argparse.ArgumentTypeError: If the value is not a non-negative integer.
		"""
		ivalue = int(value)
		if ivalue < 0:
			raise argparse.ArgumentTypeError("%s is an invalid non-negative value" % value)
		return ivalue

	# Parse command line arguments
	parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
	parser.add_argument('--add-content', '-a', action='store_true', dest='addContent', default = False,  help = 'add TOC to "# Content" section in the document')
	parser.add_argument('--contents', '-c', action='store_true', dest='contents', default = False, help = 'add link to "Contents" section in the generated TOC')
	parser.add_argument('--indent', '-i', action='store', dest='indent', type = nonNegativeInt, default = 4, metavar = '<indent>', help = 'indent spaces for each level')
	parser.add_argument('--level', '-l', action='store', dest='levels', type = nonNegativeInt, default = 0, help = 'limit the TOC levels; 0 means no limit')
	parser.add_argument('--outfile', '-o', action='store', dest='outfile', help = 'set the output file name; if not set, the input file will be overwritten')
	parser.add_argument('--toc-tags', '-t', action='store_true', dest='tocTags', default = False, help = 'replace special TOC tokens in the document')

	parser.add_argument('document', help = 'document to parse')
	args = parser.parse_args()

	processDocument(args)

# Run only when executed as a script. main() returns None, so the exit status is 0
# unless argument parsing or an exception terminates the run earlier.
if __name__ == '__main__':
	sys.exit(main())