#
#	toMkdocs.py
#
#	(c) 2024 by Andreas Kraft
#
#	This script converts a oneM2M specification markdown file into a
#	MkDocs-compatible directory structure.
#
from __future__ import annotations
from enum import Enum, auto
import argparse, re, os, shutil, hashlib, base64
from dataclasses import dataclass
from rich import print

# Output verbosity flags. These are module globals that are overwritten by
# processDocument() with the values from the command line arguments.
verbose = False			# print progress messages, e.g. for each written clause file
veryVerbose = False		# additionally print details, e.g. for each found anchor and updated link

class LineType(Enum):
	"""	Represents the type of a line in the markdown file.

		The type is assigned to each line while the markdown file is analysed.
		The values are generated by `auto()`, so they follow the declaration order.
	"""
	# A markdown heading line ('#', '##', ...). It starts a new clause.
	HEADING = auto()
	# Normal text. This is the default type of a line.
	TEXT = auto()
	# The line that opens a code fence
	CODEFENCESTART = auto()
	# A line inside a code fence
	CODE = auto()
	# The line that closes a code fence
	CODEFENCEEND = auto()
	# A list line. NOTE(review): not assigned anywhere in the visible code - confirm usage
	LIST = auto()
	# A note, i.e. a line that starts with '>'
	NOTE = auto()
	# A line that only contains an image
	STANDALONEIMAGE = auto()
	# The first line of a table
	TABLEHEADER = auto()
	# The separator line between the table header and the table rows
	TABLESEPARATOR = auto()
	# A row of a table
	TABLEROW = auto()
	# The last line of a table. It is set once the end of a table is detected.
	TABLELASTROW = auto()


@dataclass
class Line:
	"""	Represents a line in the markdown file.

		Both fields have defaults, so `Line()` creates an empty text line.
	"""
	# The text of the line. Lines read from the markdown file keep their line ending.
	text:str = ''
	# The detected type of the line. Lines are plain text by default.
	lineType:LineType = LineType.TEXT



@dataclass
class Clause:
	"""	A clause of the markdown file, i.e. a heading together with its lines.

		The fields are passed positionally to the constructor in the order
		level, clause number, title and lines. They are accessed through the
		properties of the same name without the leading underscore.
	"""
	_level:int
	_clauseNumber:str
	_title:str
	_lines:list[Line]


	@property
	def level(self) -> int:
		"""	The heading level of the clause (read-only). """
		return self._level


	@property
	def clauseNumber(self) -> str:
		"""	The clause number. An empty clause number is reported as '0'. """
		return self._clauseNumber or '0'


	@clauseNumber.setter
	def clauseNumber(self, value:str) -> None:
		"""	Assign a new clause number. """
		self._clauseNumber = value


	@property
	def title(self) -> str:
		"""	The title of the clause. """
		return self._title


	@title.setter
	def title(self, value:str) -> None:
		"""	Assign a new title. """
		self._title = value


	@property
	def lines(self) -> list[Line]:
		"""	The lines of the clause. The list itself is returned, not a copy. """
		return self._lines


	@lines.setter
	def lines(self, value:list[Line]) -> None:
		"""	Replace the lines of the clause. """
		self._lines = value


	@property
	def linesCount(self) -> int:
		"""	The number of lines in the clause.

			Returns:
				The length of the clause's list of lines.
		"""
		return len(self._lines)


	def append(self, line:Line) -> None:
		"""	Add a single line at the end of the clause.

			Args:
				line: The line to add.
		"""
		self._lines.append(line)


	def extend(self, clause:Clause) -> None:
		"""	Add all lines of another clause at the end of this clause.

			Args:
				clause: The clause whose lines are added.
		"""
		self._lines += clause.lines


	def asStringList(self, paddings:int = 0) -> list[str]:
		"""	Return the texts of the clause's lines.

			Args:
				paddings: The number of newline-only entries to put in front of the texts.
			Returns:
				The padding entries followed by the text of each line.
		"""
		padding = ['\n'] * paddings
		texts = [ each.text for each in self._lines ]
		return padding + texts


	def __len__(self) -> int:
		"""	Return the size of the clause in characters.

			Returns:
				The sum of the text lengths of all lines.
		"""
		return sum(len(each.text) for each in self._lines)


class Footnote:
	"""	A footnote definition of the markdown file.

		Attributes:
			id: The identifier of the footnote.
			line: The line that contains the footnote definition.
	"""
	def __init__(self, id:str, line:Line) -> None:
		"""	Store the identifier and the defining line of the footnote.

			Args:
				id: The identifier of the footnote.
				line: The line that contains the footnote definition.
		"""
		self.id, self.line = id, line


class Document:
	"""	Represents the document object.

		A document consists of the list of clauses and the list of footnote
		definitions of a markdown file. The processing methods update the
		clauses in place.

		Attributes:
			clauses: The clauses of the document.
			footnotes: The footnote definitions of the document.
	"""
	clauses:list[Clause]
	footnotes:list[Footnote]

	def __init__(self, clauses:list[Clause], footnotes:list[Footnote]) -> None:
		"""	Initialize the document.

			Args:
				clauses: The clauses of the document.
				footnotes: The footnote definitions of the document.
		"""
		self.clauses = clauses
		self.footnotes = footnotes


	def splitMarkdownDocument(self, 
							  ignoreTitles:list[str]|None = None, 
							  splitLevel:int = 1,
							  ignoreUntilFirstHeading:bool = False) -> None:
		"""	Split the clauses at a certain level. This is used to create the separate
			markdown files for MkDocs.

			After the split, the clauses are stored in the document object.

			Args:
				ignoreTitles: A list of titles that should be ignored. They are not included in the output.
					The titles are compared case-insensitively.
				splitLevel: The level at which the clauses should be split.
				ignoreUntilFirstHeading: Ignore all clauses until the first heading.
			
		"""
		result:list[Clause] = []

		# Case-insensitive lookup of the titles to ignore
		ignoredTitles = { t.casefold() for t in (ignoreTitles or []) }

		for clause in self.clauses:

			# Check if the current clause should be ignored
			if clause.title.casefold() in ignoredTitles:
				continue

			# Add a new output clause if the current clause's level is 
			# equal or less than the split level.
			# A new output clause is also started when there is none yet to
			# extend, e.g. when the preceding clauses were all ignored.
			if clause.level <= splitLevel or not result:
				result.append(Clause(clause.level, clause.clauseNumber, clause.title, []))
			
			# Add the lines to the output clause
			result[-1].extend(clause)
		
		# Remove the leading clauses that have no title. The loop stops when
		# no clause is left, so a document without headings is handled as well.
		if ignoreUntilFirstHeading:
			while result and not result[0].title:
				result.pop(0)
		
		self.clauses = result


	def insertFootnotes(self) -> None:
		"""	Insert footnotes into the clauses.

			The definitions of all footnotes that are referenced in a clause
			are added to the end of that clause. Every footnote is added only
			once per clause, even if it is referenced multiple times.

			After the insertion, the clauses are stored in the document object.
			
		"""
		print(f'[green]Adding footnotes to clauses')

		for clause in self.clauses:
			foundFootnotes:list[Footnote] = []
			seenIDs:set[str] = set()
			for line in clause.lines:
				# ATTN: Only footnotes in normal text lines are checked
				if line.lineType != LineType.TEXT:
					continue

				# A line may reference more than one footnote
				for footnoteID in _inlineFootnote.findall(line.text):
					if footnoteID in seenIDs:
						continue
					seenIDs.add(footnoteID)
					# Find the footnote in the list of footnotes
					foundFootnotes.extend(f for f in self.footnotes if f.id == footnoteID)

			# Insert the footnotes at the end of the clause
			if foundFootnotes:
				clause.append(Line('\n', LineType.TEXT))
				for f in foundFootnotes:
					# Add a copy of the line. The same footnote may be added to
					# multiple clauses, which must not share a single line object.
					clause.append(Line(f.line.text, f.line.lineType))


	@staticmethod
	def _headingToAnchor(heading:str) -> str:
		"""	Convert the text of a heading to the anchor format.

			Args:
				heading: The text of the heading without the leading '#' characters.

			Returns:
				The anchor without a leading '#'.
		"""
		anchor = heading.strip().casefold().replace(' ', '-')
		# Remove special characters
		for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'):
			anchor = anchor.replace(c, '')
		# Remove html tags from the anchor
		return _htmlTag.sub('', anchor)


	def _retargetLinks(self, 
					   pattern:re.Pattern, 
					   linkTargets:dict[str, Clause], 
					   description:str, 
					   foldCase:bool) -> None:
		"""	Let all links that match a pattern point to the file of their target clause.

			Args:
				pattern: The compiled expression to find the links. Its first group must capture the link target.
				linkTargets: The mapping of anchors (including the leading '#') to clauses.
				description: The kind of link. This is only used for the verbose output.
				foldCase: Whether the link target is case-folded before it is looked up.
		"""

		def retarget(m:re.Match, clause:Clause) -> str:
			lnk = m.group(1)
			target = linkTargets.get(lnk.casefold() if foldCase else lnk)
			if target is None:
				return m.group(0)	# Unknown target. Keep the link as it is.
			if veryVerbose:
				print(f'[dim]Updated {description} link "{lnk}" in clause "{clause.title}"')
			# Only replace the captured link target and keep the rest of the
			# match. A replacement of the anchor text in the whole line would
			# also change other occurrences, and would change a link twice
			# when it appears more than once in a line.
			whole = m.group(0)
			offset = m.start(0)
			return f'{whole[:m.start(1) - offset]}../{target.clauseNumber}/#{lnk[1:]}{whole[m.end(1) - offset:]}'

		for clause in self.clauses:
			for line in clause.lines:
				line.text = pattern.sub(lambda m, c = clause: retarget(m, c), line.text)


	def updateLinks(self) -> None:
		"""	Update the links in the clauses to the new structure. This is done by
			creating a dictionary of all links and their targets and then replacing
			the links in the clauses.

			After the update, the clauses are stored in the document object.
		"""
		print(f'[green]Updating links in clauses')

		# Build the link target dictionary. Mapping anchor -> clause
		linkTargets:dict[str, Clause] = {}

		# Find all Markdown headers in the clauses and convert them to anchor format
		for clause in self.clauses:
			for line in clause.lines:
				# Lines inside a code fence are not headings, even if they start
				# with a '#' (e.g. comments in shell scripts).
				if line.lineType == LineType.CODE:
					continue
				if (m := _matchHeader.match(line.text)):
					anchor = self._headingToAnchor(m.groups()[1])
					linkTargets[f'#{anchor}'] = clause
					if veryVerbose:
						print(f'[dim]Added Markdown anchor "{anchor}"')

		# Find all HTML anchors in the clauses and add them to the dictionary
		for clause in self.clauses:
			for line in clause.lines:
				for a in _htmlAnchorLink.findall(line.text):
					linkTargets[f'#{a}'] = clause
					if veryVerbose:
						print(f'[dim]Found HTML anchor "{a}" in clause "{clause.title}"')

		# Replace the html links
		self._retargetLinks(_htmlLink, linkTargets, 'HTML', foldCase = False)

		# Replace the markdown links. The link targets are looked up in their
		# converted (lower case) version.
		self._retargetLinks(_markdownLink, linkTargets, 'Markdown', foldCase = True)


	def updateNotes(self) -> None:
		"""	Update the notes in the clauses to the mkDocs notes version.

			Consecutive note lines are converted to a single '!!! note' block
			with the note text indented by a tab.

			After the update, the clauses are stored in the document object.
		"""
		print(f'[green]Updating notes in clauses')

		for clause in self.clauses:
			lines:list[Line] = []
			inNote = False
			for line in clause.lines:
				if line.lineType == LineType.NOTE:
					if not inNote:
						lines.append(Line('\n', LineType.TEXT))
						lines.append(Line('!!! note\n', LineType.NOTE))
						inNote = True
					noteText = _matchNoteStart.sub('', line.text)
					# The note prefix expression also consumes the line ending
					# of a note line without text. Restore it, otherwise the
					# next line would be joined with this one.
					if line.text.endswith('\n') and not noteText.endswith('\n'):
						noteText += '\n'
					lines.append(Line(f"\t{noteText}", LineType.NOTE))
					if verbose:
						print(f'[dim]Converted note in clause "{clause.title}"')
				else:
					if inNote:
						lines.append(Line('\n', LineType.TEXT))
					inNote = False
					lines.append(line)
			clause.lines = lines


	def prepareForMkdocs(self, includeHangingParagraphs:bool = False) -> None:
		"""	Prepare the clauses for MkDocs. This includes removing the heading
			from the clauses and marking the clauses that are only for navigation.

			After the preparation, the clauses are stored in the document object.

			Args:
				includeHangingParagraphs: Include hanging paragraphs in the output.
		"""

		# Remove the heading from the lines. The heading is the first line
		# in the clause. This is done because MkDocs repeats the heading when
		# displaying the page.
		for clause in self.clauses:
			# The clause with the text before the first heading does not start
			# with a heading. Its first line must be kept.
			if clause.linesCount > 0 and clause.lines[0].lineType == LineType.HEADING:
				clause.lines.pop(0)
			# Also, remove the first empty lines if they exist
			while clause.linesCount > 0 and clause.lines[0].text.strip() == '':
				clause.lines.pop(0)

		# Detect and handle hanging paragraphs. This is extra text in a clause, which
		# has sub-clauses. This text is not allowed in oneM2M specifications.
		for i, clause in enumerate(self.clauses):
			if clause.level > 0 and clause.linesCount > 0:
				# Check if there is a sub-clause in the next clause
				if i + 1 < len(self.clauses) and self.clauses[i+1].level > clause.level:
					# This is a hanging paragraph. Remove the text from the current clause.
					print(f'[yellow]Hanging paragraph in clause "{clause.title}" {"(removed)" if not includeHangingParagraphs else "(kept)"}')
					if not includeHangingParagraphs:
						clause.lines = []
					else:
						# The lines are written to the file as they are, so the
						# editor note and the empty line need their own line endings.
						clause.lines = [ Line("<mark>Editor note: This is a hanging paragraph and it must be moved to its own clause</mark>\n"),
										 Line('\n') ] + clause.lines

		# Repair wrong markdown for indented lines.
		# Add 2 spaces to existing 2-space indentions
		for clause in self.clauses:
			for line in clause.lines:
				# The content of code fences must not be changed
				if line.lineType == LineType.CODE:
					continue
				if _match2spaceListIndention.match(line.text):
					line.text = '  ' + line.text


	def writeClausesMkDocs(self, filename:str, navTitle:str, addNavTitle:bool = False) -> None:
		"""	Write the clauses to separate files and create a navigation file.

			The clause files are written to a sub-directory, named after the 
			navigation title, of the directory of the original markdown file.
			The navigation file "_nav.yml" is written next to the original
			markdown file. All files are written with UTF-8 encoding.

			Args:
				filename: The name of the original markdown file.
				navTitle: The title of the navigation entry. This is used to determine the directories.
				addNavTitle: Add the title as an extra navigation level to the navigation file.
		"""

		print(f'[green]Writing clauses to files')
		# create directory first
		baseDirectory = os.path.dirname(filename)
		clausesDirectory = os.path.join(baseDirectory, navTitle)
		os.makedirs(clausesDirectory, exist_ok = True)

		# Write the files
		for clause in self.clauses:
			# write to single files, even empty ones
			if verbose:
				print(f'[dim]Writing "{clause.clauseNumber}.md" - "{clause.title}"')
			# The input is read as UTF-8, so the output is written as UTF-8 as
			# well, independent of the platform's default encoding.
			with open(os.path.join(clausesDirectory, f'{clause.clauseNumber}.md'), 'w', encoding = 'utf-8') as file:
				# Add one empty line before the clause. This is done to avoid
				# a bug in MkDocs that does not display the first line of a clause
				# if it contains a colon. It does not matter otherwise if the line
				# is empty or not.
				file.writelines(clause.asStringList(1))	

		
		# write nav.yml file
		print(f'[green]Writing "_nav.yml"')
		indentation = '  ' if addNavTitle else ''	# TODO make number of spaces configurable
		with open(os.path.join(baseDirectory, '_nav.yml'), 'w', encoding = 'utf-8') as file:
			if veryVerbose:
				print(f'[dim]Writing navigation file')
			if addNavTitle:
				file.write(f'{indentation}- {navTitle}:\n')
			for i, f in enumerate(self.clauses):

				# Clauses without a title cannot have a navigation entry
				if not f.title:
					if veryVerbose:
						print(f'[dim]Skipping clause without title in navigation')
					continue

				# TODO handle if the next clause is more than one level deeper
	
				# The title is written in single quotes, so it must not contain any
				_title = f.title.replace("'", '"')
				nextClause = self.clauses[i+1] if i+1 < len(self.clauses) else None
				if nextClause is None or nextClause.level <= f.level:
					file.write(f"{indentation}{'  '*f.level}- '{_title}': '{navTitle}/{f.clauseNumber}.md'\n")
				else:
					# The clause has sub-clauses, so it is only a navigation level.
					# Remaining text of the clause is added as a separate entry.
					file.write(f"{indentation}{'  '*f.level}- '{_title}':\n")
					if len(f) > 0:
						file.write(f"{indentation}{'  '*nextClause.level}- 'Hanging paragraph': '{navTitle}/{f.clauseNumber}.md'\n")




# Regular expressions to detect and to convert the elements of the markdown file

# A heading. Group 1 contains the '#' characters, group 2 the heading text.
_matchHeader = re.compile(r'(#+)\s+(.*)', re.IGNORECASE)
# The clause number in a heading text, e.g. "5", "5.1.2" or "A.1"
_matchHeaderNumber = re.compile(r'\b[A-Za-z0-9]\d*(\.\d+)*\b', re.IGNORECASE)
# The start and the end of a code fence with 3 backticks
_matchCodefenceStart = re.compile(r'\s*```\s?.*', re.IGNORECASE)
_matchCodefenceEnd = re.compile(r'\s*```\s?', re.IGNORECASE)
# A note, i.e. a line that starts with '>'
_matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE)
# A line that only starts with an image. Group 1 contains the image location.
_matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECASE)
# A table line. Any amount of trailing whitespace is accepted, including 
# none (last line of a file without a line ending).
_matchTable = re.compile(r'^\s*\|.*\|\s*$', re.IGNORECASE)
# The separator line between a table header and the table rows
_matchTableSeparator = re.compile(r'^\s*\|([-: ]+\|)+\s*$', re.IGNORECASE)
# A list item that is indented with 2 whitespace characters
_match2spaceListIndention = re.compile(r'^\s{2}-', re.IGNORECASE)
# A markdown link to an anchor. Group 1 contains the anchor including the '#'.
# The look-behind excludes images ("![...](...)") without consuming a character.
# This way links at the start of a line, or directly after another link, are found.
_markdownLink = re.compile(r'(?<!!)\[[^\]]*\]\((#[^)]*)\)', re.IGNORECASE)
# A html link. Group 1 contains the link target.
_htmlLink = re.compile(r'<a\s+href="([^"\']*)">[^<]*</a>', re.IGNORECASE)
# A html anchor. Group 1 contains the name of the anchor.
_htmlAnchorLink = re.compile(r'<a\s+name="([^"]*)">[^<]*</a>', re.IGNORECASE)
# Any html tag
_htmlTag = re.compile(r'<[^>]*>', re.IGNORECASE)
# The prefix of a note line, including an optional "note:" label
_matchNoteStart = re.compile(r'^\s*>\s*(note)?\s*[:]?\s*', re.IGNORECASE)
# A footnote definition. Group 1 contains the footnote identifier.
_footnote = re.compile(r'\[\^([^\]]*)\]:', re.IGNORECASE)
# A footnote reference. Group 1 contains the footnote identifier.
_inlineFootnote = re.compile(r'\[\^([^\]]*)\]', re.IGNORECASE)


# TODO handle multiple nav levels (left bar) better (make configurable)


def shortHash(value:str, length:int) -> str:
	"""	Generate a short hash of a string value.

		The hash is the beginning of the base64 encoded SHA-256 digest of the
		value. The URL- and filename-safe base64 alphabet is used, because the
		hash is used as a file name and as a part of links: the '/' and '+'
		characters of the standard alphabet would lead to invalid paths.

		Args:
			value: The value to hash.
			length: The length of the hash.

		Returns:
			The hash.
	"""
	digest = hashlib.sha256(value.encode()).digest()
	return base64.urlsafe_b64encode(digest).decode()[:length]


def analyseMarkdown(filename:str) -> Document:
	"""	Analyse the markdown file and split it into clauses.

		Every heading starts a new clause. The text before the first heading
		is stored in a first clause without title. Each line gets a line type
		assigned. Footnote definitions are not added to the clauses but are
		collected separately.

		Args:
			filename: The name of the markdown file.

		Returns:
			The document object.
	"""

	print(f'[green]Analyzing "{filename}"')

	# Read the file.
	# Note: We use utf-8 and replace errors to avoid problems with special or unknown characters.
	with open(filename, 'r', encoding = 'utf-8', errors = 'replace') as file:
		inLines = file.readlines()
	
	# The list of clauses. The first clause contains the text before the first heading.
	outClauses:list[Clause] = [Clause(0, '', '', [])]
	footnotes:list[Footnote] = []

	# Go through the lines and detect headers and codefences
	inCodefence = False
	inTable = False
	tableHasSeparator = False
	for line in inLines:

		# Detect and handle codefences
		# For the moment we support only codefences that start and end
		# with 3 backticks. This is the most common way to define codefences.
		# Note, that longer codefences are allowed by the markdown specification.

		if _matchCodefenceStart.match(line) and not inCodefence:
			if inTable:
				# A codefence ends an open table. Without closing the table
				# here, a line of the codefence would later be marked as the
				# last row of the table.
				inTable = False
				tableHasSeparator = False
				outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
			inCodefence = True
			outClauses[-1].append(Line(line, LineType.CODEFENCESTART))
			continue
		if _matchCodefenceEnd.match(line):
			inCodefence = False
			outClauses[-1].append(Line(line, LineType.CODEFENCEEND))
			continue
		if inCodefence:
			outClauses[-1].append(Line(line, LineType.CODE))
			continue

		# Detect and handle tables
		if _matchTable.match(line) and not inTable:
			inTable = True
			outClauses[-1].append(Line(line, LineType.TABLEHEADER))
			continue
		if inTable:
			if _matchTableSeparator.match(line) and not tableHasSeparator:
				outClauses[-1].append(Line(line, LineType.TABLESEPARATOR))
				tableHasSeparator = True
				continue
			elif _matchTable.match(line):
				outClauses[-1].append(Line(line, LineType.TABLEROW))
				continue
			else:
				inTable = False
				tableHasSeparator = False
				# Mark the previous line as the last row in the table
				outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
				# continue with other matches

		# Detect notes
		# Notes are lines that start with a '>'.
		if _matchNote.match(line):
			outClauses[-1].append(Line(line, LineType.NOTE))
			continue

		# Detect footnotes
		# Footnote definitions are lines that start with '[^<id>]:'
		if (_fn := _footnote.match(line)):
			footnotes.append(Footnote(_fn.groups()[0], Line(line, LineType.TEXT)))
			continue

		# Detect images on a single line
		if _matchStandAloneImage.match(line):
			outClauses[-1].append(Line(line, LineType.STANDALONEIMAGE))
			continue  

		# Detect headers
		_lineType = LineType.TEXT
		if (m := _matchHeader.match(line)):
			# Add a new clause
			clauseTitle = m.groups()[1].strip()
			clauseTitle = re.sub(_htmlTag, '', clauseTitle)
			# The clause number is taken from the title. A short hash of the
			# title is used instead for headings without a number.
			headerNumber = _matchHeaderNumber.search(clauseTitle)
			outClauses.append(Clause(len(m.groups()[0]), # level
									 headerNumber.group() if headerNumber else shortHash(clauseTitle, 6),
									 clauseTitle, 
									 []))
			_lineType = LineType.HEADING

		# Just add the line to the current clause as text
		outClauses[-1].append(Line(line, _lineType))

	# A table at the very end of the file is not followed by another line
	# that ends the table. Mark its last row here.
	if inTable:
		outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW

	return Document(outClauses, footnotes)


def copyMediaFiles(filename:str, navTitle:str, mediaDirectory:str = 'media') -> None:
	"""	Copy media files from the source directory to the target directory.

		The source directory is the media directory next to the markdown file.
		The target directory is the media directory in the sub-directory that
		is named after the navigation title. Existing files in the target 
		directory are overwritten. Only a message is printed if the source
		directory does not exist.

		Args:
			filename: The name of the markdown file.
			navTitle: The title of the navigation entry.
			mediaDirectory: The name of the media directory.
	"""
	# Build the paths with os.path.join(). The directory part of a file name
	# is empty when the file name has no directory, and a simple concatenation
	# with '/' would then point to the root directory.
	baseDirectory = os.path.dirname(filename)
	sourceDirectory = os.path.join(baseDirectory, mediaDirectory)
	targetDirectory = os.path.join(baseDirectory, navTitle, mediaDirectory)

	if os.path.isdir(sourceDirectory):
		print(f'[green]Copying media files from "{sourceDirectory}" to "{targetDirectory}"')
		shutil.copytree(sourceDirectory, targetDirectory, dirs_exist_ok = True)
	else:
		print(f'[red]Media directory "{sourceDirectory}" does not exist')

	
def processDocument(args:argparse.Namespace) -> None:
	"""	Convert a markdown document to the MkDocs directory structure.

		The document is analysed, split into clauses, and the footnotes, links
		and notes are converted. Afterwards the clauses, the navigation file 
		and the media files are written.

		This function also sets the global verbosity flags.

		Args:
			args: The parsed command line arguments.
	"""
	global verbose, veryVerbose
	inDocumentFilename = os.path.abspath(args.document)
	veryVerbose = args.very_verbose
	# Very verbose output includes the verbose output
	verbose = args.verbose or veryVerbose

	# Analyse the markdown file
	document = analyseMarkdown(inDocumentFilename)
	# The content before the first heading is only included in the output
	# when this is requested with the "--include-title" option.
	document.splitMarkdownDocument(args.ignore_clause, 
								   args.split_level, 
								   ignoreUntilFirstHeading = not args.include_title)
	document.insertFootnotes()
	document.updateLinks()
	document.updateNotes()
	document.prepareForMkdocs(args.include_hanging_paragraphs)

	# Write the clauses to files
	document.writeClausesMkDocs(inDocumentFilename, args.title, args.nav_add_title)

	# Copy the media files
	copyMediaFiles(inDocumentFilename, args.title, args.media_directory)


if __name__ == '__main__':
	# Parse the command line arguments and convert the given document
	parser = argparse.ArgumentParser(description = 'Convert oneM2M markdown specifications to MkDocs format',
								     formatter_class = argparse.ArgumentDefaultsHelpFormatter)

	# Output verbosity
	parser.add_argument('--verbose', '-v', action = 'store_true', help = 'verbose output during processing')
	parser.add_argument('--very-verbose', '-vv', action = 'store_true', help = 'very verbose output during processing')

	# Conversion options
	parser.add_argument('--ignore-clause', '-ic', metavar = 'clause', nargs = '+', default = [ 'Contents', 'History' ], help = 'ignore headers in the markdown document')
	parser.add_argument('--include-hanging-paragraphs', '-ihp', action = 'store_true', default = False, help = 'include hanging paragraphs (text in clauses with sub-clauses) in the output files')
	parser.add_argument('--include-title', '-it', action = 'store_true', help = 'include the content before the first heading in the output files as "0.md"')
	parser.add_argument('--split-level', '-sl', metavar = 'level', type = int, default = 2, help = 'on which level to split clauses to separate files')
	parser.add_argument('--media-directory', '-md', metavar = 'media-directory', default = 'media', help = 'directory name where media files are stored')
	parser.add_argument('--title', '-t', metavar = 'title', required = True, help = 'mkdocs navigation title')
	parser.add_argument('--nav-add-title', '-nat', action = 'store_true', default = False, help = 'add the title as an extra navigation level to the navigation file')

	# The document to convert
	parser.add_argument('document', type = str, help = 'a oneM2M markdown specification document to process')
	args = parser.parse_args()
	processDocument(args)