#
#	markdownTools.py
#
#	(c) 2025 by Andreas Kraft & Miguel Angel Reina Ortega
#	License: BSD 3-Clause License. See the LICENSE file for further details.



""" Various tools for markdown processing
"""
from __future__ import annotations
from typing import Callable, Optional

from dataclasses import dataclass
import base64, hashlib
from enum import Enum, auto

from gridTableTools import generateHtmlTableWithSpans, setLoggers as setGridTableLoggers
from regexMatches import *

# TODO use a verbosity level instead
# Module-wide verbosity flags. They are read by the Document methods to decide
# whether debug messages are emitted, and are set by main().
verbose = False
veryVerbose = False

# Logging callables used throughout this module. They default to print and
# can be replaced via setLoggers().
printInfo = print
printDebug = print
printError = print

def setLoggers(info:Callable = print, debug:Callable = print, error:Callable= print) -> None:
	"""	Install the logging callables used by this module.

		The same callables are forwarded to the grid table tools.

		Args:
			info: Callable used for informational messages.
			debug: Callable used for debug messages.
			error: Callable used for error messages.
	"""
	global printInfo, printDebug, printError

	printInfo, printDebug, printError = info, debug, error

	# Keep the grid table tools in sync with this module's loggers
	setGridTableLoggers(info, debug, error)



def _shortHash(value:str, length:int) -> str:
	"""	Generate a short, URL- and filename-safe hash of a string value.

		The value is hashed with SHA-256, the digest is base64 encoded and
		the result is cut to the requested length.

		The hash is used as a clause number for headings without a number,
		and clause numbers are embedded in relative link paths. Therefore the
		URL-safe base64 alphabet ('-' and '_' instead of '+' and '/') is used,
		so that the hash can never contain a path separator.

		Args:
			value: The value to hash.
			length: The maximum length of the hash.

		Returns:
			The hash, containing at most *length* characters.
	"""
	digest = hashlib.sha256(value.encode()).digest()
	return base64.urlsafe_b64encode(digest).decode()[:length]

	
class LineType(Enum):
	"""	Represents the type of a line in the markdown file.

		The type is assigned while the markdown is analysed and is used by
		the later processing steps to decide how a line is handled.
	"""
	# A heading line. It starts a new clause.
	HEADING = auto()
	# Ordinary text. This is the default type of a line.
	TEXT = auto()
	# The line that opens a code fence.
	CODEFENCESTART = auto()
	# A line inside a code fence.
	CODE = auto()
	# The line that closes a code fence.
	CODEFENCEEND = auto()
	# A list line. NOTE(review): not assigned anywhere in this file - presumably used by callers.
	LIST = auto()
	# A note line.
	NOTE = auto()
	# A line that only consists of an image.
	STANDALONEIMAGE = auto()
	# The first line of a table.
	TABLEHEADER = auto()
	# The separator line of a table.
	TABLESEPARATOR = auto()
	# A further line of a table.
	TABLEROW = auto()
	# The last line of a table. Assigned when the line after the table is seen.
	TABLELASTROW = auto()
	# Generated HTML, e.g. a converted grid table.
	RAWHTML = auto()


@dataclass
class Line:
	"""	A single line of the markdown file together with its classification.

		Attributes:
			text: The raw text of the line. Defaults to a single newline.
			lineType: The type of the line. Defaults to normal text.
	"""
	text:str = '\n'
	lineType:LineType = LineType.TEXT


	def __str__(self) -> str:
		"""	Return the raw text of the line. """
		return self.text
	

	def __repr__(self) -> str:
		"""	Return the same text as the string conversion. """
		return str(self)


@dataclass
class Clause:
	"""	A clause of the markdown file: a heading level, a clause number,
		a title and the lines that belong to the clause.
	"""
	_level:int
	_clauseNumber:str
	_title:str
	_lines:list[Line]


	@property
	def level(self) -> int:
		"""	The heading level of the clause. """
		return self._level


	@property
	def clauseNumber(self) -> str:
		"""	The clause number, or '0' if the clause has no number. """
		return self._clauseNumber or '0'
	

	@clauseNumber.setter
	def clauseNumber(self, value:str) -> None:
		"""	Assign a new clause number. """
		self._clauseNumber = value


	@property
	def title(self) -> str:
		"""	The title of the clause. """
		return self._title
	

	@title.setter
	def title(self, value:str) -> None:
		"""	Assign a new title. """
		self._title = value


	@property
	def lines(self) -> list[Line]:
		"""	The lines that belong to the clause. """
		return self._lines
	

	@lines.setter
	def lines(self, value:list[Line]) -> None:
		"""	Replace the lines of the clause. """
		self._lines = value
	

	@property
	def linesCount(self) -> int:
		"""	The number of lines in the clause.

			Returns:
				The number of lines, including empty ones.
		"""
		return len(self.lines)
	

	def append(self, line:Line) -> None:
		"""	Add a single line to the end of the clause.

			Args:
				line: The line to add.
		"""
		self.lines.append(line)
	

	def extend(self, clause:Clause) -> None:
		"""	Add all lines of another clause to the end of this clause.

			Args:
				clause: The clause whose lines are added.
		"""
		self.lines.extend(clause.lines)


	def asStringList(self, paddings:int = 0) -> list[str]:
		"""	Return the texts of the clause's lines.

			Args:
				paddings: The number of empty lines to put in front of the clause.
			Returns:
				The padding lines followed by the text of every line.
		"""
		padding = ['\n'] * paddings
		texts = [ line.text for line in self.lines ]
		return padding + texts


	def __len__(self) -> int:
		"""	Return the number of characters in the clause. Every line is
			stripped of surrounding whitespace before it is counted, so empty
			lines and whitespace-only lines do not contribute.

			Returns:
				The number of characters in the clause.
		"""
		return sum(len(line.text.strip()) for line in self.lines)
	

	def __str__(self) -> str:
		"""	Return all lines of the clause joined into a single string. """
		return ''.join(map(str, self.lines))
	

	def __repr__(self) -> str:
		"""	Return the same text as the string conversion. """
		return str(self)



class Footnote:
	"""	A footnote of the markdown file: its id and the line that holds it. """
	def __init__(self, id:str, line:Line) -> None:
		"""	Create a footnote.

			Args:
				id: The id of the footnote.
				line: The line that holds the footnote.
		"""
		self.id = id
		""" The id under which the footnote is referenced. """
		
		self.line = line
		""" The line that holds the footnote. """
	
	def __str__(self) -> str:
		"""	Return the text of the footnote's line. """
		footnoteLine = self.line
		return footnoteLine.text


	def __repr__(self) -> str:
		"""	Return the same text as the string conversion. """
		return str(self)

class Document:
	"""	Represents the document object.

		A document holds an ordered list of clauses and the list of footnotes
		of the markdown file. The processing methods modify the document
		in place.
	"""	
	# Class-level declarations. Every instance gets its own lists assigned
	# in the constructor.
	clauses:list[Clause] = []
	footnotes:list[Footnote] = []

	def __init__(self, clauses:list[Clause], footnotes:Optional[list[Footnote]] = None) -> None:
		"""	Constructor.

			Args:
				clauses: The clauses of the document.
				footnotes: The footnotes of the document. A new empty list is used if omitted.
		"""
		self.clauses = clauses
		# Use a fresh list per instance. A mutable default argument would be
		# shared between all documents that are created without footnotes.
		self.footnotes = footnotes if footnotes is not None else []


	def splitMarkdownDocument(self, 
							  ignoreTitles:Optional[list[str]] = None, 
							  splitLevel:int = 1,
							  ignoreUntilFirstHeading:bool = False) -> None:
		"""	Split the clauses at a certain level. This is used to create the separate
			markdown files for MkDocs.

			After the split, the clauses are stored in the document object.

			Args:
				ignoreTitles: A list of titles that should be ignored. They are not included in the output.
					The titles are compared case-insensitively.
				splitLevel: The level at which the clauses should be split.
				ignoreUntilFirstHeading: Ignore all clauses until the first heading.
			
		"""
		result:list[Clause] = []

		# Case-insensitive lookup of the titles to ignore
		ignored = { t.casefold() for t in ignoreTitles } if ignoreTitles else set()

		for clause in self.clauses:

			# Check if the current clause should be ignored
			if clause.title.casefold() in ignored:
				continue

			# Add a new output clause if the current clause's level is 
			# equal or less than the split level. Also start one if there is 
			# no output clause yet, e.g. because the preceding clauses were
			# ignored. Otherwise there would be nothing to add the lines to.
			if clause.level <= splitLevel or not result:
				result.append(Clause(clause.level, clause.clauseNumber, clause.title, []))
			
			# Add the lines to the output clause
			result[-1].extend(clause)
		
		# Remove the leading clauses that have no title. The list may become
		# empty, e.g. for a document without any heading.
		if ignoreUntilFirstHeading:
			while result and len(result[0].title) == 0:
				result.pop(0)
		
		self.clauses = result


	def insertFootnotes(self) -> None:
		"""	Insert footnotes into the clauses.

			Every footnote that is referenced in a text line of a clause is 
			appended once to the end of that clause, separated by an empty line.

			After the insertion, the clauses are stored in the document object.
			
		"""
		printInfo('Adding footnotes to clauses')

		for clause in self.clauses:
			foundFootnotes:list[Footnote] = []
			for line in clause.lines:
				# ATTN: Only footnotes in normal text lines are checked
				if line.lineType != LineType.TEXT:
					continue

				# A line may reference more than one footnote
				# NOTE(review): the pattern name starts with an upper case 'M', unlike
				# the other patterns used in this file - confirm it exists in regexMatches.
				for fn in MatchInlineFootnote.finditer(line.text):
					footnoteID = fn.group(1)
					# Find the footnote in the list of footnotes. A footnote that is
					# referenced more than once in the clause is only added once.
					for f in self.footnotes:
						if f.id == footnoteID and f not in foundFootnotes:
							foundFootnotes.append(f)

			# Insert the footnotes at the end of the clause
			if foundFootnotes:
				clause.append(Line('\n', LineType.TEXT))
				for f in foundFootnotes:
					clause.append(f.line)


	@staticmethod
	def _anchorFromHeading(heading:str) -> str:
		"""	Convert the text of a heading to the anchor format.

			Args:
				heading: The text of the heading.

			Returns:
				The anchor, without a leading '#'.
		"""
		anchor = heading.strip().casefold().replace(' ', '-')
		# Remove special characters
		for c in ( '.', '(', ')', '[', ']', ':', ',', "'", '"'):
			anchor = anchor.replace(c, '')
		# remove html tags from the anchor
		return re.sub(matchHtmlTag, '', anchor)


	@staticmethod
	def _rewriteLinks(text:str, replacements:dict[str, str]) -> str:
		"""	Replace link targets in a text in a single pass.

			A single pass makes sure that an already inserted replacement is not
			replaced again. This would happen with consecutive string
			replacements when a target occurs more than once, or when one 
			target is the prefix of another one.

			Args:
				text: The text to update.
				replacements: Mapping of the old link targets to the new ones.

			Returns:
				The updated text.
		"""
		if not replacements:
			return text
		# Longest targets first, so that '#a-b' is preferred over its prefix '#a'
		alternatives = '|'.join(re.escape(k) for k in sorted(replacements, key = len, reverse = True))
		# The new targets have the form '../<clause>/#<anchor>'. Targets that directly 
		# follow a '/' are skipped, so that links updated before are left alone.
		return re.sub(f'(?<!/)(?:{alternatives})', lambda m: replacements[m.group(0)], text)


	def updateLinks(self) -> None:
		"""	Update the links in the clauses to the new structure. This is done by
			creating a dictionary of all links and their targets and then replacing
			the links in the clauses.

			After the update, the clauses are stored in the document object.
		"""
		printInfo('Updating links in clauses')

		# Build the link target dictionary. Mapping anchor -> clause
		linkTargets:dict[str, Clause] = {}

		# Find all Markdown headers in the clauses and convert them to anchor format
		for clause in self.clauses:
			# Find all headers in the clause
			for line in clause.lines:
				if (m := matchHeader.match(line.text)):
					# convert the header to anchor format and add it to the dictionary
					anchor = self._anchorFromHeading(m.groups()[1])
					linkTargets[f'#{anchor}'] = clause
					if veryVerbose:
						printDebug(f'Added Markdown anchor "{anchor}"')

		# Find all HTML anchors in the clauses and add them to the dictionary.
		# This is done in a second run, so that HTML anchors take precedence 
		# over Markdown anchors with the same name.
		for clause in self.clauses:
			for line in clause.lines:
				for a in matchHtmlAnchorLink.findall(line.text):
					linkTargets[f'#{a}'] = clause
					if veryVerbose:
						printDebug(f'Found HTML anchor "{a}" in clause "{clause.title}"')

		# Replace the html links
		for clause in self.clauses:
			for line in clause.lines:
				if (links := matchHtmlLink.findall(line.text)):
					replacements = { lnk: f'../{linkTargets[lnk].clauseNumber}/#{lnk[1:]}' 
									 for lnk in links 
									 if lnk in linkTargets }
					if replacements:
						line.text = self._rewriteLinks(line.text, replacements)
						if veryVerbose:
							for lnk in replacements:
								printDebug(f'Updated HTML link "{lnk}" in clause "{clause.title}"')

		# Replace the markdown links
		for clause in self.clauses:
			for line in clause.lines:
				if (links := markdownLink.findall(line.text)):
					# Replace the old link targets with converted versions that 
					# point to the output files. The lookup of the target is 
					# done with the lower case version of the link.
					replacements = { lnk: f'../{linkTargets[lnk.casefold()].clauseNumber}/#{lnk[1:]}' 
									 for lnk in links 
									 if lnk.casefold() in linkTargets }
					if replacements:
						line.text = self._rewriteLinks(line.text, replacements)
						if veryVerbose:
							for lnk in replacements:
								printDebug(f'Updated Markdown link "{lnk}" in clause "{clause.title}"')


	def updateNotes(self) -> None:
		"""	Update the notes in the clauses to the mkDocs notes version.

			Consecutive note lines are converted to a single '!!! note' block 
			that is surrounded by empty lines.

			After the update, the clauses are stored in the document object.
		"""
		printInfo('Updating notes in clauses')

		for clause in self.clauses:
			lines:list[Line] = []
			inNote = False
			for line in clause.lines:
				if line.lineType == LineType.NOTE:
					# The first line of a note starts a new note block
					if not inNote:
						lines.append(Line('\n', LineType.TEXT))
						lines.append(Line('!!! note\n', LineType.NOTE))
						inNote = True
					# The note's text is indented and added without the note marker
					lines.append(Line(f"\t{re.sub(matchNoteStart, '', line.text)}", LineType.NOTE))
					if verbose:
						printDebug(f'Converted note in clause "{clause.title}"')
				else:
					# The first line after a note ends the note block
					if inNote:
						lines.append(Line('\n', LineType.TEXT))
					inNote = False
					lines.append(line)
			clause.lines = lines


	def __str__(self) -> str:
		"""	Return the document as a string. The clauses are followed by the footnotes. """
		return '\n'.join([ str(c) for c in self.clauses + self.footnotes ])
	

	def __repr__(self) -> str:
		"""	Return the document as a string. """
		return self.__str__()


def analyseMarkdown(filename:Optional[str]=None, inLines:Optional[list[str]]=None) -> Document:
	"""	Analyse the markdown file and split it into clauses.
		Either the filename or the inLines must be provided.

		The lines are classified one by one. Every heading starts a new clause. 
		Footnotes are collected separately, and grid tables are converted to 
		html tables.

		Args:
			filename: The name of the markdown file.
			inLines: The lines of the markdown file. An empty list is accepted
				and results in a document with a single empty clause.

		Returns:
			The document object.

		Raises:
			ValueError: If neither or both the filename and the lines are provided.
			OSError: If the file cannot be opened.
	"""

	gridTable:str = ''

	def processGridTable() -> None:
		"""	Process a grid table and convert it to an html table.

			This function adds the html table to the output clauses and
			clears the gridTable variable. A conversion error does not stop
			the processing. It is reported and a marker is added to the 
			output instead of the table.
		"""
		nonlocal gridTable
		
		htmltable:str = ''
		try:
			htmltable = generateHtmlTableWithSpans(gridTable)
			printDebug(htmltable)
		except Exception as e:
			printError(f"Error: {e}")
			htmltable = f'<mark>Conversion error: {e}</mark>\n'
		outClauses[-1].append(Line(htmltable, LineType.RAWHTML))
		gridTable = ''


	printInfo(f'Analyzing "{filename}"')

	# Read the file.
	# Note: We use utf-8 and replace errors to avoid problems with special or unknown characters.
	if filename and not inLines:
		with open(filename, 'r', encoding = 'utf-8', errors = 'replace') as file:
			inLines = file.readlines()
	elif not filename and inLines is not None:
		# The lines are provided by the caller. This includes an empty list.
		pass
	else:
		raise ValueError('Either the filename or the lines must be provided.')
	
	# The list of clauses. The first clause contains the text before the first heading.
	outClauses:list[Clause] = [Clause(0, '', '', [])]
	footnotes:list[Footnote] = []

	# Go through the lines and detect headers and codefences
	inCodefence = False
	inTable = False
	tableHasSeparator = False
	inGridTable = False
	for line in inLines:
		# Detect and handle codefences
		# For the moment we support only codefences that start and end
		# with 3 backticks. This is the most common way to define codefences.
		# Note, that longer codefences are allowed by the markdown specification.
  
		if matchCodefenceStart.match(line) and not inCodefence:
			inCodefence = True
			outClauses[-1].append(Line(line, LineType.CODEFENCESTART))
			continue
		if matchCodefenceEnd.match(line):
			inCodefence = False
			outClauses[-1].append(Line(line, LineType.CODEFENCEEND))
			continue
		if inCodefence:
			outClauses[-1].append(Line(line, LineType.CODE))
			continue

		# Detect and handle tables
		if matchTable.match(line) and not inTable and not inGridTable:
			inTable = True
			outClauses[-1].append(Line(line, LineType.TABLEHEADER))
			continue
		if inTable:
			if matchTableSeparator.match(line) and not tableHasSeparator:
				outClauses[-1].append(Line(line, LineType.TABLESEPARATOR))
				tableHasSeparator = True
				continue
			elif matchTable.match(line):
				outClauses[-1].append(Line(line, LineType.TABLEROW))
				continue
			else:
				inTable = False
				tableHasSeparator = False
				# Mark the previous line as the last row in the table
				outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
				# continue with other matches

		#Detect grid tables and convert them to html table
		# The lines of a grid table are only collected here. They are added
		# as a single html table when the grid table ends.
		if matchGridTable.match(line) and not inGridTable:
			inGridTable = True
			gridTable += line
			continue
		if inGridTable:
			if matchGridTableHeaderSeparator.match(line) or matchGridTableBodySeparator.match(line):
				gridTable += line
				continue
			elif matchTable.match(line):
				gridTable += line
				continue
			else:
				inGridTable = False
				processGridTable()
		# continue with other matches

		# Detect notes
		# Notes are lines that start with a '>'.
		if matchNote.match(line):
			outClauses[-1].append(Line(line, LineType.NOTE))
			continue

		# Detect footnotes
		# Footnotes are not added to a clause, but collected for the document
		if (_fn := matchFootnote.match(line)):
			footnotes.append(Footnote(_fn.groups()[0], Line(line, LineType.TEXT)))
			continue

		# Detect images on a single line
		if matchStandAloneImage.match(line):
			outClauses[-1].append(Line(line, LineType.STANDALONEIMAGE))
			continue  

		# Detect headers
		_lineType = LineType.TEXT
		if (m := matchHeader.match(line)):
			# Add a new clause
			clauseTitle = m.groups()[1].strip()
			clauseTitle = re.sub(matchHtmlTag, '', clauseTitle)
			headerNumber = matchHeaderNumber.search(clauseTitle)
			# A heading without a number gets a short hash of its title as clause number
			outClauses.append(Clause(len(m.groups()[0]), # level
								   headerNumber.group() if headerNumber else _shortHash(clauseTitle, 6),
								   clauseTitle, 
								   []))
			_lineType = LineType.HEADING

		# Just add the line to the current clause as text
		# A heading line becomes the first line of the clause that it started.
		outClauses[-1].append(Line(line, _lineType))

	# Process still unfinished cases

	# A table that runs until the end of the input has no following line that 
	# would trigger the marking of its last row.
	if inTable:
		outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW

	# The same applies to a grid table that is still collected
	if gridTable:
		processGridTable()

	return Document(outClauses, footnotes)


def main() -> None:
    """Command-line entry point for processing a markdown file.

    The file is analysed, split into clauses and post-processed
    (footnotes, links, notes). The result is printed to stdout.
    """
    import argparse

    global verbose, veryVerbose

    # Define the command-line interface
    cli = argparse.ArgumentParser(description='Markdown-Dateien verarbeiten, um Gittertabellen zu konvertieren und andere Formatierungen zu handhaben')
    cli.add_argument('eingabe', help='Eingabe-Markdown-Datei')
    cli.add_argument('-v', '--verbose', action='store_true', help='Ausführliche Ausgabe aktivieren')
    cli.add_argument('-vv', '--sehr-verbose', action='store_true', help='Sehr ausführliche Ausgabe aktivieren')
    cli.add_argument('-i', '--ignoriere-titel', nargs='+', default=[], help='Liste der zu ignorierenden Titel')
    cli.add_argument('-s', '--teilungs-ebene', type=int, default=1, help='Ebene, auf der das Dokument geteilt werden soll (Standard: 1)')
    cli.add_argument('-f', '--ignoriere-erste', action='store_true', help='Inhalt bis zur ersten Überschrift ignorieren')
    options = cli.parse_args()

    # Apply the requested verbosity to the module-level flags
    verbose, veryVerbose = options.verbose, options.sehr_verbose

    # Analyse the markdown file
    document = analyseMarkdown(options.eingabe)

    # Split the document into the output clauses
    document.splitMarkdownDocument(
        ignoreTitles=options.ignoriere_titel,
        splitLevel=options.teilungs_ebene,
        ignoreUntilFirstHeading=options.ignoriere_erste,
    )

    # Update the elements of the document
    document.insertFootnotes()
    document.updateLinks()
    document.updateNotes()

    # Print the processed document: a heading per clause, followed by its lines
    for clause in document.clauses:
        print(f"\n{'#' * clause.level} {clause.title}")
        for clauseLine in clause.lines:
            print(clauseLine.text, end='')

# Run the command-line interface only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()