#
#	pandocFilter.py
#
#	Script to convert a oneM2M Markdown file to pandoc input format
#
#	(c) 2023 by Andreas Kraft, Miguel Ortega
#	License: BSD 3-Clause License. See the LICENSE file for further details.
#

import argparse, os, re, sys
from rich import print
from rich.progress import Progress, TextColumn, TimeElapsedColumn

def readMDFile(progress:Progress, document:str) -> list[str]:
	"""	Read the markdown file and return a list of lines.

		Args:
			progress: Progress display on which a "Reading document" task is registered.
			document: Path of the markdown file to read.

		Returns:
			The lines of the file, each still carrying its line terminator.
			The file is decoded as UTF-8; undecodable bytes are substituted instead of raising an error.

		Note:
			If the file does not exist, a message is printed and the script exits with status 1.
	"""
	_taskID = progress.add_task('[blue]Reading document', start=False, total=0)

	try:
		# Open directly instead of testing for existence first, so that the file
		# cannot disappear between the check and the read.
		with open(document, 'r', encoding='utf-8', errors = 'replace') as f:
			return f.readlines()
	except FileNotFoundError:
		print(f'File {document} does not exist')
		# Use sys.exit(): the bare exit() helper is only injected by the site module.
		sys.exit(1)
	finally:
		# Stop the task once the read has really finished, and on failure as well.
		progress.stop_task(_taskID)

def writeMDFile(progress:Progress, mdLines:list[str], document:str, outDirectory:str) -> None:
	"""	Write the markdown lines to a file in the output directory.

		The output file gets the base name of `document`, i.e. any directory part
		of `document` is ignored.

		Args:
			progress: Progress display on which a "Writing document" task is registered.
			mdLines: The lines to write. They are written as they are, no line terminators are added.
			document: Path of the source document. Only its base name is used.
			outDirectory: Directory in which the file is created. It must already exist.
	"""
	_taskID = progress.add_task('[blue]Writing document', start=False, total=0)

	# os.path.join() also copes with an empty directory name (current directory
	# instead of the file system root) and with a trailing path separator.
	_target = os.path.join(outDirectory, os.path.basename(document))

	# Write the file
	try:
		with open(_target, 'w', encoding='utf-8', errors = 'replace') as f:
			f.writelines(mdLines)
	finally:
		# Stop the task even if writing failed
		progress.stop_task(_taskID)


def correctTOC(progress:Progress, mdLines:list[str], tocSection:str = 'Contents') -> list[str]:
	"""	Correct the TOC to be compatible with pandoc.

		The TOC section starts at the line that is exactly "# <tocSection>" and
		ends at the next line that starts with "#". Inside the section every line
		that contains a "[...]" part is cut off after the last "]" and terminated
		with two spaces, which is a markdown hard line break. All other lines
		inside the section are dropped. Lines outside of the section are copied
		unchanged.

		Args:
			progress: Progress display on which a "Correcting TOC" task is registered.
			mdLines: The lines of the document, including their line terminators.
			tocSection: Title of the heading that starts the TOC section.

		Returns:
			A new list with the corrected lines.
	"""
	_taskID = progress.add_task('[blue]Correcting TOC', start=False, total=0)

	_contents = f'# {tocSection}\n'
	# Raw string: "\[" and "\]" are regex escapes, not valid Python string escapes.
	tocregex = re.compile(r'^(.*\[.*\])')

	_lines:list[str] = []
	_inTOC = False
	for line in mdLines:
		if line == _contents:			# Start of the TOC section
			_inTOC = True
			_lines.append(line)
		elif not _inTOC:				# Outside of the TOC: copy unchanged
			_lines.append(line)
		elif line.startswith('#'):		# Next heading: end of the TOC
			_inTOC = False
			# Separate the last TOC entry from the following heading
			_lines.append('\n')
			_lines.append(line)
		elif (match := tocregex.match(line)):
			# Keep everything up to the last "]" and force a line break
			_lines.append(f'{match.group(1)}  \n')
		# Any other line inside the TOC, e.g. an empty line, is dropped

	progress.stop_task(_taskID)
	return _lines


def replaceTableCaptions(progress:Progress, mdLines:list[str]) -> list[str]:
	"""	Replace table captions with a pandoc table caption.

		A caption is a line that starts with "**Table ...**". It is removed from
		its position and re-inserted as "Table: <caption>" in front of the
		directly preceding run of lines that start with "|", i.e. in front of the
		table rows. If no such lines directly precede the caption then the
		converted caption stays where it was.

		Args:
			progress: Progress display on which a "Replacing table captions" task is registered.
			mdLines: The lines of the document, including their line terminators.

		Returns:
			A new list with the converted lines.
	"""
	_taskID = progress.add_task('[blue]Replacing table captions', start=False, total=0)

	# Raw string: "\*" is a regex escape, not a valid Python string escape.
	tableregex = re.compile(r'^\*\*(Table .*)\*\*')

	_lines:list[str] = []
	for line in mdLines:
		match = tableregex.match(line)
		if match is None:
			_lines.append(line)
			continue

		# Move the insert position backwards over the table rows collected so far
		_idx = len(_lines)
		while _idx > 0 and _lines[_idx-1].startswith('|'):
			_idx -= 1
		# Add the caption with a "Table:" prefix at the beginning of the table
		_lines.insert(_idx, f'Table: {match.group(1)}\n')

	progress.stop_task(_taskID)
	return _lines


def replaceFigureCaptions(progress:Progress, mdLines:list[str]) -> list[str]:
	"""	Replace figure captions with a pandoc figure caption.

		A caption is a line that starts with "**Figure ...**". The caption line
		is removed and its text becomes the alternative text of the nearest
		preceding image line, i.e. a line that starts with "![".
		If there is no preceding image line then the caption line is kept
		unchanged, so that no content of the document gets lost.

		Args:
			progress: Progress display on which a "Replacing figure captions" task is registered.
			mdLines: The lines of the document, including their line terminators.

		Returns:
			A new list with the converted lines.
	"""
	_taskID = progress.add_task('[blue]Replacing figure captions', start=False, total=0)

	# Raw string: "\*" is a regex escape, not a valid Python string escape.
	figureregex = re.compile(r'^\*\*(Figure .*)\*\*')
	# Everything from the start of an image line up to the first "]", i.e. "![alt text]"
	alttextregex = re.compile(r'^.*?]')

	_lines:list[str] = []
	for line in mdLines:
		match = figureregex.match(line)
		if match is None:
			_lines.append(line)
			continue
		caption = match.group(1)

		# Search backwards for the image that belongs to the caption
		_idx = len(_lines) - 1
		while _idx >= 0 and not _lines[_idx].startswith('!['):
			_idx -= 1

		if _idx >= 0:	# index 0 is a valid position: the image may be the very first line
			# A function as replacement inserts the caption literally. A plain
			# string would be treated as a template in which backslashes are special.
			_lines[_idx] = alttextregex.sub(lambda _m: f'![{caption}]', _lines[_idx], count=1)
		else:
			# No image found: keep the caption line instead of dropping it
			_lines.append(line)

	progress.stop_task(_taskID)
	return _lines



def process(document:str, outDirectory:str) -> None:
	"""	Run the complete conversion for a single document.

		The document is read, passed through the TOC, table caption and figure
		caption corrections (in this order), and the result is written to the
		output directory. Every step reports to a common progress display.

		Args:
			document: Path of the markdown document to convert.
			outDirectory: Directory that receives the converted document.
	"""
	_columns = (TextColumn('{task.description}'), TimeElapsedColumn())
	# The order of the corrections is the order in which they are applied
	_corrections = (correctTOC, replaceTableCaptions, replaceFigureCaptions)

	with Progress(*_columns) as progress:
		_content = readMDFile(progress, document)
		for _correction in _corrections:
			_content = _correction(progress, _content)
		writeMDFile(progress, _content, document, outDirectory)

def main(args=None):
	"""	Command line entry point: parse the arguments and convert the document.

		Args:
			args: Optional list of command line argument strings. If it is None
				then the arguments are taken from sys.argv.
	"""
	# Parse command line arguments
	parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
	parser.add_argument('--outdir', '-o', action='store', dest='outDirectory', default = 'out', metavar = '<output directory>',  help = 'specify output directory')
	parser.add_argument('document',  help = 'document to parse')
	# Pass the given arguments on. parse_args(None) falls back to sys.argv.
	parsedArgs = parser.parse_args(args)

	# Make sure that the output directory exists before anything is written
	os.makedirs(parsedArgs.outDirectory, exist_ok = True)

	# Process the document
	process(parsedArgs.document, parsedArgs.outDirectory)

if __name__ == '__main__':
	# Only run the conversion when the file is executed as a script.
	# The result of main() is used as the exit status of the process.
	_exitStatus = main()
	sys.exit(_exitStatus)