#
#	addTrackedChanges.py
#
#	Script to convert all improvised changes in a docx file (underlines, strike-throughs) 
#	to either coloured text or tracked changes.
#
#	(c) 2023 by Andreas Kraft, Miguel Ortega
#	License: BSD 3-Clause License. See the LICENSE file for further details.
#

from typing import Optional
import argparse, tempfile, zipfile
import lxml.etree as ET

# WordprocessingML main namespace in Clark notation ("{uri}").
# It is prepended to local names to build qualified tag and attribute names, e.g. f'{wns}u'.
wns = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'


def _isToggleOn(element) -> bool:
	"""	Return True if a toggle property element (e.g. `w:strike`) is switched on.

		A missing `w:val` attribute counts as "on". Besides 'true', the
		equivalent on/off spellings '1' and 'on' are accepted.
	"""
	return element.get(f'{wns}val', 'true') in ('true', '1', 'on')


def _isRun(element) -> bool:
	"""	Return True if `element` is a `w:r` (run) element. """
	return element is not None and element.tag == f'{wns}r'


def _setColour(rPr, colour:str) -> None:
	"""	Set the text colour in the run properties `rPr` to the hex value `colour`.

		Existing `w:color` children are removed first, so that the properties 
		never end up with more than one colour element.
	"""
	for old in rPr.findall(f'{wns}color'):
		rPr.remove(old)
	rPr.append(colourElement := ET.Element(f'{wns}color'))
	colourElement.set(f'{wns}val', colour)


def _wrapRun(run, tag:str, author:Optional[str], date:Optional[str]):
	"""	Wrap `run` in a new tracked change element (`tag` is 'ins' or 'del').

		The new element takes the position of the run in the run's parent, and
		the run becomes its only child. The author and date attributes are only 
		set when a value is given. The new element is returned.
	"""
	# NOTE(review): no w:id attribute is written on the tracked change element.
	# Confirm that the consuming application accepts this.
	parent = run.getparent()
	parent.insert(parent.index(run), wrapper := ET.Element(f'{wns}{tag}'))
	if author:
		wrapper.set(f'{wns}author', author)
	if date:
		wrapper.set(f'{wns}date', date)
	wrapper.append(run)		# appending moves the run from its old parent into the wrapper
	return wrapper


def convertChanges(fn:str, 
				   outputFn:str,
				   author:Optional[str] = None, 
				   date:Optional[str] = None, 
				   colourOnly:Optional[bool] = False) -> None:
	"""	Converts all improvised changes in a docx file to either coloured text or tracked changes.

		All underlined text is converted to either green text or tracked insertions.
		All striked text is converted to either red text or tracked deletions.

		Underline elements with the value 'none' and strike elements that are 
		switched off are left untouched. When converting to tracked changes, only 
		formatting that belongs directly to a run is converted. Formatting found 
		elsewhere (e.g. in the run properties of a paragraph mark) is skipped.

		Args:
			fn: The path to the docx file.
			outputFn: The path and filename to the output docx file.
			author: The author of the changes. Defaults to None.
			date: The date of the changes. Defaults to None.
			colourOnly: If True, only the colour of the changes is changed, but not converted to tracked changes. Defaults to False.
	"""
	
	# Create a temporary directory (automatically cleaned up after the with statement)
	with tempfile.TemporaryDirectory() as tmpDir:

		# Extract the docx file to the temporary directory.
		# The archive is opened only once and closed right after the extraction.
		with zipfile.ZipFile(fn) as source:
			originalFileList = source.namelist()
			source.extractall(tmpDir)

		# Get the path to the document.xml file
		xmlFn = f'{tmpDir}/word/document.xml'

		# Register all namespaces in the given XML file.
		# For this the XML file is parsed once and all namespaces are registered.
		namespaces = { prefix: uri for _, (prefix, uri) in ET.iterparse(xmlFn, events = ['start-ns']) }
		for prefix, uri in namespaces.items():
			# An empty prefix denotes a default namespace. It cannot be registered under a name.
			if prefix:
				ET.register_namespace(prefix, uri)
				
		# Parse the XML file. The parser reads the file itself and takes the encoding
		# from the XML declaration, independent of the platform's default text encoding.
		tree = ET.parse(xmlFn)

		#
		# Handle insertions
		#

		# Find all underline elements (findall returns a list, so the tree can be modified in the loop)
		for e in tree.findall(f'.//{wns}u'):

			# An underline value of "none" explicitly switches underlining off
			if e.get(f'{wns}val') == 'none':
				continue

			rPr = e.getparent()
			if colourOnly:
				_setColour(rPr, '00FF00')
				continue

			# Only real runs can be wrapped in an ins element
			run = rPr.getparent()
			if not _isRun(run):
				continue

			# Add an ins element as the parent of the run
			_wrapRun(run, 'ins', author, date)

			# Remove the underline element
			rPr.remove(e)

		#
		# Handle deletions
		#

		# Find all strike elements
		for e in tree.findall(f'.//{wns}strike'):

			# Ignore strike elements that are switched off
			if not _isToggleOn(e):
				continue

			rPr = e.getparent()
			if colourOnly:
				_setColour(rPr, 'FF0000')
				continue

			# Only real runs can be wrapped in a del element
			run = rPr.getparent()
			if not _isRun(run):
				continue

			# Turn every text element of the run into a delText element. 
			# Renaming the tag keeps the text, its position in the run and its
			# attributes (e.g. xml:space="preserve"). A run without text is fine as well.
			for t in run.findall(f'{wns}t'):
				t.tag = f'{wns}delText'

			# Add a del element as the parent of the run
			_wrapRun(run, 'del', author, date)

			# Remove the strike element
			rPr.remove(e)

		# Write back the modified XML file, with an XML declaration and UTF-8 encoded
		tree.write(xmlFn, xml_declaration = True, encoding = 'UTF-8', standalone = True)

		# Create a new docx file with the modified XML file.
		# The with statement closes the archive, which finalizes it on disk.
		with zipfile.ZipFile(outputFn, 'w') as archive:
			for name in originalFileList:
				archive.write(f'{tmpDir}/{name}', name, compress_type = zipfile.ZIP_DEFLATED, compresslevel = 9)


if __name__ == '__main__':

	# Command line interface: one positional input document plus optional 
	# settings for the output file, the change author/date and the colour-only mode.
	parser = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter)
	parser.add_argument(
		'--outfile', '-o',
		action = 'store',
		dest = 'outfile',
		default = 'out.docx',
		metavar = '<output filename>',
		help = 'specify the output name for the Word document',
	)
	parser.add_argument(
		'--author', '-a',
		action = 'store',
		dest = 'author',
		default = None,
		metavar = '<author>',
		help = 'specify the author of the changes',
	)
	parser.add_argument(
		'--date', '-d',
		action = 'store',
		dest = 'date',
		default = None,
		metavar = '<date>',
		help = 'specify the date of the changes (e.g. "2023-07-21T14:09:02")',
	)
	parser.add_argument(
		'--colourOnly', '-c',
		action = 'store_true',
		dest = 'colourOnly',
		default = False,
		help = 'only set the colour of the changes, but do not convert them to tracked changes',
	)
	parser.add_argument('document', help = 'document to parse')
	args = parser.parse_args()

	# Run the conversion with the parsed settings
	convertChanges(fn = args.document,
				   outputFn = args.outfile,
				   author = args.author,
				   date = args.date,
				   colourOnly = args.colourOnly)