From 10048d4b5f8a74b27ce0a772177113b49b4a85b3 Mon Sep 17 00:00:00 2001
From: ankraft <an.kraft@gmail.com>
Date: Thu, 27 Jul 2023 16:56:28 +0200
Subject: [PATCH] Added conversion of bold and italic text in paragraphs,
 headers and tables.

---
 README.md        | 10 +++++++---
 requirements.txt | 18 +++++++++++-------
 setup.py         |  1 +
 spec2md.py       | 37 +++++++++++++++++++++++++++++--------
 4 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index 55c16da..3df277e 100644
--- a/README.md
+++ b/README.md
@@ -12,9 +12,9 @@ python3 -m pip install -r requirements.txt
 
 ## Usage
 - Create a directory with the Word document in it. The Word document **must** be in *docx* format. This can be achieved by opening the document with *Word* and save it in *docx* format to another file.
-- Create a configuration file with the same basename as the Word document + *.ini* extension. This file may contain different configurations as the standard *config.ini* file provided. 
-  - Alternativaly, a file named *config.ini* will apply to all files in that directory.
-  - It is only necessary to add the settings that are different from the *config.ini* file in the projects root directoy. That file will always act as a fallback.
+- Create a configuration file with the same base name as the Word document plus the *.ini* extension. This file may contain settings that differ from those in the standard *config.ini* file provided.
+- Alternatively, a file named *config.ini* will apply to all files in that directory.
+	- It is only necessary to add the settings that are different from the *config.ini* file in the project's root directory. That file will always act as a fallback.
 - Run the converter as follows:
 ```
 python3 spec2md.py <path-to-word-document>
@@ -25,3 +25,7 @@ python3 spec2md.py <path-to-word-document>
 ### The converter doesn't seem to generate image files.
 
 Is *LibreOffice* already running? If yes, then close it.
+
+## Changes
+
+- **2023-07-27** - Added conversion of bold and italic text in paragraphs, headers and tables.
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 42ccd5d..f32fa76 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,16 +1,20 @@
 #
-# This file is autogenerated by pip-compile with python 3.10
-# To update, run:
+# This file is autogenerated by pip-compile with Python 3.10
+# by the following command:
 #
 #    pip-compile
 #
-commonmark==0.9.1
+lxml==4.9.3
+    # via
+    #   oneM2M-spec-2-MD-converter (setup.py)
+    #   python-docx
+markdown-it-py==3.0.0
     # via rich
-lxml==4.9.1
-    # via python-docx
-pygments==2.13.0
+mdurl==0.1.2
+    # via markdown-it-py
+pygments==2.15.1
     # via rich
 python-docx==0.8.11
     # via oneM2M-spec-2-MD-converter (setup.py)
-rich==12.5.1
+rich==13.4.2
     # via oneM2M-spec-2-MD-converter (setup.py)
diff --git a/setup.py b/setup.py
index 0e24443..7ac245e 100644
--- a/setup.py
+++ b/setup.py
@@ -9,6 +9,7 @@ setup(
 	description='Convert oneM2M specifications to Markdown',
 	packages=find_packages(),
 	install_requires=[
+        'lxml',
 		'rich',
 		'python-docx',
 	 ]
diff --git a/spec2md.py b/spec2md.py
index e760852..2c851eb 100644
--- a/spec2md.py
+++ b/spec2md.py
@@ -24,8 +24,7 @@ from rich.progress import Progress, TextColumn, BarColumn
 from rich.console import Console
 from rich import inspect
 import configparser, zipfile
-from xml.etree import ElementTree as ET
-
+from lxml import etree as ET
 
 class Style(IntEnum):
 	example = auto()
@@ -78,7 +77,9 @@ _captionMarker = '__CAPTION__'
 console = Console()
 _print:Callable = print
 
-
+# WordprocessingML namespace URI and the namespace-qualified ('{namespace}val') name of the 'val' attribute
+wns = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
+_val = f'{{{wns}}}val'
 
 class SectionNumbers(object):
 
@@ -286,7 +287,7 @@ def processDocuments(documents:list[str], outDirectory:str) -> None:
 			return tag
 
 
-		def getTextFromXML(elem:Paragraph) -> str:
+		def getTextFromXML(elem:Paragraph|_Cell) -> str:
 
 			#	Not-used document tags.
 			_ignoredTags = ( 'AlternateContent',
@@ -322,7 +323,18 @@ def processDocuments(documents:list[str], outDirectory:str) -> None:
 						for x in element:
 							_result += _parseXML(x)
 					case 't':
-						_result += str(toMD(str(element.text)))
+						bold = ''
+						italics = ''
+						for e in element.getparent():
+							if strippedTag(e.tag) == 'rPr':	# run properties (character formatting of the enclosing run)
+								for ep in e:
+									match strippedTag(ep.tag):
+										case 'b' if ep.attrib.get(_val, 'true') == 'true':
+											bold = '**'
+										case 'i' if ep.attrib.get(_val, 'true') == 'true':
+											italics = '_'
+						_result += f'{bold}{italics}{str(toMD(str(element.text)))}{italics}{bold}'
+
 					case 'br':
 						_result += _linebreak
 					case 'bookmarkStart' | 'bookmarkEnd':		# TODO ?
@@ -381,7 +393,16 @@ def processDocuments(documents:list[str], outDirectory:str) -> None:
 				return _result
 
 			#_print(ET.fromstring(elem._p.xml))
-			return _parseXML(ET.fromstring(elem._p.xml))
+			match elem:
+				case Paragraph():	# type: ignore[misc]
+					return _parseXML(ET.fromstring(elem._p.xml))
+				case _Cell():		# type: ignore[misc]
+					result = ''
+					for p in elem.paragraphs:
+						result += _parseXML(ET.fromstring(p._p.xml))
+					return result
+				case _:
+					return ''
 
 
 
@@ -620,7 +641,7 @@ def processDocuments(documents:list[str], outDirectory:str) -> None:
 						for row in elem.rows:
 							cells:list[str] = []
 							for cell in row.cells:
-								cells.append(f'{toMD(cell.text)} ')	# add at least a space
+								cells.append(f'{getTextFromXML(cell)} ')	# add at least a space
 							rows.append(cells)
 							nrRows += 1
 						
@@ -706,7 +727,7 @@ def processDocuments(documents:list[str], outDirectory:str) -> None:
 
 			for i in range(len(lines)):
 				line = lines[i]
-				lines[i] = re.sub(_referenceExpression, _repl, line)
+				lines[i] = re.sub(_referenceExpression, _repl, line)	# type:ignore[arg-type]
 
 			#
 			#	Write produced Markdown file
-- 
GitLab