From c43a67536ee0c5e04176ba61654a451f0a0e8422 Mon Sep 17 00:00:00 2001
From: ankraft <an.kraft@gmail.com>
Date: Fri, 17 Jan 2025 14:09:53 +0100
Subject: [PATCH] Support for html character entities when replacing certain
 characters

---
 config.ini | 19 +++++++++++++++++--
 spec2md.py | 12 ++++++++++--
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/config.ini b/config.ini
index 46735eb..f394d9d 100644
--- a/config.ini
+++ b/config.ini
@@ -90,11 +90,26 @@ ignore = toc 1, toc 2, toc 3, toc 4, toc 5, toc 6, toc 7, toc 8, toc 9
 ; The characters to be replaced and the characters that make the
 ; replacement string must be specified as hex values
 ; To remove a character from the file set it to 00 (2 zeros)
+;
+; A replacement may also be given as an HTML character entity (a value starting with `&`) instead of hex values. Some common characters and their entities:
+
+; Registered trademark (®) — (`&reg;`)
+; Trademark (™) — (`&trade;`)
+; Euro (€) — (`&euro;`)
+; Left arrow (←) — (`&larr;`)
+; Up arrow (↑) — (`&uarr;`)
+; Right arrow (→) — (`&rarr;`)
+; Down arrow (↓) — (`&darr;`)
+; Degree (°) — (`&#176;`)
+; Pi (π) — (`&#960;`)
+
 
 ; "(c)"
-a9 = 286329
+; a9 = 286329
+a9 = &copy;
 ; "(R)"
-ae = 285229
+; ae = 285229
+ae = &reg;
 ; space
 a0 = 20
 ; double quote
diff --git a/spec2md.py b/spec2md.py
index 1c26aa5..e12a964 100644
--- a/spec2md.py
+++ b/spec2md.py
@@ -192,8 +192,16 @@ class DocumentConfiguration(object):
 		self.generateToc = config.getboolean('toc', 'generateToc', fallback = False)
 
 		# characters
-		self.characters = { int(c, 16) : binascii.unhexlify(config.get('characters', c)).decode('utf-8')	# type: ignore [attr-defined]
-							for c in config['characters'] }
+		# self.characters = { int(c, 16) : binascii.unhexlify(config.get('characters', c)).decode('utf-8')	# type: ignore [attr-defined]
+		# 					for c in config['characters'] }
+		self.characters = {}
+		for c,v in config['characters'].items():
+			if v.startswith('&'):
+				# HTML entity
+				self.characters[int(c, 16)] = v
+			else:
+				# Hex-encoded UTF-8 byte sequence: decode it to the replacement string
+				self.characters[int(c, 16)] = binascii.unhexlify(config.get('characters', c)).decode('utf-8')	# type: ignore [attr-defined]
 
 		# Media & Converter
 		self.emfConverterPng = config.get('media', 'emfConverterPng', fallback = None)
-- 
GitLab