diff --git a/README.md b/README.md index 9dc1608f07fc8f41541a7c7276ad44f51ec54357..30cc4ff900a4e0a8084da6af93ba5e6f960679ca 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ python3 -m pip install -r requirements.txt ## Usage - Create a directory with the Word document in it. The Word document **must** be in *docx* format. This can be achieved by opening the document with *Word* and save it in *docx* format to another file. - Create a configuration file with the same base name as the Word document + *.ini* extension. This file may contain different configurations as the standard *config.ini* file provided. -- Alternatively, a file named *config.ini* will apply to all files in that directory. + - Alternatively, a file named *config.ini* will apply to all files in that directory. - It is only necessary to add the settings that are different from the *config.ini* file in the project's root directory. That file will always act as a fallback. - Run the converter as follows: ``` @@ -26,6 +26,21 @@ python3 spec2md.py <path-to-word-document> Is *LibreOffice* already running? If yes, then close it. + +### Does the converter generate links to tables? + +If the table caption starts with "Table " and has a ":" after the table number, then an HTML anchor with that number +is appended to the converted table caption. The anchor name is "table_\<table number>". This anchor can be used to link to the table, for example: + +```markdown +[foo](#table_7.2.4-1) + +or + +<a href="#table_7.2.4-1">bar</a> +``` + + ### Are linebreaks, paragraphs, and lists supported in table cells? Unfortunately, markdown doesn't support multiple paragraphs in table cells. A table cell must be a single line. 
However, one can add a html `<br />` linebreak to break between lines: diff --git a/requirements.txt b/requirements.txt index f32fa76f590e4719d71c394f447d7104ebc325c0..e3311b2d6ef706d14daa7a3eaca891772f08df08 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,9 +12,11 @@ markdown-it-py==3.0.0 # via rich mdurl==0.1.2 # via markdown-it-py -pygments==2.15.1 +pygments==2.17.2 # via rich -python-docx==0.8.11 +python-docx==1.1.0 # via oneM2M-spec-2-MD-converter (setup.py) -rich==13.4.2 +rich==13.7.0 # via oneM2M-spec-2-MD-converter (setup.py) +typing-extensions==4.8.0 + # via python-docx diff --git a/spec2md.py b/spec2md.py index 02799b4e64c7dac86aeb4753ecbefb8c2d0705ed..5a19b8ecb680321e05272135a73874c8afb7e012 100644 --- a/spec2md.py +++ b/spec2md.py @@ -89,7 +89,7 @@ class SectionNumbers(object): self.annex:int = 64 - def nextSectionNumber(self, level:int, isAnnex:bool = False) -> str: + def nextSectionNumber(self, level:int, isAnnex:Optional[bool] = False) -> str: if isAnnex: self.levels[0] = self.annex else: @@ -190,7 +190,7 @@ class DocumentConfiguration(object): self.characters = { int(c, 16) : binascii.unhexlify(config.get('characters', c)).decode('utf-8') # type: ignore [attr-defined] for c in config['characters'] } - # Media + # Media & Converter self.emfConverterPng = config.get('media', 'emfConverterPng', fallback = None) self.emfConverterSvg = config.get('media', 'emfConverterSvg', fallback = None) @@ -401,6 +401,15 @@ def processDocuments(documents:list[str], outDirectory:str, skipImageConversion: _print(f'[yellow]{_symError}') _result += f'<mark>{_symError}</mark>' + # ignore deleted text + case 'del': + pass + + # try to keep the inserted text + case 'ins': + for x in element: + _result += _parseXML(x) + case _ if tag in _ignoredTags: # ignore pass @@ -606,6 +615,8 @@ def processDocuments(documents:list[str], outDirectory:str, skipImageConversion: elif style in docConfig.tablecaption: lines.append('') 
lines.append(f'**{replaceNL(text).strip()}**') + anchor = f'<a name="table_{caption[6:].split(":")[0].strip()}"></a>' if caption.startswith('Table ') and ':' in caption else '' + lines.append(f'**{caption}**{anchor}') # Image Caption elif style in docConfig.imagecaption: @@ -775,14 +786,14 @@ def processDocuments(documents:list[str], outDirectory:str, skipImageConversion: # Convert media files # - def _convertImage(converter:str, format:str): + def _convertImage(converter:str, format:str) -> None: if converter: for fn in list(emfFiles): _f = Path(fn) # Filename to handle _t = imageDirName # Target directory if _f.stem not in referencedImages: if not fn.startswith(unreferencedDirName): - _print(f'[red]Unreferenced image: {PurePath(fn).name} {"(skipped)" if docConfig.skipUnreferencedMediaFiles else ""}', highlight = False) + _print(f'[yellow]Unreferenced image in the document: {PurePath(fn).name} {"(skipped)" if docConfig.skipUnreferencedMediaFiles else ""}', highlight = False) _print(f'[yellow]Moving image file to: {unreferencedDirName}', highlight = False) _n = f'{unreferencedDirName}/{_f.name}' _p = _f.replace(_n) @@ -796,7 +807,9 @@ def processDocuments(documents:list[str], outDirectory:str, skipImageConversion: cmd = cmd.replace('{infile}', fn).replace('{outdir}', _t) _print(f'Converting EMF file: {fn} to "{format}"', highlight = False) if (res := subprocess.run(cmd, shell = True, capture_output = True)).returncode != 0: - _print(f'[red] Error running command: {res.stderr.decode("utf-8")}') + _print(f'[red]Error running command: {res.stderr.decode("utf-8")}') + _print(f'[red]Please check the configuration file -> section "\[media]" for the converter command: {converter}') + break if not skipImageConversion: if docConfig.emfConverterPng: