diff --git a/README.md b/README.md index 30cc4ff900a4e0a8084da6af93ba5e6f960679ca..df9a76c519ee2c5cf2bc72fb5f8b594436c0886a 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,8 @@ python3 -m pip install -r requirements.txt ## Usage - Create a directory with the Word document in it. The Word document **must** be in *docx* format. This can be achieved by opening the document with *Word* and save it in *docx* format to another file. -- Create a configuration file with the same base name as the Word document + *.ini* extension. This file may contain different configurations as the standard *config.ini* file provided. - - Alternatively, a file named *config.ini* will apply to all files in that directory. +- Optional: Create a configuration file *config.ini* in that directory. This file may contain settings that differ from the configuration file in the project's root directory. This configuration file will apply to all files in that directory. + - Alternatively, create a configuration file with the same base name as the Word document. This configuration file will only apply to the Word document with the same base name. - It is only necessary to add the settings that are different from the *config.ini* file in the project's root directory. That file will always act as a fallback. - Run the converter as follows: ``` @@ -63,5 +63,6 @@ Lists in table cells are also not possible. One may use html lists for this, but ## Changes +- **2024-01-09** - Added support for merging consecutive code paragraphs into a single code block. - **2023-08-18** - Improved handling of sometimes broken inline formatting in table cells. Adding more default heading formats. - **2023-07-27** - Added converting bold and italic text in paragraphs, headers and tables. 
\ No newline at end of file diff --git a/config.ini b/config.ini index 991a5751156b4ada5348b4bd2988c5193652330e..af8585c3683c33d8f012f0e06192e30c0a529099 100644 --- a/config.ini +++ b/config.ini @@ -7,8 +7,12 @@ [general] +# Replace non-breaking spaces in the word document with an HTML space entity. replaceNbsp = +# Replace the less than character in the word document with an HTML entity. +replaceLt = < + ; Rename EMF/WMF image references to a different file extension. ; Allowed values: png, svg. ; If not preseent, no renaming will happen. @@ -18,12 +22,14 @@ renameEMFExtension = svg ; Default: false skipUnreferencedMediaFiles = false -replaceLt = < +# Combine code paragraphs into a single markdown code paragraph. +combineCodeParagraphs = true ; Add image captions to the markdown's alternate text. ; Note, that the image caption has follow the image in the document. imageCaptions2AltText = true + [toc] addSectionNumbers = false excludeFromNumbering = @@ -35,7 +41,7 @@ addTocMacro = false [paragraphs] normal = normal -h1 = heading 1 +h1 = heading 1, tt h2 = heading 2 h3 = heading 3 h4 = heading 4 @@ -48,6 +54,7 @@ a1 = heading 1 a2 = heading 2 a3 = heading 3 note = no +code = pl example = ex, ew ul1 = b1, b1+, list paragraph ul2 = b2, b2+ diff --git a/spec2md.py b/spec2md.py index b4a82a17e7c7cc65c83fc60f25481aa57120041b..d618ce5007f694e8b94173d9de53f1b564ee2759 100644 --- a/spec2md.py +++ b/spec2md.py @@ -27,6 +27,7 @@ import configparser, zipfile from lxml import etree as ET class Style(IntEnum): + code = auto() example = auto() image = auto() imagecaption = auto() @@ -145,6 +146,7 @@ class DocumentConfiguration(object): self.renameEMFExtension = config.get('general', 'renameEMFExtension', fallback = None) self.skipUnreferencedMediaFiles = config.getboolean('general', 'skipUnreferencedMediaFiles', fallback = False) self.imageCaptions2AltText = config.getboolean('general', 'imageCaptions2AltText', fallback = True) + self.combineCodeParagraphs = 
config.getboolean('general', 'combineCodeParagraphs', fallback = True) # Paragraphs self.paragraphs = { c : config.getlist('paragraphs', c) # type: ignore [attr-defined] @@ -170,6 +172,7 @@ class DocumentConfiguration(object): self.ul4 = self.paragraphs['ul4'] self.ul5 = self.paragraphs['ul5'] #self.continuedlist = self.paragraphs['continuedlist'] + self.code = self.paragraphs['code'] self.note = self.paragraphs['note'] self.example = self.paragraphs['example'] self.tablecaption = self.paragraphs['tablecaption'] @@ -653,6 +656,12 @@ def processDocuments(documents:list[str], outDirectory:str, skipImageConversion: lines.append('') lines.append(text) + # Code + elif style in docConfig.code: + checkSameStyle(Style.code, lambda:lines.append('')) + for _t in text.split(_linebreak): + lines.append(f'```{_t if _t else " "}``` ') # at least an empty space. And 2 spaces at the end for newline + # Example elif style in docConfig.example: checkSameStyle(Style.example, lambda:lines.append('')) @@ -745,6 +754,29 @@ def processDocuments(documents:list[str], outDirectory:str, skipImageConversion: lines[i] = line + # + # Combine multiple consecutive "code" lines + # + + if docConfig.combineCodeParagraphs: + codeblock:list[str] = [] + _lines:list[str] = [] + for i in range(len(lines)): + line = lines[i] + if line.startswith('```') and line.endswith('``` '): + # Store code block + codeblock.append(line[3:-5]) + elif codeblock: + # Add whole code block to lines + _lines.append('```') + _lines.extend(codeblock) + _lines.append('```') + codeblock = [] + else: + # Add line + _lines.append(line) + lines = _lines + # # Insert auto-generated table of contents #