diff --git a/processMDSpec.py b/processMDSpec.py new file mode 100644 index 0000000000000000000000000000000000000000..f065f95d5a71f7d428886d4e8272095b02dce65e --- /dev/null +++ b/processMDSpec.py @@ -0,0 +1,198 @@ +# +# processMDSpec.py +# +# (c) 2025 by Andreas Kraft +# License: BSD 3-Clause License. See the LICENSE file for further details. +# +""" This script processes markdown specification files. It handles the + include statements and the front matter. It can also render the markdown + content on console or output the front matter only. +""" + +from __future__ import annotations +from typing import Tuple, Generator +import argparse +from rich import print, markdown +import re, sys, yaml +from contextlib import contextmanager + + +_frontMatter:dict = {} +_includeStack:list[str] = [] + +@contextmanager +def includeStack(filename:str) -> Generator [None, None, None]: + """ Handle the include stack. + + This is used to detect circular includes and to keep track of the + include stack. + + Args: + filename: The name of the file being processed. + + Raises: + Exception: If a circular include is detected. + + Returns: + Generator: A generator that yields nothing. + """ + if filename in _includeStack: + print(f'[red]Circular include detected: {filename}') + raise Exception('Circular include detected') + _includeStack.append(filename) + yield + _includeStack.pop() + + +def processFrontMatter(lines:list[str], args:argparse.Namespace) -> Tuple[dict, list[str]]: + """ Process the front matter of a markdown file. This includes extracting + the front matter information and returning it as a dictionary. + + Currently only YAML front matter is supported. It can be extended later. + + Args: + lines: The lines of the markdown file. + args: The command line arguments. + + Raises: + yaml.YAMLError: If the front matter cannot be parsed as YAML. + + Returns: + dict: The front matter information as a dictionary. + list[str]: The lines of the markdown file without the front matter. + """ + + if not lines or not lines[0].startswith('---'): + return {}, lines + + frontMatterLines:list[str] = [] + for line in lines[1:]: + if re.match(r'^---\s*', line): + break + frontMatterLines.append(line) + + # Remove the front matter from the lines + lines = lines[len(frontMatterLines)+2:] + + # Parse the front matter as YAML + try: + return yaml.safe_load(''.join(frontMatterLines)), lines + except yaml.YAMLError as e: + print(f'[red]Error parsing front matter: {e}') + raise + + +def processFile(args:argparse.Namespace) -> str: + """ Handle the include statements in the markdown files. This includes + processing the include statements and removing the include statements + from the markdown files. + + Args: + args: The command line arguments. + + Raises: + Exception: If the file cannot be processed. + + Returns: + The processed markdown content as a string. + """ + + def handleIncludesForFile(filename:str) -> str: + """ Read a single markdown file and return its content. + + Args: + filename: The name of the file to read. + + Raises: + FileNotFoundError: If the file cannot be found. + + Returns: + The content of the file. + """ + + with includeStack(filename): + try: + with open(filename, 'r') as f: + lines = f.readlines() + except FileNotFoundError: + print(f'[red]File not found: {filename}') + raise + + # extract front matter information + fm, lines = processFrontMatter(lines, args) + if fm: + _frontMatter[filename] = fm + + if not args.doInclude: + return ''.join(lines) + + inCodeFence = False + for line in lines: + + # Ignore code fences + if re.match(r'^\s*```.*', line): + inCodeFence = not inCodeFence + continue + if inCodeFence: + continue + + # Check for ::include{file=...} pattern using regex at the beginning of a line + match = re.search(r'^::include\{\s*file=(.*?)\s*\}', line.strip()) + if match: + include_filename = match.group(1) + # Read the included file and replace the include statement with its content + include_content = handleIncludesForFile(include_filename) + lines[lines.index(line)] = include_content + + return ''.join(lines) + + return handleIncludesForFile(args.document) + + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Process markdown specification files.') + parser.add_argument('--no-include', dest='doInclude', action='store_false', default=True, help="don't process include statements") + parser.add_argument('--render-markdown', '-md', dest='renderAsMarkdown', action='store_true', help='render output as markdown') + parser.add_argument('--process-frontmatter', '-fm', dest='outputFrontMatter', action='store_true', help='output front matter only') + parser.add_argument('--frontmatter-only', '-fmo', dest='onlyFrontMatter', action='store_true', help='output only front matter') + parser.add_argument('--verbose', '-v', action='store_true', help='print debug information to stderr.') + parser.add_argument('document', type = str, help = 'a markdown specification document to process') + args = parser.parse_args() + + if args.verbose: + if not args.doInclude: + print(f'[yellow]Skipping processing include statements', file=sys.stderr) + else: + print(f'[green]Processing include statements', file=sys.stderr) + + try: + lines = processFile(args) + except Exception as e: + print(f'[red]Error processing file: {e}', file=sys.stderr) + quit(1) + + if args.outputFrontMatter or args.onlyFrontMatter: + # Collect front matter information in the output + if not args.onlyFrontMatter: + print('---') + + # The following is a workaround to keep the order of the dictionary + # see https://stackoverflow.com/a/52621703 + yaml.add_representer(dict, lambda self, data: yaml.representer.SafeRepresenter.represent_dict(self, data.items())) + print(yaml.dump(_frontMatter, default_flow_style=False), end='') + + if not args.onlyFrontMatter: + print('---') + + if not args.onlyFrontMatter: + if args.renderAsMarkdown: + # Render the markdown content + print(markdown.Markdown(lines)) + else: + # Print the raw markdown content + print(lines) + + +