#
# generateTOC.py
#
# Script to generate the table of contents for a markdown file.
#
# (c) 2023 by Andreas Kraft
# License: BSD 3-Clause License. See the LICENSE file for further details.
#
from __future__ import annotations

from typing import Tuple
import argparse
import os
import re
import sys

try:
    from rich import print
except ImportError:
    # Nothing in the TOC generation needs rich, so a missing package must not
    # prevent the script from running.
    pass


tocTags:list[str] = ['[TOC]', '[CONTENT]', '<!--TOC-->']
""" TOC tags to replace in the document with the TOC. """

# Matches a single HTML tag. Compiled once because it is applied to every heading.
_HTML_TAG = re.compile('<[^<]+?>')

# Single-character substitutions for markdown header links.
# Add more special characters to replace in markdown header links if necessary.
_LINK_TRANSLATION = str.maketrans({
    ' ': '-',
    '.': None,
    ';': None,
    '&': '%26',
    '(': '%28',
    ')': '%29',
    '>': '%3E',
    ']': None,
    ':': '%3A',
})


def backupFile(filename:str) -> None:
    """ Backup a file by renaming it to `<filename>.bak`.

        Nothing happens if *filename* is not an existing regular file.
        An already existing backup file is overwritten.

        Args:
            filename: The filename to backup.
    """
    if os.path.isfile(filename):
        # os.replace() overwrites an existing backup on every platform, while
        # os.rename() raises FileExistsError on Windows in that case.
        os.replace(filename, filename + '.bak')


def _prepareTOClink(line:str) -> str:
    """ Prepare the link target (anchor) of a TOC entry.

        HTML tags are removed, the text is lower-cased, escaped opening
        brackets (`\\[`) are dropped, and the remaining special characters
        are removed or percent-encoded.

        Args:
            line: The (bracket-escaped) headline to prepare.

        Returns:
            The prepared link target, without the leading `#`.
    """
    line = _HTML_TAG.sub('', line)
    return line.lower().replace('\\[', '').translate(_LINK_TRANSLATION)


def _collectHeaders(document:list[str], levels:int, contents:bool) -> list[Tuple[str, int]]:
    """ Collect the headlines of a markdown document.

        Args:
            document: The lines of the document.
            levels: Number of heading levels to include; 0 means no limit.
            contents: Whether to include the "Contents" headline itself.

        Returns:
            List of tuples (headline text, zero-based level).
    """
    headers:list[Tuple[str, int]] = []
    for line in document:
        stripped = line.strip()
        if not stripped.startswith('#'):
            continue
        title = stripped.lstrip('#')
        level = len(stripped) - len(title) - 1  # level is number of # - 1

        # Skip if level is too large
        if levels and level >= levels:
            continue

        # Skip the Contents headline if necessary
        headline = title.strip()
        if headline == 'Contents' and not contents:
            continue
        headers.append((headline, level))
    return headers


def _buildTOC(headers:list[Tuple[str, int]], indent:int) -> str:
    """ Build the markdown text of the "# Contents" section.

        Args:
            headers: List of tuples (headline text, zero-based level).
            indent: Number of indent spaces per level.

        Returns:
            The TOC, starting with the "# Contents" headline and ending with
            an empty line.
    """
    entries:list[str] = []
    for headline, level in headers:
        # Escape opening brackets so that they do not start a nested link
        title = headline.replace('[', '\\[')
        entry = ' ' * (level * indent) + f'[{title}](#{_prepareTOClink(title)}) \n'
        # Strip HTML tags per entry. Stripping the accumulated TOC instead
        # would let a "<" of one entry pair up with a ">" of a later one and
        # delete the text in between.
        entries.append(_HTML_TAG.sub('', entry))
    return '# Contents\n\n' + ''.join(entries) + '\n'


def _writeDocument(document:list[str], toc:str, outDocument:str, replaceTags:bool) -> None:
    """ Write the document with the TOC inserted.

        The TOC replaces the first "# Contents" line (or, if enabled, the
        first TOC tag) and all following lines up to the next headline.

        Args:
            document: The lines of the original document.
            toc: The TOC to insert.
            outDocument: The filename to write to.
            replaceTags: Whether lines consisting of a TOC tag are replaced as well.
    """
    with open(outDocument, 'w', encoding='utf-8', errors='replace') as f:
        inToc = False
        tocDone = False
        for line in document:
            stripped = line.strip()

            # Skip the old TOC when writing
            if inToc:
                if not stripped.startswith('#'):
                    continue
                f.write('\n')  # Add a newline
                inToc = False
                tocDone = True

            # Write the new TOC in the right place
            if not tocDone and (stripped == '# Contents' or (replaceTags and stripped in tocTags)):
                inToc = True
                f.write(toc)
                continue

            # Write the rest
            f.write(line)


def processDocument(args:argparse.Namespace) -> None:
    """ Process the document and generate the TOC.

        The TOC is only written if `addContent` or `outfile` is set. The
        target file is backed up first, and the TOC replaces the old TOC in
        the section "# Contents" (or a TOC tag, if `tocTags` is set).
        The TOC is not echoed to the console.

        Args:
            args: The command line arguments. The attributes `document`,
                `levels`, `contents`, `indent`, `addContent`, `outfile`
                and `tocTags` are read.
    """
    # Read the document
    # Note: We use utf-8 and replace errors to avoid problems with special or unknown characters.
    with open(args.document, 'r', encoding='utf-8', errors='replace') as f:
        document = f.readlines()

    headers = _collectHeaders(document, args.levels, args.contents)
    toc = _buildTOC(headers, args.indent)

    # Add the TOC to the document
    # An outfile explicitly enables the TOC to be added to the document
    if args.addContent or args.outfile:
        outDocument = args.outfile if args.outfile else args.document
        backupFile(outDocument)
        _writeDocument(document, toc, outDocument, args.tocTags)


def main(args=None):
    """ Parse the command line arguments and process the document.

        Args:
            args: Optional list of argument strings. If None, the arguments
                are taken from the command line (`sys.argv`).
    """

    def nonNegativeInt(value:str) -> int:
        """ Check if a value is a non-negative integer.

            Args:
                value: The value to check.

            Returns:
                The value if it is a non-negative integer.

            Raises:
                argparse.ArgumentTypeError: If the value is not a non-negative integer.
        """
        ivalue = int(value)
        if ivalue < 0:
            raise argparse.ArgumentTypeError(f'{value} is an invalid non-negative value')
        return ivalue

    # Parse command line arguments
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--add-content', '-a', action='store_true', dest='addContent', default=False, help='add TOC to "# Contents" section in the document')
    parser.add_argument('--contents', '-c', action='store_true', dest='contents', default=False, help='add link to "Contents" section in the generated TOC')
    parser.add_argument('--indent', '-i', action='store', dest='indent', type=nonNegativeInt, default=4, metavar='<indent>', help='indent spaces for each level')
    parser.add_argument('--level', '-l', action='store', dest='levels', type=nonNegativeInt, default=0, help='limit the TOC levels; 0 means no limit')
    parser.add_argument('--outfile', '-o', action='store', dest='outfile', help='set the output file name; if not set, the input file will be overwritten')
    parser.add_argument('--toc-tags', '-t', action='store_true', dest='tocTags', default=False, help='replace special TOC tokens in the document')
    parser.add_argument('document', help='document to parse')

    # Honour explicitly passed arguments; None makes argparse read sys.argv
    processDocument(parser.parse_args(args))


if __name__ == '__main__':
    sys.exit(main())