diff --git a/toMkdocs/toMkdocs.py b/toMkdocs/toMkdocs.py index 71a5b857145191f19b83891fa49240843c27dab7..1deb1c8d932b3e0dbd3c531cc1f17d9f35cc6a3d 100644 --- a/toMkdocs/toMkdocs.py +++ b/toMkdocs/toMkdocs.py @@ -7,6 +7,8 @@ # directory structure. # from __future__ import annotations + +import logging from enum import Enum, auto import argparse, re, os, shutil, hashlib, base64 from dataclasses import dataclass @@ -485,6 +487,19 @@ def parse_pandoc_table_with_spans(pandoc_table): self.list_flag = False self.auxiliar_index = None + def set_alignment(self): + header_delimiter_index = 0 + while header_delimiter_index in range(len(default_alignments)) and self.position > header_delimiter_positions[header_delimiter_index]: + header_delimiter_index += 1 + if header_delimiter_index in range(len(default_alignments)): + if self.position < header_delimiter_positions[header_delimiter_index]: + self.alignment = default_alignments[header_delimiter_index] + elif self.position == header_delimiter_positions[header_delimiter_index]: + self.alignment = default_alignments[header_delimiter_index] + header_delimiter_index += 1 + else: + raise ValueError("Invalid table formatting") + class Row(): """ Represents a row in the markdown file. """ cells:list[Cell] = [] @@ -492,6 +507,12 @@ def parse_pandoc_table_with_spans(pandoc_table): def __init__(self, length: int = 1) -> None: self.cells = [Cell() for _ in range(length)] + def __getitem__(self, item): + return self.cells[item] + + def __setitem__(self, key, value): + self.cells[key] = value + # Detect separator lines by pattern (it does not take into account partial separators def is_separator(line): return _matchGridTableSeparator.match(line) @@ -573,7 +594,7 @@ def parse_pandoc_table_with_spans(pandoc_table): has_header = True header_separator_index = index header_rows = [] - parts = re.split(r"\s*\+\s*", lines[index].strip("+")) + parts = re.split(r"\+", lines[index].strip("+")) default_alignments = [] #Calculate default alignments and positions of delimiters for part_index in range(len(parts)): @@ -592,9 +613,6 @@ def parse_pandoc_table_with_spans(pandoc_table): for row in range(len(separator_indices) - 1): table_row = [] auxiliar_rows = [] - auxiliar_row = [] - use_auxiliar_row = [] - list_flags = [] has_merged_cells = False in_data_row = False start, end = separator_indices[row], separator_indices[row + 1] @@ -623,45 +641,31 @@ def parse_pandoc_table_with_spans(pandoc_table): table_row = Row(number_of_columns_row) for i in range(number_of_columns_row): delimiter_index += len(parts[i]) + 1 - table_row.cells[i].alignment = default_alignments[i] if i == 0 else "align=\"center\"" - table_row.cells[i].position = delimiter_index # Position of cell delimiter + + table_row[i].alignment = default_alignments[i] if i == 0 else "align=\"center\"" + table_row[i].position = delimiter_index # Position of cell delimiter + #Set alignment as defined by header separator line - while header_delimiter_index in range(len(default_alignments)) and table_row.cells[i].position > header_delimiter_positions[header_delimiter_index]: - header_delimiter_index += 1 - if header_delimiter_index in range(len(default_alignments)): - if table_row.cells[i].position < header_delimiter_positions[header_delimiter_index]: - table_row.cells[i].alignment = default_alignments[header_delimiter_index] - elif table_row.cells[i].position == header_delimiter_positions[header_delimiter_index]: - table_row.cells[i].alignment = default_alignments[i] - header_delimiter_index += 1 - else: - raise ValueError("Invalid table formatting") - - #auxiliar_row = Row(number_of_columns) - #for i in range(number_of_columns): - #auxiliar_row.append(default_cell) - #use_auxiliar_row.append(False) - #auxiliar_rows.append({'auxiliar_row':auxiliar_row, 'use_auxiliar':use_auxiliar_row, 'list_flags':list_flags}) + table_row[i].set_alignment() elif in_data_row: # Regular data row or partial separator if _matchGridTableBodySeparator.match(line): # Partial separator has_merged_cells = True + cells = re.split(r"[\|\+]", line.strip("|").strip("+")) # (?<!\\)[\|\+] #Add auxiliar line, set delimiters for each cell auxiliar_rows.append(Row(number_of_columns)) aux_delimiter_index = 0 - for i in range(number_of_columns_row): - aux_delimiter_index += len(parts[i]) + 1 - auxiliar_rows[-1].cells[i].position = aux_delimiter_index # Position of cell delimiter + + for auxiliar_cell_index in range(number_of_columns): + aux_delimiter_index += len(cells[auxiliar_cell_index]) + 1 + auxiliar_rows[-1][auxiliar_cell_index].position = aux_delimiter_index # Position of cell delimiter + + auxiliar_rows[-1][i].set_alignment() - cells = re.split(r"\s*[\|\+]\s*", line.strip("|").strip("+")) # (?<!\\)[\|\+] if len(cells) <= number_of_columns: # Colspan: Positions of | with respect to + need to be determined for i in range(len(cells)): if _matchGridTableBodySeparatorLine.match(cells[i]): # A new row is to be added #auxiliar_rows[-1]['use_auxiliar_row'][i] = True - auxiliar_rows[-1].cells[i].list_flag = False - table_row.cells[i].auxiliar_index = len(auxiliar_rows)-1 + auxiliar_rows[-1][i].list_flag = False + table_row[i].auxiliar_index = len(auxiliar_rows)-1 #if cells[i].startswith(":") and not cells[i].endswith(":"): # auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"left\"" #elif not cells[i].startswith(":") and cells[i].endswith(":"): @@ -670,37 +674,20 @@ def parse_pandoc_table_with_spans(pandoc_table): # auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"center\"" else: # Handle content of the cell - if table_row.cells[i].auxiliar_index is not None: # and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]: - auxiliar_rows[table_row.cells[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index][i], cells[i]) - if not auxiliar_rows[table_row.cells[i].auxiliar_index][i].colspan_adjusted: - auxiliar_rows[table_row.cells[i].auxiliar_index][i].colspan_adjusted = True + if table_row[i].auxiliar_index is not None: # and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]: + auxiliar_rows[table_row[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row[i].auxiliar_index][i], cells[i]) + if not auxiliar_rows[table_row[i].auxiliar_index][i].colspan_adjusted: + auxiliar_rows[table_row[i].auxiliar_index][i].colspan_adjusted = True # TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator - auxiliar_rows[table_row.cells[i].auxiliar_index][i] = adjust_colspan(auxiliar_rows[table_row.cells[i].auxiliar_index], i, len(cells), line, number_of_columns, delimiter_positions) + auxiliar_rows[table_row[i].auxiliar_index][i] = adjust_colspan(auxiliar_rows[table_row[i].auxiliar_index], i, len(cells), line, number_of_columns, delimiter_positions) else: - table_row.cells[i] = handling_content(table_row.cells[i], cells[i]) + table_row[i] = handling_content(table_row[i], cells[i]) # Cell which is not separator - table_row.cells[i].rowspan += 1 + table_row[i].rowspan += 1 if not table_row.cells[i].colspan_adjusted: - table_row.cells[i].colspan_adjusted = True + table_row[i].colspan_adjusted = True #TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator - table_row.cells[i] = adjust_colspan(table_row.cells, i, len(cells), line, number_of_columns, delimiter_positions) - #elif len(cells) == number_of_columns: # Simple row with partial separator, # A new row is to be added - # for i in range(len(cells)): - # if _matchGridTableBodySeparatorLine.match(cells[i]): # Update cell in new row - # use_auxiliar_row[i] = True - # list_flags[i] = False - # if cells[i].startswith(":") and not cells[i].endswith(":"): - # auxiliar_row[i]['alignment'] = "align=\"left\"" - # elif not cells[i].startswith(":") and cells[i].endswith(":"): - # auxiliar_row[i]['alignment'] = "align=\"right\"" - # else: - # auxiliar_row[i]['alignment'] = "align=\"center\"" - # else: - # #Handle content of the cell - # list_flags[i], table_row[i] = handling_content(table_row[i], cells[i],list_flags[i]) - # # Cell which is not separator - # table_row[i]['rowspan'] += 1 - # # Adjusting of colspan not needed, no colspan as number of cells is equal to number of columns + table_row[i] = adjust_colspan(table_row, i, len(cells), line, number_of_columns, delimiter_positions) else: raise ValueError("More cells than columns found") else: # Data row @@ -708,30 +695,29 @@ def parse_pandoc_table_with_spans(pandoc_table): if len(cells) < number_of_columns: # Colspan: Positions of | with respect to + need to be determined for i in range(len(cells)): # Handle content of the cell - if table_row.cells[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]: - auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i], cells[i]) + if table_row[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]: + auxiliar_rows[table_row.cells[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row[i].auxiliar_index][i], cells[i]) if not auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i].colspan_adjusted: auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i].colspan_adjusted = True #TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator - auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = adjust_colspan(auxiliar_rows[table_row.cells[i].auxiliar_index].cells, i, len(cells), line, number_of_columns, delimiter_positions) + auxiliar_rows[table_row[i].auxiliar_index][i] = adjust_colspan(auxiliar_rows[table_row[i].auxiliar_index].cells, i, len(cells), line, number_of_columns, delimiter_positions) else: - table_row.cells[i] = handling_content(table_row.cells[i], cells[i]) + table_row[i] = handling_content(table_row[i], cells[i]) if not table_row.cells[i].colspan_adjusted: - table_row.cells[i].colspan_adjusted = True - table_row.cells[i] = adjust_colspan(table_row.cells, i, len(cells), line, number_of_columns, delimiter_positions) + table_row[i].colspan_adjusted = True + table_row[i] = adjust_colspan(table_row.cells, i, len(cells), line, number_of_columns, delimiter_positions) elif len(cells) == number_of_columns: # Simple row for i in range(len(cells)): - if table_row.cells[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]: - auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i] = handling_content(auxiliar_rows[table_row.cells[i].auxiliar_index].cells[i], cells[i]) + if table_row[i].auxiliar_index is not None:# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]: + auxiliar_rows[table_row[i].auxiliar_index][i] = handling_content(auxiliar_rows[table_row[i].auxiliar_index][i], cells[i]) else: # Handle content of the cell - table_row.cells[i] = handling_content(table_row.cells[i], cells[i]) + table_row[i] = handling_content(table_row[i], cells[i]) else: raise ValueError("More cells than columns found") else: raise ValueError("No separator line found for row starting") - if has_header and start >= header_separator_index: # table_row and auxiliar_row are part of data_rows data_rows.append(table_row.cells) if has_merged_cells: @@ -759,7 +745,7 @@ def parse_pandoc_table_with_spans(pandoc_table): for cell in row: if cell.content is not None: # Replacing "<" by < - cell.content = cell.content.replace("<", "<") + #cell.content = cell.content.replace("<", "<") #Bold for bold_characters in ["**", "__"]: @@ -828,75 +814,79 @@ def generate_html_table_with_spans(pandoc_table): :param pandoc_table: String of the Pandoc-style grid table. :return: HTML string. """ - grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table) - - html = "<table>\n" - has_header = False + try: + grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table) + except: + logging.ERROR("Grid table could not be generated") + return "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS" + else: + html = "<table>\n" + has_header = False - for row in grid_header: - for cell in row: - if cell.rowspan != 0 and cell.colspan != 0: - has_header = True - if has_header: - html += " <thead>\n" for row in grid_header: + for cell in row: + if cell.rowspan != 0 and cell.colspan != 0: + has_header = True + if has_header: + html += " <thead>\n" + for row in grid_header: + html += " <tr>\n" + for cell in row: + if cell.rowspan == 0 or cell.colspan == 0: + continue + else: + # Prepare content, in case there's a list + #print(cell.content) + if matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>", + cell.content): # Update cell in new row + #print("MATCHING") + list = "<ul>" + # Build list the matches + for match in matches: + list += "<li>" + match[1] + "</li>" + list += "</ul>" + cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+", list, cell.content) + # Enforce left alignment if cell contains a list + cell.alignment = "align=\"left\"" + #else: + # print("NOT MATCHING") + + rowspan = f" rowspan=\"{cell.rowspan}\"" if cell.rowspan > 1 else "" + colspan = f" colspan=\"{cell.colspan}\"" if cell.colspan > 1 else "" + html += f" <th{rowspan}{colspan} {cell.alignment}>{cell.content}</th>\n" + html += " </tr>\n" + html += " </thead>\n" + + html += " <tbody>\n" + for row in grid_body: html += " <tr>\n" for cell in row: if cell.rowspan == 0 or cell.colspan == 0: continue else: - # Prepare content, in case there's a list + #Prepare content, in case there's a list #print(cell.content) - if matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>", - cell.content): # Update cell in new row + if matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>", cell.content): # Update cell in new row #print("MATCHING") + #print(cell.content) list = "<ul>" # Build list the matches for match in matches: list += "<li>" + match[1] + "</li>" list += "</ul>" - cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+", list, cell.content) + cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+",list, cell.content) # Enforce left alignment if cell contains a list cell.alignment = "align=\"left\"" #else: - # print("NOT MATCHING") - + #print("NOT MATCHING") rowspan = f" rowspan=\"{cell.rowspan}\"" if cell.rowspan > 1 else "" colspan = f" colspan=\"{cell.colspan}\"" if cell.colspan > 1 else "" - html += f" <th{rowspan}{colspan} {cell.alignment}>{cell.content}</th>\n" + html += f" <td{rowspan}{colspan} {cell.alignment}>{cell.content}</td>\n" html += " </tr>\n" - html += " </thead>\n" - html += " <tbody>\n" - for row in grid_body: - html += " <tr>\n" - for cell in row: - if cell.rowspan == 0 or cell.colspan == 0: - continue - else: - #Prepare content, in case there's a list - #print(cell.content) - if matches := re.findall(r"\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>", cell.content): # Update cell in new row - #print("MATCHING") - #print(cell.content) - list = "<ul>" - # Build list the matches - for match in matches: - list += "<li>" + match[1] + "</li>" - list += "</ul>" - cell.content = re.sub(r"(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+",list, cell.content) - # Enforce left alignment if cell contains a list - cell.alignment = "align=\"left\"" - #else: - #print("NOT MATCHING") - rowspan = f" rowspan=\"{cell.rowspan}\"" if cell.rowspan > 1 else "" - colspan = f" colspan=\"{cell.colspan}\"" if cell.colspan > 1 else "" - html += f" <td{rowspan}{colspan} {cell.alignment}>{cell.content}</td>\n" - html += " </tr>\n" - - html += " </tbody>\n" - html += "</table>" - return html + html += " </tbody>\n" + html += "</table>" + return html def analyseMarkdown(filename:str) -> Document: """ Analyse the markdown file and split it into clauses.