Skip to content
Snippets Groups Projects
Commit be71662c authored by Andreas Kraft
Browse files

Fixed some regex

parent 582116cd
Branches
Tags
1 merge request: !1 "Restructuring and cleaning scripts for Mkdocs"
......@@ -13,6 +13,8 @@ from regexMatches import *
# HTML alignment attributes assigned to a cell's `alignment`.
_alignLeft = 'align="left"'
_alignRight = 'align="right"'
_alignCenter = 'align="center"'

# End-of-list-element marker appended to a cell's content while parsing, so
# that the HTML generator can tell where each list element ends.
# !!! Must be exactly ONE character: the list regexes exclude it with a
# single-character negative lookahead, and an empty string would make
# `removesuffix()` a no-op and the lookahead `(?!)` impossible to satisfy.
# NOTE(review): '∆' is the character hard-coded in the list regex of
# generateHtmlTableWithSpans; it must not occur in regular table text.
_nextListElementMark = '∆'

# Output hooks used for informational and debug messages; both default to
# the built-in print.
printInfo = print
printDebug = print
......@@ -157,7 +159,6 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
defaultAlignments:list[str] = []
headerDelimiterPositions:list[int] = []
delimiterPositions:list[int] = []
nextListElementMark = '@'
# Split the input into lines
lines:list[str] = [line for line in gridTable.rstrip().split('\n')]
......@@ -178,10 +179,10 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
if _c.startswith('- '): # List in a cell
cell.listFlag = True
_c = re.sub(r'\\\s*$', '\n', _c)
cell.content = _c + nextListElementMark # Add list element end mark to know when the list element ends
cell.content = _c + _nextListElementMark # Add list element end mark to know when the list element ends
elif cell.listFlag and len(_c) > 0: # any other content when handling list is concatenated to the last list element
_c = re.sub(r'\\\s*$', '\n', _c)
cell.content = _c + nextListElementMark #add the list element end mark
cell.content = _c + _nextListElementMark #add the list element end mark
elif not _c: # empty line. separation between list and other paragraph
# cell.content = '\n' if not cell.content.endswith('\n') else ""
cell.content = '\n' # cell content is always empty / None here.
......@@ -194,11 +195,13 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
#cell['content'] = cell['content'].strip("\n")
cell.listFlag = True
_c = re.sub(r'\\\s*$', '\n', _c)
cell.content += _c + nextListElementMark # Add list element end mark to know when the list element ends
cell.content += _c + _nextListElementMark # Add list element end mark to know when the list element ends
elif cell.listFlag and len(_c) > 0: # any other content when handling list is concatenated to the last list element
cell.content = cell.content.strip(nextListElementMark) #remove list element end mark
# cell.content = cell.content.strip(nextListElementMark) #remove list element end mark
cell.content = cell.content.removesuffix(_nextListElementMark) #remove list element end mark
_c = re.sub(r'\\\s*$', '\n', _c)
cell.content += " " + _c + nextListElementMark #add list element end mark
cell.content += ' ' + _c + _nextListElementMark #add list element end mark
elif len(_c) == 0: # separation between list and other paragraph
if cell.listFlag:
cell.listFlag = False
......@@ -222,8 +225,6 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
row[columnIndex].colspan += 1
if position == delimiterPositions[len(delimiterPositions) - 1]: # last cell in row, adjust colspan to get max number columns
colspan_allocated = row[columnIndex].colspan
#for cell_index in range(number_of_parts):
# colspan_allocated += row[cell_index].colspan
row[columnIndex].colspan += numberOfColumns - colspan_allocated - columnIndex
elif position < delimiterPositions[j]:
raise ValueError("Wrong cell formatting")
......@@ -435,7 +436,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
continue
else:
raise ValueError('More cells than columns found')
raise ValueError(f'More cells than columns found ({len(cellsContent)} {numberOfColumns})')
else: # Data row
cellsContent = re.split(r'\|', line.strip('|'))
......@@ -462,7 +463,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
rowIndex = rowsTracker[columnIndex]
handleCellContent(rows[rowIndex][columnIndex], content)
else:
raise ValueError('More cells than columns found')
raise ValueError(f'More cells than columns found ({len(cellsContent)} {numberOfColumns})')
else:
raise ValueError('No separator line found for row starting')
......@@ -489,10 +490,11 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
# Replacing "<" by &lt;
cell.content = cell.content.replace('<', '&lt;')
# Bold replacements
# Regex to detect markdown bold formatting in cell content
if cell.content is not None:
cell.content = matchBold.sub(r'<strong>\g<text></strong>', cell.content)
cell.content = matchBold.sub(r'\1<strong>\g<text></strong>', cell.content)
# Italic replacements
# Regex to detect markdown italic formatting in cell content
......@@ -560,6 +562,9 @@ def generateHtmlTableWithSpans(gridTable:str) -> str:
Returns:
The HTML table in string format.
"""
regex1 = r'\s*([-*+]|\s*\d+\.)\s+((?:(?!' + re.escape(_nextListElementMark) + r').)+)' + re.escape(_nextListElementMark)
regex2 = r'(\s*([-*+]|\s*\d+\.)\s+(?:(?!∆).)+' + re.escape(_nextListElementMark) + r')+'
try:
gridHeader, gridBody = parseGridTableWithSpans(gridTable)
except Exception as e:
......@@ -585,13 +590,13 @@ def generateHtmlTableWithSpans(gridTable:str) -> str:
continue
else:
# Prepare content, in case there's a list
if cell.content is not None and (matches := re.findall(r'\s*([-*+]|\s*\d+\.)\s+((?:(?!@).)+)@', cell.content)): # Update cell in new row
if cell.content is not None and (matches := re.findall(regex1, cell.content)): # Update cell in new row
list = '<ul>'
# Build list the matches
for match in matches:
list += '<li>' + match[1] + '</li>'
list += '</ul>'
cell.content = re.sub(r'(\s*([-*+]|\s*\d+\.)\s+(?:(?!@).)+@)+', list, cell.content)
cell.content = re.sub(regex2, list, cell.content)
# Enforce left alignment if cell contains a list
cell.alignment = _alignLeft
......@@ -610,13 +615,13 @@ def generateHtmlTableWithSpans(gridTable:str) -> str:
continue
else:
#Prepare content, in case there's a list
if cell.content is not None and (matches := re.findall(r'\s*([-*+]|\s*\d+\.)\s+((?:(?!@).)+)@', cell.content)): # Update cell in new row
if cell.content is not None and (matches := re.findall(regex1, cell.content)): # Update cell in new row
list = '<ul>'
# Build list the matches
for match in matches:
list += f'<li>{match[1]}</li>'
list += '</ul>'
cell.content = re.sub(r'(\s*([-*+]|\s*\d+\.)\s+(?:(?!@).)+@)+', list, cell.content)
cell.content = re.sub(regex2, list, cell.content)
# Enforce left alignment if cell contains a list
cell.alignment = _alignLeft
......
......@@ -36,5 +36,5 @@ matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECAS
# A markdown table row: a line that starts and ends with a pipe.
matchTable = re.compile(r'^\s*\|.*\|\s*$', re.IGNORECASE)
# A markdown table separator row, e.g. `|---|:-:|`.
matchTableSeparator = re.compile(r'^\s*\|([-: ]+\|)+\s*$', re.IGNORECASE)
# Bold text (`**text**` or `__text__`).
# Group 1 is the leading whitespace (or the empty start-of-string match) and
# must be re-emitted by the replacement; group 2 is the delimiter, which has
# to be repeated verbatim to close the span; `text` is the emphasised content.
matchBold = re.compile(r'(^|\s)(\*\*|__)(?P<text>.+?)\2(?!\w)')
# Italic text (`*text*` or `_text_`).
# Same group layout as matchBold, plus group 4 for the trailing whitespace
# (or end of string), which the replacement must re-emit as well.
# The closing delimiter is a backreference to group 2 (the opening delimiter);
# referencing group 3 would instead demand that the text itself is repeated.
# A backslash-escaped closing delimiter does not end the span.
matchItalic = re.compile(r'(^|\s)(\*|_)(?P<text>.+?)(?<!\\)\2(\s|$)')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment