Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
S
Scripts
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Specification Tools
Scripts
Commits
6c8a9ddc
Commit
6c8a9ddc
authored
4 months ago
by
Miguel Angel Reina Ortega
Browse files
Options
Downloads
Patches
Plain Diff
Some improvements for grid tables conversion
parent
9d0a1d23
No related branches found
No related tags found
1 merge request
!1
Restructuring and cleaning scripts for Mkdocs
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
toMkdocs/gridTableTools.py
+146
-38
146 additions, 38 deletions
toMkdocs/gridTableTools.py
with
146 additions
and
38 deletions
toMkdocs/gridTableTools.py
+
146
−
38
View file @
6c8a9ddc
...
...
@@ -27,12 +27,13 @@ class GridCell:
self
.
auxiliarIndex
:
int
=
0
def
calculateAndSetAlignment
(
self
,
headerDelimiterPositions
:
list
[
int
],
defaultAlignments
:
list
[
str
]
)
->
None
:
def
calculateAndSetAlignment
(
self
)
->
None
:
"""
Set the alignment of the cell based on the position of the delimiter.
"""
if
self
.
position
is
None
:
raise
ValueError
(
'
Cell position must be set before calculating alignment.
'
)
if
hasHeader
:
headerDelimiterIndex
=
0
while
headerDelimiterIndex
<
len
(
defaultAlignments
)
and
self
.
position
>
headerDelimiterPositions
[
headerDelimiterIndex
]:
headerDelimiterIndex
+=
1
...
...
@@ -44,6 +45,18 @@ class GridCell:
headerDelimiterIndex
+=
1
else
:
raise
ValueError
(
'
Invalid table formatting
'
)
else
:
body_delimiter_index
=
0
while
body_delimiter_index
in
range
(
len
(
defaultAlignments
))
and
self
.
position
>
delimiterPositions
[
body_delimiter_index
]:
body_delimiter_index
+=
1
if
body_delimiter_index
in
range
(
len
(
defaultAlignments
)):
if
self
.
position
<
delimiterPositions
[
body_delimiter_index
]:
self
.
alignment
=
defaultAlignments
[
body_delimiter_index
]
elif
self
.
position
==
delimiterPositions
[
body_delimiter_index
]:
self
.
alignment
=
defaultAlignments
[
body_delimiter_index
]
body_delimiter_index
+=
1
else
:
raise
ValueError
(
"
Invalid table formatting
"
)
def
__str__
(
self
):
...
...
@@ -78,6 +91,48 @@ class GridRow():
def
__repr__
(
self
):
return
self
.
__str__
()
def check_delimiter_alignment(line: str, delimiters: str = "|+") -> bool:
    """
    Check if delimiters in a row align with expected positions.

    The expected positions are normally read from the module-level
    ``delimiterPositions`` list. Call sites in this module pass that list as
    the second positional argument, so a non-string ``delimiters`` value is
    interpreted as the list of expected positions and the delimiter
    characters fall back to the default "|+".

    Args:
        line: The line of text to check
        delimiters: String containing valid delimiter characters
            (default: "|+"), or the list of expected delimiter positions

    Returns:
        bool: True if delimiters align correctly, False otherwise
    """
    if not line:
        return False

    if isinstance(delimiters, str):
        # Expected positions come from the module-level global
        expectedPositions = delimiterPositions
    else:
        # Positions were handed over in place of the delimiter characters
        expectedPositions, delimiters = delimiters, "|+"

    if not expectedPositions:
        return False

    print(f"\nChecking line: '{line}'")
    print(f"Expected delimiter positions: {expectedPositions}")

    def _positionsOf(validChars: str) -> list[int]:
        # Column 0 always holds the opening delimiter, so it is not compared
        return [i for i, char in enumerate(line) if char in validChars and i != 0]

    def _aligned(found: list[int], validStarts: tuple) -> bool:
        # The line-wide conditions are evaluated outside all(), otherwise a
        # line without any delimiter after column 0 would pass vacuously.
        return (
            line.startswith(validStarts)
            and expectedPositions[-1] in found
            and all(pos in expectedPositions for pos in found)
        )

    hasPlus = '+' in line
    hasPipe = '|' in line

    # For full separator lines (only +)
    if hasPlus and not hasPipe:
        currentPositions = _positionsOf('+')
        print(f"Full separator line - Found + at positions: {currentPositions}")
        return _aligned(currentPositions, ("+",))

    # For data lines (only |)
    if hasPipe and not hasPlus:
        currentPositions = _positionsOf('|')
        print(f"Data line - Found | at positions: {currentPositions}")
        return _aligned(currentPositions, ("|",))

    # For partial separators (mix of + and |)
    currentPositions = _positionsOf(delimiters)
    print(f"Partial separator - Found delimiters at positions: {currentPositions}")
    print(f"Characters at those positions: {[line[pos] for pos in currentPositions]}")
    return _aligned(currentPositions, ("+", "|"))
class
GridRowsTracker
():
"""
Represents the document object.
"""
...
...
@@ -112,6 +167,14 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
:param pandoc_table: String of the Pandoc-style grid table.
:return: List of lists representing the table with metadata for spans.
"""
global
hasHeader
,
defaultAlignments
,
headerDelimiterPositions
,
delimiterPositions
,
nextListElementMark
# Initialize globals
hasHeader
=
False
defaultAlignments
:
list
[
str
]
=
[]
headerDelimiterPositions
:
list
[
int
]
=
[]
delimiterPositions
:
list
[
int
]
=
[]
nextListElementMark
=
'
@
'
# Split the input into lines
lines
:
list
[
str
]
=
[
line
.
strip
()
for
line
in
gridTable
.
strip
().
split
(
'
\n
'
)]
...
...
@@ -131,14 +194,13 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
cell
.
colspan
+=
1
if
_c
.
startswith
(
'
-
'
):
# List in a cell
cell
.
listFlag
=
True
cell
.
content
=
_c
+
'
\n
'
# Add newline to know when the list element ends
_c
=
re
.
sub
(
r
'
\\\s*$
'
,
'
\n
'
,
_c
)
cell
.
content
=
_c
+
nextListElementMark
# Add list element end mark to know when the list element ends
elif
cell
.
listFlag
and
len
(
_c
)
>
0
:
# any other content when handling list is concatenated to the last list element
cell
.
content
=
_c
+
'
\n
'
_c
=
re
.
sub
(
r
'
\\\s*$
'
,
'
\n
'
,
_c
)
cell
.
content
+=
_c
+
nextListElementMark
#add the list element end mark
elif
not
_c
:
# separation between list and other paragraph
cell
.
listFlag
=
False
cell
.
content
=
'
\n
'
#if not cell['content'].endswith("\n") else ""
cell
.
content
+=
'
\n
'
if
not
cell
[
'
content
'
].
endswith
(
'
\n
'
)
else
""
else
:
cell
.
content
=
re
.
sub
(
r
'
\\\s*$
'
,
'
\n
'
,
_c
)
else
:
# Cell has content
...
...
@@ -147,11 +209,16 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
cell
.
content
+=
'
\n
'
#cell['content'] = cell['content'].strip("\n")
cell
.
listFlag
=
True
cell
.
content
+=
_c
+
'
\n
'
# Add newline to know when the list element ends
elif
cell
.
listFlag
and
_c
:
# any other content when handling list is concatenated to the last list element
cell
.
content
=
cell
.
content
.
strip
(
'
\n
'
)
+
'
'
+
_c
+
'
\n
'
_c
=
re
.
sub
(
r
'
\\\s*$
'
,
'
\n
'
,
_c
)
cell
.
content
+=
_c
+
nextListElementMark
# Add list element end mark to know when the list element ends
elif
cell
.
listFlag
and
len
(
_c
)
>
0
:
# any other content when handling list is concatenated to the last list element
cell
.
content
=
cell
.
content
.
strip
(
nextListElementMark
)
#remove list element end mark
_c
=
re
.
sub
(
r
'
\\\s*$
'
,
'
\n
'
,
_c
)
cell
.
content
+=
"
"
+
_c
+
nextListElementMark
#add list element end mark
elif
len
(
_c
)
==
0
:
# separation between list and other paragraph
cell
.
listFlag
=
False
if
cell
.
list_flag
:
cell
.
list_flag
=
False
cell
.
content
+=
'
\n\n
'
#end list by \n
#content = re.sub(r'\\\s*$', "\n", content.strip())
cell
.
content
+=
'
\n
'
if
not
cell
.
content
.
endswith
(
'
\n
'
)
else
''
else
:
...
...
@@ -202,11 +269,8 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
# Determine delimiter positions and alignments
hasHeader
=
False
headerDelimiterPositions
:
list
[
int
]
=
[]
headerRows
:
GridTableRowList
=
[]
dataRows
:
GridTableRowList
=
[]
defaultAlignments
:
list
[
str
]
=
[]
for
index
in
separatorIndices
:
if
matchGridTableHeaderSeparator
.
match
(
lines
[
index
]):
...
...
@@ -217,7 +281,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
for
partIndex
in
range
(
len
(
parts
)):
if
parts
[
partIndex
].
startswith
(
'
:
'
)
and
not
parts
[
partIndex
].
endswith
(
'
:
'
):
# Left alignment
defaultAlignments
.
append
(
'
align=
"
left
"'
)
elif
not
parts
[
partIndex
].
startswith
(
"
:
"
)
and
parts
[
partIndex
].
endswith
(
"
:
"
):
# Right alignment
elif
not
parts
[
partIndex
].
startswith
(
'
:
'
)
and
parts
[
partIndex
].
endswith
(
'
:
'
):
# Right alignment
defaultAlignments
.
append
(
'
align=
"
right
"'
)
else
:
defaultAlignments
.
append
(
'
align=
"
center
"'
)
# Center alignment
...
...
@@ -226,6 +290,18 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
delPositions
=
[
lines
[
index
].
find
(
delimiter
,
delimiterPositionsStart
+
1
)
for
delimiter
in
'
+
'
if
delimiter
in
lines
[
index
][
delimiterPositionsStart
+
1
:]]
headerDelimiterPositions
.
append
(
min
(
delPositions
)
if
delPositions
else
-
1
)
if
not
hasHeader
:
#Set default alignments from the first separator
parts
=
re
.
split
(
r
'
\+
'
,
lines
[
0
].
strip
(
'
+
'
))
default_alignments
=
[]
# Calculate default alignments and positions of delimiters
for
part_index
in
range
(
len
(
parts
)):
if
parts
[
part_index
].
startswith
(
'
:
'
)
and
not
parts
[
part_index
].
endswith
(
'
:
'
):
default_alignments
.
append
(
'
align=
"
left
"'
)
elif
not
parts
[
part_index
].
startswith
(
'
:
'
)
and
parts
[
part_index
].
endswith
(
'
:
'
):
default_alignments
.
append
(
'
align=
"
right
"'
)
else
:
default_alignments
.
append
(
'
align=
"
center
"'
)
for
rowNumber
in
range
(
len
(
separatorIndices
)
-
1
):
rows
:
list
[
GridRow
]
=
[]
...
...
@@ -238,6 +314,10 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
for
line
in
rowLines
:
if
isSeparator
(
line
)
and
not
inDataRow
:
inDataRow
=
True
# Add delimiter alignment check for separator lines
if
not
check_delimiter_alignment
(
line
,
delimiterPositions
):
raise
ValueError
(
f
"
Misaligned delimiters in separator row:
{
line
}
"
)
parts
=
re
.
split
(
r
'
\s*\+\s*
'
,
line
.
strip
(
'
+
'
))
delimiterIndex
=
0
...
...
@@ -254,7 +334,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
cell
.
position
=
delimiterIndex
# Position of cell delimiter +
# Set alignment as defined by header separator line
cell
.
calculateAndSetAlignment
(
headerDelimiterPositions
,
defaultAlignments
)
cell
.
calculateAndSetAlignment
()
while
delimiterIndex
>
delimiterPositions
[
columnIndex
]:
columnIndex
+=
1
...
...
@@ -263,7 +343,11 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
elif
inDataRow
:
# Regular data row or partial separator
if
matchGridTableBodySeparator
.
match
(
line
):
# Partial separator
cellsContent
=
re
.
split
(
r
"
[\|\+]
"
,
line
.
strip
(
"
|
"
).
strip
(
"
+
"
))
# (?<!\\)[\|\+]
# Add delimiter alignment check for partial separators
if
not
check_delimiter_alignment
(
line
,
delimiterPositions
):
raise
ValueError
(
f
"
Misaligned delimiters in partial separator:
{
line
}
"
)
cellsContent
=
re
.
split
(
r
"
[\|\+]
"
,
line
.
strip
(
'
|
'
).
strip
(
'
+
'
))
# (?<!\\)[\|\+]
#Add another row, set delimiters for each cell
rows
.
append
(
GridRow
(
numberOfColumns
))
auxDelimiterIndex
=
0
...
...
@@ -274,7 +358,7 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
auxDelimiterIndex
+=
len
(
content
)
+
1
cell
=
rows
[
-
1
][
auxiliarCellIndex
]
cell
.
position
=
auxDelimiterIndex
# Position of cell delimiter +
cell
.
calculateAndSetAlignment
(
headerDelimiterPositions
,
defaultAlignments
)
cell
.
calculateAndSetAlignment
()
while
auxDelimiterIndex
>
delimiterPositions
[
auxiliarCellIndex
]:
auxiliarCellIndex
+=
1
auxiliarCellIndex
+=
1
...
...
@@ -318,7 +402,13 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
raise
ValueError
(
"
More cells than columns found
"
)
else
:
# Data row
cellsContent
=
re
.
split
(
r
'
\s*\|\s*
'
,
line
.
strip
(
'
|
'
))
cellsContent
=
line
.
strip
()
cellsContent
=
re
.
split
(
r
"
\|
"
,
line
.
strip
(
'
|
'
))
# Add delimiter alignment check
if
not
check_delimiter_alignment
(
line
,
delimiterPositions
):
raise
ValueError
(
f
"
Misaligned delimiters in row:
{
line
}
"
)
columnCellIndex
=
0
if
len
(
cellsContent
)
<
numberOfColumns
:
# Colspan: Positions of | with respect to + need to be determined
for
columnIndex
,
content
in
enumerate
(
cellsContent
):
...
...
@@ -347,6 +437,10 @@ def parseGridTableWithSpans(gridTable:str) -> tuple[GridTableRowList, GridTableR
elif
hasHeader
and
start
<
headerSeparatorIndex
:
# table_row and auxiliar_row are part of header_rows
for
row
in
rows
:
# header rows
headerRows
.
append
(
row
.
cells
)
else
:
#only body
for
row
in
rows
:
dataRows
.
append
(
row
.
cells
)
# Check if there are any data rows
if
not
dataRows
and
not
headerRows
:
...
...
@@ -432,13 +526,27 @@ def generateHtmlTableWithSpans(gridTable:str) -> str:
Returns:
The HTML table in string format.
"""
debug_output
=
[]
def
debug_print
(
msg
):
debug_output
.
append
(
str
(
msg
))
# Convert message to string
try
:
# Redirect print statements to our debug collector
global
print
original_print
=
print
print
=
debug_print
gridHeader
,
gridBody
=
parseGridTableWithSpans
(
gridTable
)
# Restore original print
print
=
original_print
except
Exception
as
e
:
import
traceback
traceback
.
print_exc
()
return
f
'
HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE.
CHECK LOGS.
{
e
}
'
debug_print
(
"
Grid table could not be generated
"
)
debug_text
=
"
<br>
"
.
join
(
debug_output
)
# Now all items are strings
return
f
'
HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE.
<br><pre>
{
debug_text
}
</pre>
'
# Generate table HTML...
html
=
'
<table>
\n
'
hasHeader
=
False
...
...
@@ -457,13 +565,13 @@ def generateHtmlTableWithSpans(gridTable:str) -> str:
continue
else
:
# Prepare content, in case there's a list
if
cell
.
content
is
not
None
and
(
matches
:
=
re
.
findall
(
r
"
\s*([-*+]|\s*\d+\.)\s+(
[^<]+)<br \/>
"
,
cell
.
content
)):
# Update cell in new row
if
cell
.
content
is
not
None
and
(
matches
:
=
re
.
findall
(
r
"
\s*([-*+]|\s*\d+\.)\s+(
(?:(?!@).)+)@
"
,
cell
.
content
)):
# Update cell in new row
list
=
"
<ul>
"
# Build the list from the matches
for
match
in
matches
:
list
+=
"
<li>
"
+
match
[
1
]
+
"
</li>
"
list
+=
"
</ul>
"
cell
.
content
=
re
.
sub
(
r
"
(\s*([-*+]|\s*\d+\.)\s+
[^<]+<br \/>
)+
"
,
list
,
cell
.
content
)
cell
.
content
=
re
.
sub
(
r
"
(\s*([-*+]|\s*\d+\.)\s+
(?:(?!@).)+@
)+
"
,
list
,
cell
.
content
)
# Enforce left alignment if cell contains a list
cell
.
alignment
=
"
align=
\"
left
\"
"
...
...
@@ -482,13 +590,13 @@ def generateHtmlTableWithSpans(gridTable:str) -> str:
continue
else
:
#Prepare content, in case there's a list
if
cell
.
content
is
not
None
and
(
matches
:
=
re
.
findall
(
r
"
\s*([-*+]|\s*\d+\.)\s+(
[^<]+)<br \/>
"
,
cell
.
content
)):
# Update cell in new row
if
cell
.
content
is
not
None
and
(
matches
:
=
re
.
findall
(
r
"
\s*([-*+]|\s*\d+\.)\s+(
(?:(?!@).)+)@
"
,
cell
.
content
)):
# Update cell in new row
list
=
"
<ul>
"
# Build the list from the matches
for
match
in
matches
:
list
+=
"
<li>
"
+
match
[
1
]
+
"
</li>
"
list
+=
"
</ul>
"
cell
.
content
=
re
.
sub
(
r
"
(\s*([-*+]|\s*\d+\.)\s+
[^<]+<br \/>
)+
"
,
list
,
cell
.
content
)
cell
.
content
=
re
.
sub
(
r
"
(\s*([-*+]|\s*\d+\.)\s+
(?:(?!@).)+@
)+
"
,
list
,
cell
.
content
)
# Enforce left alignment if cell contains a list
cell
.
alignment
=
"
align=
\"
left
\"
"
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment