Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
S
Scripts
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Specification Tools
Scripts
Commits
bb284002
Commit
bb284002
authored
7 months ago
by
Miguel Angel Reina Ortega
Browse files
Options
Downloads
Patches
Plain Diff
Some cleanup for handling of grid tables
parent
708d9fb8
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
toMkdocs/toMkdocs.py
+103
-113
103 additions, 113 deletions
toMkdocs/toMkdocs.py
with
103 additions
and
113 deletions
toMkdocs/toMkdocs.py
+
103
−
113
View file @
bb284002
...
@@ -7,6 +7,8 @@
...
@@ -7,6 +7,8 @@
# directory structure.
# directory structure.
#
#
from
__future__
import
annotations
from
__future__
import
annotations
import
logging
from
enum
import
Enum
,
auto
from
enum
import
Enum
,
auto
import
argparse
,
re
,
os
,
shutil
,
hashlib
,
base64
import
argparse
,
re
,
os
,
shutil
,
hashlib
,
base64
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
...
@@ -485,6 +487,19 @@ def parse_pandoc_table_with_spans(pandoc_table):
...
@@ -485,6 +487,19 @@ def parse_pandoc_table_with_spans(pandoc_table):
self
.
list_flag
=
False
self
.
list_flag
=
False
self
.
auxiliar_index
=
None
self
.
auxiliar_index
=
None
def set_alignment(self):
    """
    Set this cell's alignment from the header separator line.

    Looks for the first header delimiter that is not to the left of
    ``self.position`` and copies the default alignment of that header
    column into ``self.alignment``.

    ``default_alignments`` and ``header_delimiter_positions`` are not
    defined in this method; they are read as free variables from the
    enclosing scope.

    :raises ValueError: if ``self.position`` is greater than every entry of
        ``header_delimiter_positions`` (or there are no header columns).
    """
    column_count = len(default_alignments)
    column_index = 0
    # Skip every header column whose delimiter lies left of this cell's delimiter
    while column_index < column_count and self.position > header_delimiter_positions[column_index]:
        column_index += 1
    if column_index >= column_count:
        raise ValueError("Invalid table formatting")
    # The loop stopped because position <= header_delimiter_positions[column_index],
    # so both the "before" and the "exactly on" delimiter cases take this column's alignment
    self.alignment = default_alignments[column_index]
class
Row
():
class
Row
():
"""
Represents a row in the markdown file.
"""
"""
Represents a row in the markdown file.
"""
cells
:
list
[
Cell
]
=
[]
cells
:
list
[
Cell
]
=
[]
...
@@ -492,6 +507,12 @@ def parse_pandoc_table_with_spans(pandoc_table):
...
@@ -492,6 +507,12 @@ def parse_pandoc_table_with_spans(pandoc_table):
def
__init__
(
self
,
length
:
int
=
1
)
->
None
:
def
__init__
(
self
,
length
:
int
=
1
)
->
None
:
self
.
cells
=
[
Cell
()
for
_
in
range
(
length
)]
self
.
cells
=
[
Cell
()
for
_
in
range
(
length
)]
def __getitem__(self, item):
    """
    Return ``self.cells[item]`` so the row can be indexed directly.

    :param item: Index passed through unchanged to ``self.cells``.
    :return: Whatever ``self.cells`` holds at ``item``.
    """
    selected = self.cells[item]
    return selected
def __setitem__(self, key, value):
    """
    Store ``value`` in ``self.cells`` at ``key`` so the row can be assigned by index.

    :param key: Index passed through unchanged to ``self.cells``.
    :param value: Object to place at that index.
    """
    row_cells = self.cells
    row_cells[key] = value
# Detect separator lines by pattern (it does not take into account partial separators)
# Detect separator lines by pattern (it does not take into account partial separators)
def
is_separator
(
line
):
def
is_separator
(
line
):
return
_matchGridTableSeparator
.
match
(
line
)
return
_matchGridTableSeparator
.
match
(
line
)
...
@@ -573,7 +594,7 @@ def parse_pandoc_table_with_spans(pandoc_table):
...
@@ -573,7 +594,7 @@ def parse_pandoc_table_with_spans(pandoc_table):
has_header
=
True
has_header
=
True
header_separator_index
=
index
header_separator_index
=
index
header_rows
=
[]
header_rows
=
[]
parts
=
re
.
split
(
r
"
\
s*\+\s*
"
,
lines
[
index
].
strip
(
"
+
"
))
parts
=
re
.
split
(
r
"
\
+
"
,
lines
[
index
].
strip
(
"
+
"
))
default_alignments
=
[]
default_alignments
=
[]
#Calculate default alignments and positions of delimiters
#Calculate default alignments and positions of delimiters
for
part_index
in
range
(
len
(
parts
)):
for
part_index
in
range
(
len
(
parts
)):
...
@@ -592,9 +613,6 @@ def parse_pandoc_table_with_spans(pandoc_table):
...
@@ -592,9 +613,6 @@ def parse_pandoc_table_with_spans(pandoc_table):
for
row
in
range
(
len
(
separator_indices
)
-
1
):
for
row
in
range
(
len
(
separator_indices
)
-
1
):
table_row
=
[]
table_row
=
[]
auxiliar_rows
=
[]
auxiliar_rows
=
[]
auxiliar_row
=
[]
use_auxiliar_row
=
[]
list_flags
=
[]
has_merged_cells
=
False
has_merged_cells
=
False
in_data_row
=
False
in_data_row
=
False
start
,
end
=
separator_indices
[
row
],
separator_indices
[
row
+
1
]
start
,
end
=
separator_indices
[
row
],
separator_indices
[
row
+
1
]
...
@@ -623,45 +641,31 @@ def parse_pandoc_table_with_spans(pandoc_table):
...
@@ -623,45 +641,31 @@ def parse_pandoc_table_with_spans(pandoc_table):
table_row
=
Row
(
number_of_columns_row
)
table_row
=
Row
(
number_of_columns_row
)
for
i
in
range
(
number_of_columns_row
):
for
i
in
range
(
number_of_columns_row
):
delimiter_index
+=
len
(
parts
[
i
])
+
1
delimiter_index
+=
len
(
parts
[
i
])
+
1
table_row
.
cells
[
i
].
alignment
=
default_alignments
[
i
]
if
i
==
0
else
"
align=
\"
center
\"
"
table_row
[
i
].
alignment
=
default_alignments
[
i
]
if
i
==
0
else
"
align=
\"
center
\"
"
table_row
.
cells
[
i
].
position
=
delimiter_index
# Position of cell delimiter +
table_row
[
i
].
position
=
delimiter_index
# Position of cell delimiter +
#Set alignment as defined by header separator line
#Set alignment as defined by header separator line
while
header_delimiter_index
in
range
(
len
(
default_alignments
))
and
table_row
.
cells
[
i
].
position
>
header_delimiter_positions
[
header_delimiter_index
]:
table_row
[
i
].
set_alignment
()
header_delimiter_index
+=
1
if
header_delimiter_index
in
range
(
len
(
default_alignments
)):
if
table_row
.
cells
[
i
].
position
<
header_delimiter_positions
[
header_delimiter_index
]:
table_row
.
cells
[
i
].
alignment
=
default_alignments
[
header_delimiter_index
]
elif
table_row
.
cells
[
i
].
position
==
header_delimiter_positions
[
header_delimiter_index
]:
table_row
.
cells
[
i
].
alignment
=
default_alignments
[
i
]
header_delimiter_index
+=
1
else
:
raise
ValueError
(
"
Invalid table formatting
"
)
#auxiliar_row = Row(number_of_columns)
#for i in range(number_of_columns):
#auxiliar_row.append(default_cell)
#use_auxiliar_row.append(False)
#auxiliar_rows.append({'auxiliar_row':auxiliar_row, 'use_auxiliar':use_auxiliar_row, 'list_flags':list_flags})
elif
in_data_row
:
elif
in_data_row
:
# Regular data row or partial separator
# Regular data row or partial separator
if
_matchGridTableBodySeparator
.
match
(
line
):
# Partial separator
if
_matchGridTableBodySeparator
.
match
(
line
):
# Partial separator
has_merged_cells
=
True
has_merged_cells
=
True
cells
=
re
.
split
(
r
"
[\|\+]
"
,
line
.
strip
(
"
|
"
).
strip
(
"
+
"
))
# (?<!\\)[\|\+]
#Add auxiliar line, set delimiters for each cell
#Add auxiliar line, set delimiters for each cell
auxiliar_rows
.
append
(
Row
(
number_of_columns
))
auxiliar_rows
.
append
(
Row
(
number_of_columns
))
aux_delimiter_index
=
0
aux_delimiter_index
=
0
for
i
in
range
(
number_of_columns_row
):
for
auxiliar_cell_index
in
range
(
number_of_columns
):
aux_delimiter_index
+=
len
(
parts
[
i
])
+
1
aux_delimiter_index
+=
len
(
cells
[
auxiliar_cell_index
])
+
1
auxiliar_rows
[
-
1
].
cells
[
i
].
position
=
aux_delimiter_index
# Position of cell delimiter +
auxiliar_rows
[
-
1
][
auxiliar_cell_index
].
position
=
aux_delimiter_index
# Position of cell delimiter +
auxiliar_rows
[
-
1
][
i
].
set_alignment
()
cells
=
re
.
split
(
r
"
\s*[\|\+]\s*
"
,
line
.
strip
(
"
|
"
).
strip
(
"
+
"
))
# (?<!\\)[\|\+]
if
len
(
cells
)
<=
number_of_columns
:
# Colspan: Positions of | with respect to + need to be determined
if
len
(
cells
)
<=
number_of_columns
:
# Colspan: Positions of | with respect to + need to be determined
for
i
in
range
(
len
(
cells
)):
for
i
in
range
(
len
(
cells
)):
if
_matchGridTableBodySeparatorLine
.
match
(
cells
[
i
]):
# A new row is to be added
if
_matchGridTableBodySeparatorLine
.
match
(
cells
[
i
]):
# A new row is to be added
#auxiliar_rows[-1]['use_auxiliar_row'][i] = True
#auxiliar_rows[-1]['use_auxiliar_row'][i] = True
auxiliar_rows
[
-
1
]
.
cells
[
i
].
list_flag
=
False
auxiliar_rows
[
-
1
][
i
].
list_flag
=
False
table_row
.
cells
[
i
].
auxiliar_index
=
len
(
auxiliar_rows
)
-
1
table_row
[
i
].
auxiliar_index
=
len
(
auxiliar_rows
)
-
1
#if cells[i].startswith(":") and not cells[i].endswith(":"):
#if cells[i].startswith(":") and not cells[i].endswith(":"):
# auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"left\""
# auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"left\""
#elif not cells[i].startswith(":") and cells[i].endswith(":"):
#elif not cells[i].startswith(":") and cells[i].endswith(":"):
...
@@ -670,37 +674,20 @@ def parse_pandoc_table_with_spans(pandoc_table):
...
@@ -670,37 +674,20 @@ def parse_pandoc_table_with_spans(pandoc_table):
# auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"center\""
# auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"center\""
else
:
else
:
# Handle content of the cell
# Handle content of the cell
if
table_row
.
cells
[
i
].
auxiliar_index
is
not
None
:
# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
if
table_row
[
i
].
auxiliar_index
is
not
None
:
# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
][
i
]
=
handling_content
(
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
][
i
],
cells
[
i
])
auxiliar_rows
[
table_row
[
i
].
auxiliar_index
][
i
]
=
handling_content
(
auxiliar_rows
[
table_row
[
i
].
auxiliar_index
][
i
],
cells
[
i
])
if
not
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
][
i
].
colspan_adjusted
:
if
not
auxiliar_rows
[
table_row
[
i
].
auxiliar_index
][
i
].
colspan_adjusted
:
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
][
i
].
colspan_adjusted
=
True
auxiliar_rows
[
table_row
[
i
].
auxiliar_index
][
i
].
colspan_adjusted
=
True
# TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
# TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
][
i
]
=
adjust_colspan
(
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
],
i
,
len
(
cells
),
line
,
number_of_columns
,
delimiter_positions
)
auxiliar_rows
[
table_row
[
i
].
auxiliar_index
][
i
]
=
adjust_colspan
(
auxiliar_rows
[
table_row
[
i
].
auxiliar_index
],
i
,
len
(
cells
),
line
,
number_of_columns
,
delimiter_positions
)
else
:
else
:
table_row
.
cells
[
i
]
=
handling_content
(
table_row
.
cells
[
i
],
cells
[
i
])
table_row
[
i
]
=
handling_content
(
table_row
[
i
],
cells
[
i
])
# Cell which is not separator
# Cell which is not separator
table_row
.
cells
[
i
].
rowspan
+=
1
table_row
[
i
].
rowspan
+=
1
if
not
table_row
.
cells
[
i
].
colspan_adjusted
:
if
not
table_row
.
cells
[
i
].
colspan_adjusted
:
table_row
.
cells
[
i
].
colspan_adjusted
=
True
table_row
[
i
].
colspan_adjusted
=
True
#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
table_row
.
cells
[
i
]
=
adjust_colspan
(
table_row
.
cells
,
i
,
len
(
cells
),
line
,
number_of_columns
,
delimiter_positions
)
table_row
[
i
]
=
adjust_colspan
(
table_row
,
i
,
len
(
cells
),
line
,
number_of_columns
,
delimiter_positions
)
#elif len(cells) == number_of_columns: # Simple row with partial separator, # A new row is to be added
# for i in range(len(cells)):
# if _matchGridTableBodySeparatorLine.match(cells[i]): # Update cell in new row
# use_auxiliar_row[i] = True
# list_flags[i] = False
# if cells[i].startswith(":") and not cells[i].endswith(":"):
# auxiliar_row[i]['alignment'] = "align=\"left\""
# elif not cells[i].startswith(":") and cells[i].endswith(":"):
# auxiliar_row[i]['alignment'] = "align=\"right\""
# else:
# auxiliar_row[i]['alignment'] = "align=\"center\""
# else:
# #Handle content of the cell
# list_flags[i], table_row[i] = handling_content(table_row[i], cells[i],list_flags[i])
# # Cell which is not separator
# table_row[i]['rowspan'] += 1
# # Adjusting of colspan not needed, no colspan as number of cells is equal to number of columns
else
:
else
:
raise
ValueError
(
"
More cells than columns found
"
)
raise
ValueError
(
"
More cells than columns found
"
)
else
:
# Data row
else
:
# Data row
...
@@ -708,30 +695,29 @@ def parse_pandoc_table_with_spans(pandoc_table):
...
@@ -708,30 +695,29 @@ def parse_pandoc_table_with_spans(pandoc_table):
if
len
(
cells
)
<
number_of_columns
:
# Colspan: Positions of | with respect to + need to be determined
if
len
(
cells
)
<
number_of_columns
:
# Colspan: Positions of | with respect to + need to be determined
for
i
in
range
(
len
(
cells
)):
for
i
in
range
(
len
(
cells
)):
# Handle content of the cell
# Handle content of the cell
if
table_row
.
cells
[
i
].
auxiliar_index
is
not
None
:
# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
if
table_row
[
i
].
auxiliar_index
is
not
None
:
# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
]
.
cells
[
i
]
=
handling_content
(
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
]
.
cells
[
i
],
cells
[
i
])
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
][
i
]
=
handling_content
(
auxiliar_rows
[
table_row
[
i
].
auxiliar_index
][
i
],
cells
[
i
])
if
not
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
].
cells
[
i
].
colspan_adjusted
:
if
not
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
].
cells
[
i
].
colspan_adjusted
:
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
].
cells
[
i
].
colspan_adjusted
=
True
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
].
cells
[
i
].
colspan_adjusted
=
True
#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
]
.
cells
[
i
]
=
adjust_colspan
(
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
].
cells
,
i
,
len
(
cells
),
line
,
number_of_columns
,
delimiter_positions
)
auxiliar_rows
[
table_row
[
i
].
auxiliar_index
][
i
]
=
adjust_colspan
(
auxiliar_rows
[
table_row
[
i
].
auxiliar_index
].
cells
,
i
,
len
(
cells
),
line
,
number_of_columns
,
delimiter_positions
)
else
:
else
:
table_row
.
cells
[
i
]
=
handling_content
(
table_row
.
cells
[
i
],
cells
[
i
])
table_row
[
i
]
=
handling_content
(
table_row
[
i
],
cells
[
i
])
if
not
table_row
.
cells
[
i
].
colspan_adjusted
:
if
not
table_row
.
cells
[
i
].
colspan_adjusted
:
table_row
.
cells
[
i
].
colspan_adjusted
=
True
table_row
[
i
].
colspan_adjusted
=
True
table_row
.
cells
[
i
]
=
adjust_colspan
(
table_row
.
cells
,
i
,
len
(
cells
),
line
,
number_of_columns
,
delimiter_positions
)
table_row
[
i
]
=
adjust_colspan
(
table_row
.
cells
,
i
,
len
(
cells
),
line
,
number_of_columns
,
delimiter_positions
)
elif
len
(
cells
)
==
number_of_columns
:
# Simple row
elif
len
(
cells
)
==
number_of_columns
:
# Simple row
for
i
in
range
(
len
(
cells
)):
for
i
in
range
(
len
(
cells
)):
if
table_row
.
cells
[
i
].
auxiliar_index
is
not
None
:
# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
if
table_row
[
i
].
auxiliar_index
is
not
None
:
# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
]
.
cells
[
i
]
=
handling_content
(
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
]
.
cells
[
i
],
cells
[
i
])
auxiliar_rows
[
table_row
[
i
].
auxiliar_index
][
i
]
=
handling_content
(
auxiliar_rows
[
table_row
[
i
].
auxiliar_index
][
i
],
cells
[
i
])
else
:
else
:
# Handle content of the cell
# Handle content of the cell
table_row
.
cells
[
i
]
=
handling_content
(
table_row
.
cells
[
i
],
cells
[
i
])
table_row
[
i
]
=
handling_content
(
table_row
[
i
],
cells
[
i
])
else
:
else
:
raise
ValueError
(
"
More cells than columns found
"
)
raise
ValueError
(
"
More cells than columns found
"
)
else
:
else
:
raise
ValueError
(
"
No separator line found for row starting
"
)
raise
ValueError
(
"
No separator line found for row starting
"
)
if
has_header
and
start
>=
header_separator_index
:
# table_row and auxiliar_row are part of data_rows
if
has_header
and
start
>=
header_separator_index
:
# table_row and auxiliar_row are part of data_rows
data_rows
.
append
(
table_row
.
cells
)
data_rows
.
append
(
table_row
.
cells
)
if
has_merged_cells
:
if
has_merged_cells
:
...
@@ -759,7 +745,7 @@ def parse_pandoc_table_with_spans(pandoc_table):
...
@@ -759,7 +745,7 @@ def parse_pandoc_table_with_spans(pandoc_table):
for
cell
in
row
:
for
cell
in
row
:
if
cell
.
content
is
not
None
:
if
cell
.
content
is
not
None
:
# Replacing "<" by &lt;
# Replacing "<" by &lt;
cell
.
content
=
cell
.
content
.
replace
(
"
<
"
,
"
<
"
)
#
cell.content = cell.content.replace("<", "<")
#Bold
#Bold
for
bold_characters
in
[
"
**
"
,
"
__
"
]:
for
bold_characters
in
[
"
**
"
,
"
__
"
]:
...
@@ -828,8 +814,12 @@ def generate_html_table_with_spans(pandoc_table):
...
@@ -828,8 +814,12 @@ def generate_html_table_with_spans(pandoc_table):
:param pandoc_table: String of the Pandoc-style grid table.
:param pandoc_table: String of the Pandoc-style grid table.
:return: HTML string.
:return: HTML string.
"""
"""
try
:
grid_header
,
grid_body
=
parse_pandoc_table_with_spans
(
pandoc_table
)
grid_header
,
grid_body
=
parse_pandoc_table_with_spans
(
pandoc_table
)
except
:
logging
.
ERROR
(
"
Grid table could not be generated
"
)
return
"
HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS
"
else
:
html
=
"
<table>
\n
"
html
=
"
<table>
\n
"
has_header
=
False
has_header
=
False
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment