Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
S
Scripts
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Specification Tools
Scripts
Commits
708d9fb8
Commit
708d9fb8
authored
7 months ago
by
Miguel Angel Reina Ortega
Browse files
Options
Downloads
Patches
Plain Diff
Using class Cell and Row to handle grid tables conversion to html
parent
2451610e
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
toMkdocs/toMkdocs.py
+164
-117
164 additions, 117 deletions
toMkdocs/toMkdocs.py
with
164 additions
and
117 deletions
toMkdocs/toMkdocs.py
+
164
−
117
View file @
708d9fb8
...
...
@@ -420,6 +420,7 @@ _matchStandAloneImage = re.compile(r'^\s*!\[[^\]]*\]\(([^)]*)\)\s*', re.IGNORECA
_matchTable
=
re
.
compile
(
r
'
^\s*\|.*\|\s*$
'
,
re
.
IGNORECASE
)
_matchTableSeparator
=
re
.
compile
(
r
'
^\s*\|([-: ]+\|)+\s*$
'
,
re
.
IGNORECASE
)
_matchGridTable
=
re
.
compile
(
r
'
^\s*\+-.*\+\s$
'
,
re
.
IGNORECASE
)
_matchGridTableSeparator
=
re
.
compile
(
r
'
\s*\+([-:=]+\+)+\s*$
'
,
re
.
IGNORECASE
)
_matchGridTableBodySeparator
=
re
.
compile
(
r
'
.*\+([:-]+\+)+.*$
'
,
re
.
IGNORECASE
)
_matchGridTableHeaderSeparator
=
re
.
compile
(
r
'
.*\+([=:]+\+)+.*$
'
,
re
.
IGNORECASE
)
_matchGridTableBodySeparatorLine
=
re
.
compile
(
r
'
[-:]+$
'
,
re
.
IGNORECASE
)
...
...
@@ -463,58 +464,85 @@ def parse_pandoc_table_with_spans(pandoc_table):
# Split the input into lines
lines
=
[
line
.
strip
()
for
line
in
pandoc_table
.
strip
().
split
(
"
\n
"
)]
class
Cell
:
"""
Represents the document object.
"""
content
:
str
rowspan
:
int
colspan
:
int
colspan_adjusted
:
bool
alignment
:
str
position
:
int
list_flag
:
bool
auxiliar_index
:
int
def
__init__
(
self
):
self
.
content
=
None
self
.
rowspan
=
0
self
.
colspan
=
0
self
.
colspan_adjusted
=
False
self
.
alignment
=
"
align=
\"
center
\"
"
self
.
position
=
0
self
.
list_flag
=
False
self
.
auxiliar_index
=
None
class
Row
():
"""
Represents a row in the markdown file.
"""
cells
:
list
[
Cell
]
=
[]
def
__init__
(
self
,
length
:
int
=
1
)
->
None
:
self
.
cells
=
[
Cell
()
for
_
in
range
(
length
)]
# Detect separator lines by pattern (it does not take into account partial separators
def
is_separator
(
line
):
_matchGridTableSeparator
=
re
.
compile
(
r
'
\s*\+([-:=]+\+)+\s*$
'
,
re
.
IGNORECASE
)
return
_matchGridTableSeparator
.
match
(
line
)
def
handling_content
(
cell
,
content
,
list_flag
):
if
cell
[
'
content
'
]
is
None
:
cell
[
'
rowspan
'
]
+=
1
cell
[
'
colspan
'
]
+=
1
def
handling_content
(
cell
,
content
):
if
cell
.
content
is
None
:
cell
.
rowspan
+=
1
cell
.
colspan
+=
1
if
content
.
strip
().
startswith
(
"
-
"
):
# List
list_flag
=
True
print
(
content
)
cell
[
'
content
'
]
=
content
.
strip
()
+
"
\n
"
# Add newline to know when the list element ends
elif
list_flag
:
# any other content when handling list is concatenated to the last list element
cell
[
'
content
'
]
+=
content
.
strip
()
+
"
\n
"
elif
cells
[
i
].
strip
()
==
""
:
# separation between list and other paragraph
list_flag
=
False
cell
[
'
content
'
]
=
re
.
sub
(
r
'
\\\s*$
'
,
"
\n
"
,
content
)
cell
.
list_flag
=
True
#
print(content)
cell
.
content
=
content
.
strip
()
+
"
\n
"
# Add newline to know when the list element ends
elif
cell
.
list_flag
and
cells
[
i
].
strip
()
!=
""
:
# any other content when handling list is concatenated to the last list element
cell
.
content
+=
content
.
strip
()
+
"
\n
"
elif
cells
[
i
].
strip
==
""
:
# separation between list and other paragraph
cell
.
list_flag
=
False
cell
.
content
+=
"
\n
"
#if not cell['content'].endswith("\n") else ""
else
:
cell
[
'
content
'
]
=
re
.
sub
(
r
'
\\\s*$
'
,
"
\n
"
,
content
.
strip
())
cell
.
content
=
re
.
sub
(
r
'
\\\s*$
'
,
"
\n
"
,
content
.
strip
())
else
:
if
content
.
strip
().
startswith
(
"
-
"
):
# List
if
not
list_flag
:
cell
[
'
content
'
]
+=
"
\n
"
if
not
cell
.
list_flag
:
cell
.
content
+=
"
\n
"
#cell['content'] = cell['content'].strip("\n")
list_flag
=
True
cell
[
'
content
'
]
+=
content
.
strip
()
+
"
\n
"
# Add newline to know when the list element ends
elif
list_flag
:
# any other content when handling list is concatenated to the last list element
cell
[
'
content
'
]
=
cell
[
'
content
'
]
.
strip
(
"
\n
"
)
cell
[
'
content
'
]
+=
"
"
+
content
.
strip
()
+
"
\n
"
cell
.
list_flag
=
True
cell
.
content
+=
content
.
strip
()
+
"
\n
"
# Add newline to know when the list element ends
elif
cell
.
list_flag
and
cells
[
i
].
strip
()
!=
""
:
# any other content when handling list is concatenated to the last list element
cell
.
content
=
cell
.
content
.
strip
(
"
\n
"
)
cell
.
content
+=
"
"
+
content
.
strip
()
+
"
\n
"
elif
cells
[
i
].
strip
()
==
""
:
# separation between list and other paragraph
list_flag
=
False
cell
.
list_flag
=
False
#content = re.sub(r'\\\s*$', "\n", content.strip())
cell
[
'
content
'
]
+=
"
\n
"
if
not
cell
[
'
content
'
]
.
endswith
(
"
\n
"
)
else
""
cell
.
content
+=
"
\n
"
if
not
cell
.
content
.
endswith
(
"
\n
"
)
else
""
else
:
content
=
re
.
sub
(
r
'
\\\s*$
'
,
"
\n
"
,
content
.
strip
())
cell
[
'
content
'
]
+=
"
"
+
content
cell
.
content
+=
"
"
+
content
#print(cell['content'])
return
list_flag
,
cell
return
cell
def
adjust_colspan
(
row
,
column_index
,
number_of_parts
,
line
,
number_of_columns
,
delimiter_positions
):
for
j
in
range
(
column_index
,
number_of_parts
):
delimiter_start
=
row
[
j
-
1
]
[
'
position
'
]
if
j
!=
0
else
0
delimiter_start
=
row
[
j
-
1
]
.
position
if
j
!=
0
else
0
positions
=
[
line
.
find
(
delimiter
,
delimiter_start
+
1
)
for
delimiter
in
"
|+
"
if
delimiter
in
line
[
delimiter_start
+
1
:]]
position
=
min
(
positions
)
if
positions
else
-
1
if
position
>
delimiter_positions
[
j
]:
# Colspan to be increased
row
[
i
]
[
'
colspan
'
]
+=
1
row
[
i
]
.
colspan
+=
1
if
position
==
delimiter_positions
[
len
(
delimiter_positions
)
-
1
]:
# last cell in row, adjust colspan to get max number columns
colspan_allocated
=
0
for
cell_index
in
range
(
number_of_parts
):
colspan_allocated
+=
row
[
cell_index
]
[
'
colspan
'
]
row
[
column_index
]
[
'
colspan
'
]
+=
number_of_columns
-
colspan_allocated
colspan_allocated
=
row
[
i
].
colspan
#
for cell_index in range(number_of_parts):
#
colspan_allocated += row[cell_index]
.
colspan
row
[
column_index
]
.
colspan
+=
number_of_columns
-
colspan_allocated
-
column_index
elif
position
<
delimiter_positions
[
j
]:
raise
ValueError
(
"
Wrong cell formatting
"
)
else
:
...
...
@@ -563,6 +591,7 @@ def parse_pandoc_table_with_spans(pandoc_table):
data_rows
=
[]
for
row
in
range
(
len
(
separator_indices
)
-
1
):
table_row
=
[]
auxiliar_rows
=
[]
auxiliar_row
=
[]
use_auxiliar_row
=
[]
list_flags
=
[]
...
...
@@ -591,65 +620,70 @@ def parse_pandoc_table_with_spans(pandoc_table):
# else:
# alignments.append("align=\"center\"")
header_delimiter_index
=
0
table_row
=
Row
(
number_of_columns_row
)
for
i
in
range
(
number_of_columns_row
):
delimiter_index
+=
len
(
parts
[
i
])
+
1
table_row
.
append
({
"
content
"
:
None
,
"
rowspan
"
:
0
,
"
colspan
"
:
0
,
"
colspan_adjusted
"
:
False
,
"
alignment
"
:
default_alignments
[
i
]
if
i
==
0
else
"
align=
\"
center
\"
"
,
"
position
"
:
delimiter_index
# Position of cell delimiter +
})
table_row
.
cells
[
i
].
alignment
=
default_alignments
[
i
]
if
i
==
0
else
"
align=
\"
center
\"
"
table_row
.
cells
[
i
].
position
=
delimiter_index
# Position of cell delimiter +
#Set alignment as defined by header separator line
while
header_delimiter_index
in
range
(
len
(
default_alignments
))
and
table_row
[
i
]
[
'
position
'
]
>
header_delimiter_positions
[
header_delimiter_index
]:
while
header_delimiter_index
in
range
(
len
(
default_alignments
))
and
table_row
.
cells
[
i
]
.
position
>
header_delimiter_positions
[
header_delimiter_index
]:
header_delimiter_index
+=
1
if
header_delimiter_index
in
range
(
len
(
default_alignments
)):
if
table_row
[
i
]
[
'
position
'
]
<
header_delimiter_positions
[
header_delimiter_index
]:
table_row
[
i
]
[
'
alignment
'
]
=
default_alignments
[
header_delimiter_index
]
elif
table_row
[
i
]
[
'
position
'
]
==
header_delimiter_positions
[
header_delimiter_index
]:
table_row
[
i
]
[
'
alignment
'
]
=
default_alignments
[
i
]
if
table_row
.
cells
[
i
]
.
position
<
header_delimiter_positions
[
header_delimiter_index
]:
table_row
.
cells
[
i
]
.
alignment
=
default_alignments
[
header_delimiter_index
]
elif
table_row
.
cells
[
i
]
.
position
==
header_delimiter_positions
[
header_delimiter_index
]:
table_row
.
cells
[
i
]
.
alignment
=
default_alignments
[
i
]
header_delimiter_index
+=
1
else
:
raise
ValueError
(
"
Invalid table formatting
"
)
for
i
in
range
(
number_of_columns
):
auxiliar_row
.
append
({
"
content
"
:
None
,
"
rowspan
"
:
0
,
"
colspan
"
:
0
,
"
colspan_adjusted
"
:
False
,
"
alignment
"
:
"
align=
\"
center
\"
"
,
"
position
"
:
0
})
use_auxiliar_row
.
append
(
False
)
list_flags
.
append
(
False
)
#auxiliar_row = Row(number_of_columns)
#for i in range(number_of_columns):
#auxiliar_row.append(default_cell)
#use_auxiliar_row.append(False)
#auxiliar_rows.append({'auxiliar_row':auxiliar_row, 'use_auxiliar':use_auxiliar_row, 'list_flags':list_flags})
elif
in_data_row
:
# Regular data row or partial separator
if
_matchGridTableBodySeparator
.
match
(
line
):
# Partial separator
has_merged_cells
=
True
#Add auxiliar line, set delimiters for each cell
auxiliar_rows
.
append
(
Row
(
number_of_columns
))
aux_delimiter_index
=
0
for
i
in
range
(
number_of_columns_row
):
aux_delimiter_index
+=
len
(
parts
[
i
])
+
1
auxiliar_rows
[
-
1
].
cells
[
i
].
position
=
aux_delimiter_index
# Position of cell delimiter +
cells
=
re
.
split
(
r
"
\s*[\|\+]\s*
"
,
line
.
strip
(
"
|
"
).
strip
(
"
+
"
))
# (?<!\\)[\|\+]
if
len
(
cells
)
<=
number_of_columns
:
# Colspan: Positions of | with respect to + need to be determined
for
i
in
range
(
len
(
cells
)):
if
_matchGridTableBodySeparatorLine
.
match
(
cells
[
i
]):
# A new row is to be added
use_auxiliar_row
[
i
]
=
True
list_flags
[
i
]
=
False
if
cells
[
i
].
startswith
(
"
:
"
)
and
not
cells
[
i
].
endswith
(
"
:
"
):
auxiliar_row
[
i
][
'
alignment
'
]
=
"
align=
\"
left
\"
"
elif
not
cells
[
i
].
startswith
(
"
:
"
)
and
cells
[
i
].
endswith
(
"
:
"
):
auxiliar_row
[
i
][
'
alignment
'
]
=
"
align=
\"
right
\"
"
else
:
auxiliar_row
[
i
][
'
alignment
'
]
=
"
align=
\"
center
\"
"
#auxiliar_rows[-1]['use_auxiliar_row'][i] = True
auxiliar_rows
[
-
1
].
cells
[
i
].
list_flag
=
False
table_row
.
cells
[
i
].
auxiliar_index
=
len
(
auxiliar_rows
)
-
1
#if cells[i].startswith(":") and not cells[i].endswith(":"):
# auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"left\""
#elif not cells[i].startswith(":") and cells[i].endswith(":"):
# auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"right\""
#else:
# auxiliar_rows[-1]['auxiliar_row'][i]['alignment'] = "align=\"center\""
else
:
# Handle content of the cell
list_flags
[
i
],
table_row
[
i
]
=
handling_content
(
table_row
[
i
],
cells
[
i
],
list_flags
[
i
])
if
table_row
.
cells
[
i
].
auxiliar_index
is
not
None
:
# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
][
i
]
=
handling_content
(
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
][
i
],
cells
[
i
])
if
not
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
][
i
].
colspan_adjusted
:
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
][
i
].
colspan_adjusted
=
True
# TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
][
i
]
=
adjust_colspan
(
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
],
i
,
len
(
cells
),
line
,
number_of_columns
,
delimiter_positions
)
else
:
table_row
.
cells
[
i
]
=
handling_content
(
table_row
.
cells
[
i
],
cells
[
i
])
# Cell which is not separator
table_row
[
i
]
[
'
rowspan
'
]
+=
1
if
not
table_row
[
i
]
[
'
colspan_adjusted
'
]
:
table_row
[
i
]
[
'
colspan_adjusted
'
]
=
True
table_row
.
cells
[
i
]
.
rowspan
+=
1
if
not
table_row
.
cells
[
i
]
.
colspan_adjusted
:
table_row
.
cells
[
i
]
.
colspan_adjusted
=
True
#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
table_row
[
i
]
=
adjust_colspan
(
table_row
,
i
,
len
(
cells
),
line
,
number_of_columns
,
delimiter_positions
)
table_row
.
cells
[
i
]
=
adjust_colspan
(
table_row
.
cells
,
i
,
len
(
cells
),
line
,
number_of_columns
,
delimiter_positions
)
#elif len(cells) == number_of_columns: # Simple row with partial separator, # A new row is to be added
# for i in range(len(cells)):
# if _matchGridTableBodySeparatorLine.match(cells[i]): # Update cell in new row
...
...
@@ -674,30 +708,42 @@ def parse_pandoc_table_with_spans(pandoc_table):
if
len
(
cells
)
<
number_of_columns
:
# Colspan: Positions of | with respect to + need to be determined
for
i
in
range
(
len
(
cells
)):
# Handle content of the cell
list_flags
[
i
],
table_row
[
i
]
=
handling_content
(
table_row
[
i
],
cells
[
i
],
list_flags
[
i
])
if
not
table_row
[
i
][
'
colspan_adjusted
'
]:
table_row
[
i
][
'
colspan_adjusted
'
]
=
True
table_row
[
i
]
=
adjust_colspan
(
table_row
,
i
,
len
(
cells
),
line
,
number_of_columns
,
delimiter_positions
)
if
table_row
.
cells
[
i
].
auxiliar_index
is
not
None
:
# and auxiliar_rows[table_row[i]['auxiliar_index']]['use_auxiliar_row'][i]:
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
].
cells
[
i
]
=
handling_content
(
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
].
cells
[
i
],
cells
[
i
])
if
not
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
].
cells
[
i
].
colspan_adjusted
:
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
].
cells
[
i
].
colspan_adjusted
=
True
#TO BE CHECKED Most probably the code below is never executed, colspan should be already adjusted when dealing with a partial separator
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
].
cells
[
i
]
=
adjust_colspan
(
auxiliar_rows
[
table_row
.
cells
[
i
].
auxiliar_index
].
cells
,
i
,
len
(
cells
),
line
,
number_of_columns
,
delimiter_positions
)
else
:
table_row
.
cells
[
i
]
=
handling_content
(
table_row
.
cells
[
i
],
cells
[
i
])
if
not
table_row
.
cells
[
i
].
colspan_adjusted
:
table_row
.
cells
[
i
].
colspan_adjusted
=
True
table_row
.
cells
[
i
]
=
adjust_colspan
(
table_row
.
cells
,
i
,
len
(
cells
),
line
,
number_of_columns
,
delimiter_positions
)
elif
len
(
cells
)
==
number_of_columns
:
# Simple row
for
i
in
range
(
len
(
cells
)):
if
use_auxiliar_row
[
i
]:
list_flag
s
[
i
]
,
auxiliar_
row
[
i
]
=
handling_content
(
auxiliar_row
[
i
],
cells
[
i
],
list_flag
s
[
i
])
if
table_row
.
cells
[
i
].
auxiliar_index
is
not
None
:
# and auxiliar_rows[table_row[i]['auxiliar_index']]['
use_auxiliar_row
']
[i]:
auxiliar_rows
[
table_row
.
cell
s
[
i
]
.
auxiliar_
index
].
cells
[
i
]
=
handling_content
(
auxiliar_row
s
[
table_row
.
cells
[
i
].
auxiliar_index
].
cells
[
i
],
cell
s
[
i
])
else
:
# Handle content of the cell
list_flags
[
i
],
table_row
[
i
]
=
handling_content
(
table_row
[
i
],
cells
[
i
],
list_flag
s
[
i
])
table_row
.
cells
[
i
]
=
handling_content
(
table_row
.
cells
[
i
],
cell
s
[
i
])
else
:
raise
ValueError
(
"
More cells than columns found
"
)
else
:
raise
ValueError
(
"
No separator line found for row starting
"
)
if
has_header
and
start
>=
header_separator_index
:
# table_row and auxiliar_row are part of data_rows
data_rows
.
append
(
table_row
)
data_rows
.
append
(
table_row
.
cells
)
if
has_merged_cells
:
data_rows
.
append
(
auxiliar_row
)
for
row
in
auxiliar_rows
:
#for i in range(len(row.cells)):
# print(row.cells[i].content)
data_rows
.
append
(
row
.
cells
)
elif
has_header
and
start
<
header_separator_index
:
# table_row and auxiliar_row are part of header_rows
header_rows
.
append
(
table_row
)
header_rows
.
append
(
table_row
.
cells
)
if
has_merged_cells
:
header_rows
.
append
(
auxiliar_row
)
for
row
in
auxiliar_rows
:
header_rows
.
append
(
row
.
cells
)
#print(header_rows)
#print(data_rows)
...
...
@@ -711,35 +757,35 @@ def parse_pandoc_table_with_spans(pandoc_table):
italic
=
"
<i>
"
for
row
in
rows
:
for
cell
in
row
:
if
cell
[
'
content
'
]
is
not
None
:
if
cell
.
content
is
not
None
:
# Replacing "<" by <
cell
[
'
content
'
]
=
cell
[
'
content
'
]
.
replace
(
"
<
"
,
"
<
"
)
cell
.
content
=
cell
.
content
.
replace
(
"
<
"
,
"
<
"
)
#Bold
for
bold_characters
in
[
"
**
"
,
"
__
"
]:
while
cell
[
'
content
'
]
.
find
(
bold_characters
)
!=
-
1
:
cell
[
'
content
'
]
=
cell
[
'
content
'
]
.
replace
(
bold_characters
,
bold
,
1
)
while
cell
.
content
.
find
(
bold_characters
)
!=
-
1
:
cell
.
content
=
cell
.
content
.
replace
(
bold_characters
,
bold
,
1
)
if
bold
==
"
<strong>
"
:
bold
=
"
</strong>
"
else
:
bold
=
"
<strong>
"
#Italic
while
cell
[
'
content
'
]
.
find
(
"
_
"
)
!=
-
1
and
cell
[
'
content
'
]
.
find
(
"
\_
"
)
==
-
1
:
cell
[
'
content
'
]
=
cell
[
'
content
'
]
.
rstrip
()
.
replace
(
"
_
"
,
italic
,
1
)
while
cell
.
content
.
find
(
"
_
"
)
!=
-
1
and
cell
.
content
.
find
(
"
\_
"
)
==
-
1
:
cell
.
content
=
cell
.
content
.
rstrip
()
.
replace
(
"
_
"
,
italic
,
1
)
if
italic
==
"
<i>
"
:
italic
=
"
</i>
"
else
:
italic
=
"
<i>
"
while
cell
[
'
content
'
]
.
find
(
"
\_
"
)
!=
-
1
:
cell
[
'
content
'
]
=
cell
[
'
content
'
]
.
rstrip
().
replace
(
"
\_
"
,
"
_
"
,
1
)
while
cell
.
content
.
find
(
"
\_
"
)
!=
-
1
:
cell
.
content
=
cell
.
content
.
rstrip
().
replace
(
"
\_
"
,
"
_
"
,
1
)
# Correct newlines characters
for
row
in
header_rows
:
for
cell
in
row
:
cell
[
'
content
'
]
=
cell
[
'
content
'
]
.
replace
(
"
\n
"
,
"
<br />
"
)
if
cell
[
'
content
'
]
is
not
None
else
None
cell
.
content
=
cell
.
content
.
replace
(
"
\n
"
,
"
<br />
"
)
if
cell
.
content
is
not
None
else
None
for
row
in
data_rows
:
for
cell
in
row
:
cell
[
'
content
'
]
=
cell
[
'
content
'
]
.
replace
(
"
\n
"
,
"
<br />
"
)
if
cell
[
'
content
'
]
is
not
None
else
None
cell
.
content
=
cell
.
content
.
replace
(
"
\n
"
,
"
<br />
"
)
if
cell
.
content
is
not
None
else
None
# Checking that the grid is correct Not too much tested - need to take into account rowspan of previous rows
forward_rowspan
=
[]
...
...
@@ -748,13 +794,13 @@ def parse_pandoc_table_with_spans(pandoc_table):
forward_rowspan
=
[
0
for
_
in
range
(
len
(
header_rows
[
row_index
]))]
sum
=
0
for
cell_index
in
range
(
len
(
header_rows
[
row_index
])):
sum
+=
header_rows
[
row_index
][
cell_index
]
[
'
colspan
'
]
if
row_index
>
0
and
header_rows
[
row_index
][
cell_index
]
[
'
colspan
'
]
==
0
:
sum
+=
header_rows
[
row_index
][
cell_index
]
.
colspan
if
row_index
>
0
and
header_rows
[
row_index
][
cell_index
]
.
colspan
==
0
:
if
forward_rowspan
[
cell_index
]
>
0
:
sum
+=
1
forward_rowspan
[
cell_index
]
-=
1
if
forward_rowspan
[
cell_index
]
==
0
and
header_rows
[
row_index
][
cell_index
]
[
'
rowspan
'
]
>
1
:
forward_rowspan
[
cell_index
]
=
header_rows
[
row_index
][
cell_index
]
[
'
rowspan
'
]
-
1
if
forward_rowspan
[
cell_index
]
==
0
and
header_rows
[
row_index
][
cell_index
]
.
rowspan
>
1
:
forward_rowspan
[
cell_index
]
=
header_rows
[
row_index
][
cell_index
]
.
rowspan
-
1
if
not
sum
==
number_of_columns
:
raise
ValueError
(
"
Grid table not converted properly
"
)
forward_rowspan
=
[]
...
...
@@ -763,13 +809,13 @@ def parse_pandoc_table_with_spans(pandoc_table):
forward_rowspan
=
[
0
for
_
in
range
(
len
(
data_rows
[
row_index
]))]
sum
=
0
for
cell_index
in
range
(
len
(
data_rows
[
row_index
])):
sum
+=
data_rows
[
row_index
][
cell_index
]
[
'
colspan
'
]
if
row_index
>
0
and
data_rows
[
row_index
][
cell_index
]
[
'
colspan
'
]
==
0
:
sum
+=
data_rows
[
row_index
][
cell_index
]
.
colspan
if
row_index
>
0
and
data_rows
[
row_index
][
cell_index
]
.
colspan
==
0
:
if
forward_rowspan
[
cell_index
]
>
0
:
sum
+=
1
forward_rowspan
[
cell_index
]
-=
1
if
forward_rowspan
[
cell_index
]
==
0
and
data_rows
[
row_index
][
cell_index
]
[
'
rowspan
'
]
>
1
:
forward_rowspan
[
cell_index
]
=
data_rows
[
row_index
][
cell_index
]
[
'
rowspan
'
]
-
1
if
forward_rowspan
[
cell_index
]
==
0
and
data_rows
[
row_index
][
cell_index
]
.
rowspan
>
1
:
forward_rowspan
[
cell_index
]
=
data_rows
[
row_index
][
cell_index
]
.
rowspan
-
1
if
not
sum
==
number_of_columns
:
raise
ValueError
(
"
Grid table not converted properly
"
)
...
...
@@ -789,35 +835,35 @@ def generate_html_table_with_spans(pandoc_table):
for
row
in
grid_header
:
for
cell
in
row
:
if
cell
[
'
rowspan
'
]
!=
0
and
cell
[
'
colspan
'
]
!=
0
:
if
cell
.
rowspan
!=
0
and
cell
.
colspan
!=
0
:
has_header
=
True
if
has_header
:
html
+=
"
<thead>
\n
"
for
row
in
grid_header
:
html
+=
"
<tr>
\n
"
for
cell
in
row
:
if
cell
[
'
rowspan
'
]
==
0
or
cell
[
'
colspan
'
]
==
0
:
if
cell
.
rowspan
==
0
or
cell
.
colspan
==
0
:
continue
else
:
# Prepare content, in case there's a list
#print(cell
['
content
']
)
#print(cell
.
content)
if
matches
:
=
re
.
findall
(
r
"
\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>
"
,
cell
[
'
content
'
]
):
# Update cell in new row
cell
.
content
):
# Update cell in new row
#print("MATCHING")
list
=
"
<ul>
"
# Build list the matches
for
match
in
matches
:
list
+=
"
<li>
"
+
match
[
1
]
+
"
</li>
"
list
+=
"
</ul>
"
cell
[
'
content
'
]
=
re
.
sub
(
r
"
(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+
"
,
list
,
cell
[
'
content
'
]
)
cell
.
content
=
re
.
sub
(
r
"
(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+
"
,
list
,
cell
.
content
)
# Enforce left alignment if cell contains a list
cell
[
'
alignment
'
]
=
"
align=
\"
left
\"
"
cell
.
alignment
=
"
align=
\"
left
\"
"
#else:
# print("NOT MATCHING")
rowspan
=
f
"
rowspan=
\"
{
cell
[
'
rowspan
'
]
}
\"
"
if
cell
[
"
rowspan
"
]
>
1
else
""
colspan
=
f
"
colspan=
\"
{
cell
[
'
colspan
'
]
}
\"
"
if
cell
[
"
colspan
"
]
>
1
else
""
html
+=
f
"
<th
{
rowspan
}{
colspan
}
{
cell
[
'
alignment
'
]
}
>
{
cell
[
'
content
'
]
}
</th>
\n
"
rowspan
=
f
"
rowspan=
\"
{
cell
.
rowspan
}
\"
"
if
cell
.
rowspan
>
1
else
""
colspan
=
f
"
colspan=
\"
{
cell
.
colspan
}
\"
"
if
cell
.
colspan
>
1
else
""
html
+=
f
"
<th
{
rowspan
}{
colspan
}
{
cell
.
alignment
}
>
{
cell
.
content
}
</th>
\n
"
html
+=
"
</tr>
\n
"
html
+=
"
</thead>
\n
"
...
...
@@ -825,26 +871,27 @@ def generate_html_table_with_spans(pandoc_table):
for
row
in
grid_body
:
html
+=
"
<tr>
\n
"
for
cell
in
row
:
if
cell
[
'
rowspan
'
]
==
0
or
cell
[
'
colspan
'
]
==
0
:
if
cell
.
rowspan
==
0
or
cell
.
colspan
==
0
:
continue
else
:
#Prepare content, in case there's a list
#print(cell
['
content
']
)
if
matches
:
=
re
.
findall
(
r
"
\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>
"
,
cell
[
'
content
'
]
):
# Update cell in new row
#print(cell
.
content)
if
matches
:
=
re
.
findall
(
r
"
\s*([-*+]|\s*\d+\.)\s+([^<]+)<br \/>
"
,
cell
.
content
):
# Update cell in new row
#print("MATCHING")
#print(cell.content)
list
=
"
<ul>
"
# Build list the matches
for
match
in
matches
:
list
+=
"
<li>
"
+
match
[
1
]
+
"
</li>
"
list
+=
"
</ul>
"
cell
[
'
content
'
]
=
re
.
sub
(
r
"
(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+
"
,
list
,
cell
[
'
content
'
]
)
cell
.
content
=
re
.
sub
(
r
"
(\s*([-*+]|\s*\d+\.)\s+[^<]+<br \/>)+
"
,
list
,
cell
.
content
)
# Enforce left alignment if cell contains a list
cell
[
'
alignment
'
]
=
"
align=
\"
left
\"
"
cell
.
alignment
=
"
align=
\"
left
\"
"
#else:
#print("NOT MATCHING")
rowspan
=
f
"
rowspan=
\"
{
cell
[
'
rowspan
'
]
}
\"
"
if
cell
[
"
rowspan
"
]
>
1
else
""
colspan
=
f
"
colspan=
\"
{
cell
[
'
colspan
'
]
}
\"
"
if
cell
[
"
colspan
"
]
>
1
else
""
html
+=
f
"
<td
{
rowspan
}{
colspan
}
{
cell
[
'
alignment
'
]
}
>
{
cell
[
'
content
'
]
}
</td>
\n
"
rowspan
=
f
"
rowspan=
\"
{
cell
.
rowspan
}
\"
"
if
cell
.
rowspan
>
1
else
""
colspan
=
f
"
colspan=
\"
{
cell
.
colspan
}
\"
"
if
cell
.
colspan
>
1
else
""
html
+=
f
"
<td
{
rowspan
}{
colspan
}
{
cell
.
alignment
}
>
{
cell
.
content
}
</td>
\n
"
html
+=
"
</tr>
\n
"
html
+=
"
</tbody>
\n
"
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment