Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
S
Scripts
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Specification Tools
Scripts
Commits
bc780760
Commit
bc780760
authored
7 months ago
by
Miguel Angel Reina Ortega
Browse files
Options
Downloads
Patches
Plain Diff
Support for grid tables and equations on mkdocs
parent
fd0dfa13
No related branches found
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
generateChangemarks/.gitlab-ci.yml
+2
-2
2 additions, 2 deletions
generateChangemarks/.gitlab-ci.yml
toMkdocs/mkdocs.yml
+6
-0
6 additions, 0 deletions
toMkdocs/mkdocs.yml
toMkdocs/toMkdocs.py
+342
-4
342 additions, 4 deletions
toMkdocs/toMkdocs.py
with
350 additions
and
6 deletions
generateChangemarks/.gitlab-ci.yml
+
2
−
2
View file @
bc780760
...
...
@@ -197,12 +197,12 @@ pages:
curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fstylesheets%2Fextra%2Ecss/raw?ref=master" >> extra.css
-
mkdir -p docs/stylesheets && mv extra.css docs/stylesheets/
-
|
curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fmkdocs%2Eyml/raw?ref=
master
" >> mkdocs.yml
curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2Fmkdocs%2Eyml/raw?ref=
gridtables
" >> mkdocs.yml
-
|
curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FindexDownload%2Emd/raw?ref=master" >> indexDownload.md
-
mkdir -p docs/download && mv indexDownload.md docs/download/index.md
-
|
curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FtoMkdocs%2Epy/raw?ref=
master
" >> toMkdocs.py
curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/toMkdocs%2FtoMkdocs%2Epy/raw?ref=
gridtables
" >> toMkdocs.py
-
|
export SPEC_NAME=$(ls | grep -E "(TS|TR|WI).*\.md" | cut -d'.' -f1)
-
|
...
...
This diff is collapsed.
Click to expand it.
toMkdocs/mkdocs.yml
+
6
−
0
View file @
bc780760
...
...
@@ -60,6 +60,8 @@ markdown_extensions:
pygments_lang_class
:
true
-
pymdownx.inlinehilite
-
pymdownx.snippets
-
pymdownx.arithmatex
:
generic
:
true
-
pymdownx.superfences
:
custom_fences
:
-
name
:
mermaid
...
...
@@ -69,6 +71,10 @@ markdown_extensions:
alternate_style
:
true
-
tables
extra_javascript
:
-
javascripts/mathjax.js
-
https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js
##############################################################################
extra
:
...
...
This diff is collapsed.
Click to expand it.
toMkdocs/toMkdocs.py
+
342
−
4
View file @
bc780760
...
...
@@ -11,6 +11,7 @@ from enum import Enum, auto
import
argparse
,
re
,
os
,
shutil
,
hashlib
,
base64
from
dataclasses
import
dataclass
from
rich
import
print
from
html
import
escape
verbose
=
False
veryVerbose
=
False
...
...
@@ -418,6 +419,9 @@ _matchNote = re.compile(r'^\s*>\s*', re.IGNORECASE)
_matchStandAloneImage
=
re
.
compile
(
r
'
^\s*!\[[^\]]*\]\(([^)]*)\)\s*
'
,
re
.
IGNORECASE
)
_matchTable
=
re
.
compile
(
r
'
^\s*\|.*\|\s$
'
,
re
.
IGNORECASE
)
_matchTableSeparator
=
re
.
compile
(
r
'
^\s*\|([-: ]+\|)+\s*$
'
,
re
.
IGNORECASE
)
_matchGridTable
=
re
.
compile
(
r
'
^\s*\+-.*\+\s$
'
,
re
.
IGNORECASE
)
_matchGridTableBodySeparator
=
re
.
compile
(
r
'
.*\+([-:]+\+)+.*$
'
,
re
.
IGNORECASE
)
_matchGridTableHeaderSeparator
=
re
.
compile
(
r
'
.*\+([=:]+\+)+.*$
'
,
re
.
IGNORECASE
)
_match2spaceListIndention
=
re
.
compile
(
r
'
^\s{2}-
'
,
re
.
IGNORECASE
)
_markdownLink
=
re
.
compile
(
r
'
[^!]\[[^\]]*\]\((#[^)]*)\)
'
,
re
.
IGNORECASE
)
_htmlLink
=
re
.
compile
(
r
'
<a\s+href=
"
([^
"
\']*)
"
>[^<]*</a>
'
,
re
.
IGNORECASE
)
...
...
@@ -447,6 +451,309 @@ def shortHash(value:str, length:int) -> str:
).
digest
()
).
decode
()[:
length
]
def parse_pandoc_table_with_spans(pandoc_table:str) -> tuple:
    """
    Parse a Pandoc-style grid table into rows of cell records carrying rowspan/colspan metadata.

    Every cell is a dict with the keys ``content`` (``"NOCONTENT"`` until text is assigned),
    ``rowspan``, ``colspan``, ``colspan_adjusted`` and ``position``. Cells whose ``rowspan`` or
    ``colspan`` is still 0 at the end are placeholders covered by a neighbouring span.

    :param pandoc_table: String of the Pandoc-style grid table.
    :return: Tuple ``(header_rows, data_rows)``; each element is a list of rows, each row a list of cell dicts.
    :raises ValueError: If no separator line is found, if a line cannot be mapped to the column grid,
        if no rows are produced, or if the final column-count check fails.
    """
    # Compiled once per call instead of once per examined line.
    full_separator = re.compile(r'\s*\+([-:=]+\+)+\s*$', re.IGNORECASE)
    dash_run = re.compile(r'[-:]+$', re.IGNORECASE)

    def is_separator(line):
        # Full-width separator only; partial separators (mixing '|' and '+') do not match.
        return full_separator.match(line)

    def new_cell(position):
        # Placeholder cell; 'position' is the offset of the '+' closing the cell in its row separator.
        return {
            "content": "NOCONTENT",
            "rowspan": 0,
            "colspan": 0,
            "colspan_adjusted": False,
            "position": position,
        }

    def absorb(cell, text):
        # First text seen turns the placeholder into a real 1x1 cell; later text is concatenated.
        if cell['content'] == "NOCONTENT":
            cell['rowspan'] += 1
            cell['colspan'] += 1
            cell['content'] = text
        else:
            cell['content'] += text

    def next_delimiter(line, start, delimiters):
        # Offset of the nearest delimiter character after 'start', or -1 if none is left.
        found = [line.find(d, start + 1) for d in delimiters if d in line[start + 1:]]
        return min(found) if found else -1

    def adjust_colspan(table_row, i, cell_count, line, delimiters):
        # Widen cell i by one column for every grid column boundary its closing delimiter passes.
        for j in range(i, cell_count):
            delimiter_start = table_row[j - 1]['position'] if j != 0 else 0
            position = next_delimiter(line, delimiter_start, delimiters)
            # Compare against the per-column boundary list. The partial-separator path
            # previously subscripted the scalar 'delimiter_positions_start' here, which is
            # an int and raised TypeError.
            if position > delimiter_positions[j]:
                table_row[i]['colspan'] += 1
            elif position < delimiter_positions[j]:
                raise ValueError("Wrong cell formatting")
            else:
                break

    # Split the input into lines
    lines = [line.strip() for line in pandoc_table.strip().split("\n")]

    separator_indices = [i for i, line in enumerate(lines) if is_separator(line)]
    if not separator_indices:
        raise ValueError("No valid separators found in the provided Pandoc table.")

    # The separator with the most '+' defines the column grid and its boundary offsets.
    delimiter_positions = []
    number_of_columns = 0
    for separator_index in separator_indices:
        separator_line = lines[separator_index]
        columns_here = separator_line.count("+") - 1
        if columns_here > number_of_columns:
            number_of_columns = columns_here
            delimiter_positions = []
            for j in range(number_of_columns):
                search_from = delimiter_positions[j - 1] if j != 0 else 0
                delimiter_positions.append(separator_line.find("+", search_from + 1))

    # The last separator matching the header pattern marks the header/body boundary.
    header_separator_index = None
    for index in separator_indices:
        if _matchGridTableHeaderSeparator.match(lines[index]):
            header_separator_index = index
    has_header = header_separator_index is not None

    header_rows = []
    data_rows = []
    for row in range(len(separator_indices) - 1):
        table_row = []
        auxiliar_row = []
        use_auxiliar_row = []
        has_merged_cells = False
        in_data_row = False
        start, end = separator_indices[row], separator_indices[row + 1]
        # Includes the opening separator, which tells how many columns this row has.
        row_lines = lines[start:end]

        for line in row_lines:
            if is_separator(line) and not in_data_row:
                number_of_columns_row = line.count("+") - 1
                in_data_row = True
                parts = re.split(r"\s*\+\s*", line.strip("+"))
                # One placeholder per column of this row, remembering where each column ends.
                delimiter_index = 0
                for i in range(number_of_columns_row):
                    delimiter_index += len(parts[i]) + 1
                    table_row.append(new_cell(delimiter_index))
                # The auxiliary row always spans the full grid; it is used after a partial separator.
                for i in range(number_of_columns):
                    auxiliar_row.append(new_cell(0))
                    use_auxiliar_row.append(False)

            elif in_data_row:
                if _matchGridTableBodySeparator.match(line):
                    # Partial separator: some cells end here, others continue (rowspan).
                    has_merged_cells = True
                    cells = re.split(r"\s*[\|\+]\s*", line.strip("|").strip("+"))
                    if len(cells) < number_of_columns:
                        # Fewer cells than columns: colspans must be derived from delimiter offsets.
                        for i in range(len(cells)):
                            if dash_run.match(cells[i]):
                                # This column is closed; following text goes to the auxiliary row.
                                use_auxiliar_row[i] = True
                            else:
                                absorb(table_row[i], cells[i])
                                # The cell continues past the partial separator.
                                table_row[i]['rowspan'] += 1
                                if not table_row[i]['colspan_adjusted']:
                                    table_row[i]['colspan_adjusted'] = True
                                    adjust_colspan(table_row, i, len(cells), line, "|+")
                    elif len(cells) == number_of_columns:
                        # One cell per column: no colspan involved, only rowspans.
                        for i in range(len(cells)):
                            if dash_run.match(cells[i]):
                                use_auxiliar_row[i] = True
                            else:
                                absorb(table_row[i], cells[i])
                                table_row[i]['rowspan'] += 1
                    else:
                        raise ValueError("More cells than columns found")
                else:
                    # Plain content line
                    cells = re.split(r"\s*\|\s*", line.strip("|"))
                    if len(cells) < number_of_columns:
                        # Fewer cells than columns: colspans must be derived from '|' offsets.
                        for i in range(len(cells)):
                            absorb(table_row[i], cells[i])
                            if not table_row[i]['colspan_adjusted']:
                                table_row[i]['colspan_adjusted'] = True
                                adjust_colspan(table_row, i, len(cells), line, "|")
                    elif len(cells) == number_of_columns:
                        for i in range(len(cells)):
                            target = auxiliar_row[i] if use_auxiliar_row[i] else table_row[i]
                            absorb(target, cells[i])
                    else:
                        raise ValueError("More cells than columns found")
            else:
                raise ValueError("No separator line found for row starting")

        # Rows above the header separator are header rows; everything else is body.
        # A table without a header separator is all body (previously such rows were
        # dropped and the function always ended in "No valid rows found").
        target_rows = header_rows if has_header and start < header_separator_index else data_rows
        target_rows.append(table_row)
        if has_merged_cells:
            target_rows.append(auxiliar_row)

    # A backslash in the cell text stands for a line break.
    for rows in (header_rows, data_rows):
        for table_row in rows:
            for cell in table_row:
                cell['content'] = cell['content'].replace("\\", "<br>")

    if not data_rows and not header_rows:
        raise ValueError("No valid rows found in the provided Pandoc table.")

    # Turn '**' markers into alternating <strong> / </strong> tags. The open/close state is
    # reset per section (header, body) but deliberately carried across cells, as before.
    for rows in (header_rows, data_rows):
        bold = "<strong>"
        for table_row in rows:
            for cell in table_row:
                while cell['content'].find("**") != -1:
                    cell['content'] = cell['content'].replace("**", bold, 1)
                    bold = "</strong>" if bold == "<strong>" else "<strong>"

    # Sanity check: in every row the colspans, plus columns still covered by a rowspan from an
    # earlier row, must add up to the grid width. Not heavily tested upstream.
    for rows in (header_rows, data_rows):
        forward_rowspan = []
        for row_index, table_row in enumerate(rows):
            if len(forward_rowspan) == 0:
                forward_rowspan = [0 for _ in range(len(table_row))]
            column_total = 0
            for cell_index, cell in enumerate(table_row):
                column_total += cell['colspan']
                if row_index > 0 and cell['colspan'] == 0:
                    # Only consume a pending rowspan when one is actually outstanding.
                    if forward_rowspan[cell_index] > 0:
                        column_total += 1
                        forward_rowspan[cell_index] -= 1
                if forward_rowspan[cell_index] == 0 and cell['rowspan'] > 1:
                    forward_rowspan[cell_index] = cell['rowspan'] - 1
            if not column_total == number_of_columns:
                raise ValueError("Grid table not converted properly")

    return header_rows, data_rows
def generate_html_table_with_spans(pandoc_table:str) -> str:
    """
    Generate an HTML table from a Pandoc-style grid table with row and column spans.

    The table is parsed with ``parse_pandoc_table_with_spans``. Cells whose ``rowspan`` or
    ``colspan`` is 0 are placeholders covered by another cell and are not emitted. A
    ``<thead>`` section is written only if the header contains at least one real cell.
    Cell content is inserted as-is (it may already contain ``<br>`` / ``<strong>`` markup).

    :param pandoc_table: String of the Pandoc-style grid table.
    :return: HTML string.
    """
    grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)

    def render_rows(rows):
        # Render a list of rows as <tr> elements; shared by the header and body sections.
        fragments = []
        for row in rows:
            fragments.append("<tr>\n")
            for cell in row:
                if cell['rowspan'] == 0 or cell['colspan'] == 0:
                    continue
                row_count = cell['rowspan']
                column_count = cell['colspan']
                # Span attributes are only written when they differ from the default of 1.
                # The leading space keeps them separated from the tag name.
                rowspan = f" rowspan=\"{row_count}\"" if row_count > 1 else ""
                colspan = f" colspan=\"{column_count}\"" if column_count > 1 else ""
                content = cell['content']
                fragments.append(f"<td{rowspan}{colspan}>{content}</td>\n")
            fragments.append("</tr>\n")
        return fragments

    has_header = any(
        cell['rowspan'] != 0 and cell['colspan'] != 0
        for row in grid_header
        for cell in row
    )

    fragments = ["<table>\n"]
    if has_header:
        fragments.append("<thead>\n")
        fragments.extend(render_rows(grid_header))
        fragments.append("</thead>\n")
    fragments.append("<tbody>\n")
    fragments.extend(render_rows(grid_body))
    fragments.append("</tbody>\n")
    fragments.append("</table>")
    return "".join(fragments)
def
analyseMarkdown
(
filename
:
str
)
->
Document
:
"""
Analyse the markdown file and split it into clauses.
...
...
@@ -473,6 +780,9 @@ def analyseMarkdown(filename:str) -> Document:
inCodefence
=
False
inTable
=
False
tableHasSeparator
=
False
inGridTable
=
False
gridTableHasSeparator
=
False
gridTable
=
""
for
line
in
inLines
:
# Detect and handle codefences
...
...
@@ -493,7 +803,7 @@ def analyseMarkdown(filename:str) -> Document:
continue
# Detect and handle tables
if
_matchTable
.
match
(
line
)
and
not
inTable
:
if
_matchTable
.
match
(
line
)
and
not
inTable
and
not
inGridTable
:
inTable
=
True
outClauses
[
-
1
].
append
(
Line
(
line
,
LineType
.
TABLEHEADER
))
continue
...
...
@@ -512,6 +822,34 @@ def analyseMarkdown(filename:str) -> Document:
outClauses
[
-
1
].
lines
[
-
1
].
lineType
=
LineType
.
TABLELASTROW
# continue with other matches
#Detect grid tables and convert them to html table
if
_matchGridTable
.
match
(
line
)
and
not
inGridTable
:
inGridTable
=
True
#outClauses[-1].append(Line(line, LineType.TABLEHEADER))
gridTable
+=
line
continue
if
inGridTable
:
if
_matchGridTableHeaderSeparator
.
match
(
line
)
or
_matchGridTableBodySeparator
.
match
(
line
):
#outClauses[-1].append(Line(line, LineType.TABLESEPARATOR))
gridTable
+=
line
continue
elif
_matchTable
.
match
(
line
):
#outClauses[-1].append(Line(line, LineType.TABLEROW))
gridTable
+=
line
continue
else
:
inGridTable
=
False
# Mark the previous line as the last row in the table
#outClauses[-1].lines[-1].lineType = LineType.TABLELASTROW
print
(
gridTable
)
htmltable
=
""
htmltable
=
generate_html_table_with_spans
(
gridTable
)
print
(
htmltable
)
for
row
in
htmltable
:
outClauses
[
-
1
].
append
(
Line
(
row
,
LineType
.
TABLEROW
))
gridTable
=
""
# continue with other matches
# Detect notes
# Notes are lines that start with a '>'.
if
_matchNote
.
match
(
line
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment