Commit 29b1bf60 authored by Miguel Angel Reina Ortega

Generation of changemark CR:

- generation of a changemark MD file per modified clause
- conversion of the MD files to docx files
- combination of the docx files into the CR docx file
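These three steps map onto the docker stages added to generate_changemarks.sh below. A condensed sketch of the flow, assuming the image names used in this commit; the clause file name (6.2.3.md), the delimiter/coversheet paths and the $HOST_URL/$PROJECT_ID/$MERGE_IID values are illustrative placeholders:

# 1) Generate one markdown file per modified clause (new changemarks entry point in the pandocfilter image)
docker run --rm -v "$(pwd)":/tmp/ pandocfilter:latest changemarks -o /tmp/out "$HOST_URL" "$PROJECT_ID" "$MERGE_IID"
# 2) Preprocess each generated MD file, then convert it to docx with pandoc and the oneM2M reference template
docker run --rm -v "$(pwd)":/tmp/ pandocfilter:latest pandocFilter -o /tmp/out /tmp/out/6.2.3.md
docker run --rm -v "$(pwd)":/data pandoc/core:3.1.1.0 /data/out/6.2.3.md -f markdown -t docx --reference-doc Spec-template.docx -o /data/out/6.2.3.md.docx
# 3) Combine the per-clause docx files into the CR docx with forgelib-changedocs
docker run --rm -v "$(pwd)":/tmp/ forge.3gpp.org:5050/tools/3gpp-scripts/forgelib:miguel forgelib-changedocs -sf /tmp/out/ --preprocessor onem2m --outPath=/tmp/docs --startdelimiter /tmp/start.docx --enddelimiter /tmp/end.docx --coversheet /tmp/cover.docx "$HOST_URL" "$PROJECT_ID" "$MERGE_IID"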
parent e4f5e2da
@@ -28,7 +28,7 @@ Build pythonForPandocFilter docker image:
       - pandocFilter/setup.py
       - pandocFilter/requirements.txt
       - pandocFilter/pandocFilter.py
+      - pandocFilter/changemarks.py
 
 Word CR text:
   stage: generation
@@ -36,7 +36,7 @@ Word CR text:
       - merge_requests
   before_script:
     - |
-      curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/generate_changemarks%2Esh/raw?ref=master" >> generate_changemarks.sh
+      curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/generate_changemarks%2Esh/raw?ref=miguel" >> generate_changemarks.sh
    - chmod +x generate_changemarks.sh
    - |
      curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/onem2m_delimiter_start%2Edocx/raw?ref=master" >> onem2m_delimiter_start.docx
 #!/bin/bash
-DOCKER_IMAGE=forge.3gpp.org:5050/tools/3gpp-scripts/forgelib:v2.2.0
+FORGELIB_DOCKER_IMAGE=forge.3gpp.org:5050/tools/3gpp-scripts/forgelib:miguel
+PANDOC_FILTER_DOCKER_IMAGE=pandocfilter:latest
+DOCKER_IMAGE=pandoc/core:3.1.1.0
 
 echo "\n------ Checking for docker image --------"
 docker pull "$DOCKER_IMAGE"
@@ -11,13 +13,26 @@ rm **/*.docx
 echo "------ Parsing repo URL --------"
 HOST_URL=$(echo $1 | cut -d'/' -f 1-3)
-PROJECT_NAME=$(echo $1 | cut -d'/' -f 4- | cut -d'.' -f 1)
+PROJECT_NAME=$(echo $1 | cut -d'/' -f 6- | cut -d'.' -f 1)
 echo "HOST URL:" $HOST_URL
 echo "PROJECT NAME:" $PROJECT_NAME
 echo "PROJECT ID:" $2
 echo "MERGE IID:" $3
 
-echo "\n------ Generating change marks --------"
-docker container run --rm -v $(pwd):/tmp/ "$DOCKER_IMAGE" forgelib-changedocs -vv --combine --diffs --preprocessor onem2m --outPath=/tmp/docs --startdelimiter "/tmp/$4" --enddelimiter "/tmp/$5" --coversheet "/tmp/$6" "$HOST_URL" "$2" "$PROJECT_NAME" "$3"
+echo "\n------ Generating change marks MD --------"
+#docker container run --rm -v $(pwd):/tmp/ "$DOCKER_IMAGE" forgelib-changedocs -vv --combine --diffs --preprocessor onem2m --outPath=/tmp/docs --startdelimiter "/tmp/$4" --enddelimiter "/tmp/$5" --coversheet "/tmp/$6" "$HOST_URL" "$2" "$PROJECT_NAME" "$3"
+docker container run --rm -v $(pwd):/tmp/ -u $(id -u):$(id -g) "$PANDOC_FILTER_DOCKER_IMAGE" changemarks -o "/tmp/out" "$HOST_URL" "$2" "$3"
+
+echo "\n------ Generating changemarks docx --------"
+for i in out/*.md ; do
+    DOCUMENT_NAME=$(echo $i | cut -d'/' -f 2)
+    echo "\n------ Preparing spec --------"
+    docker run --rm -v $(pwd):/tmp/ -u $(id -u):$(id -g) "$PANDOC_FILTER_DOCKER_IMAGE" pandocFilter -o "/tmp/out" "/tmp/$i"
+    echo "\n------ Publishing spec --------"
+    docker run --rm -v $(pwd):/data -u $(id -u):$(id -g) "$DOCKER_IMAGE" "/data/$i" -f markdown -t docx --reference-doc "Spec-template.docx" -o "/data/out/${DOCUMENT_NAME}.docx"
+done
+
+echo "\n------ Combining docx --------"
+docker container run --rm -v $(pwd):/tmp/ -u $(id -u):$(id -g) "$FORGELIB_DOCKER_IMAGE" forgelib-changedocs -vv -sf "/tmp/out/" --preprocessor onem2m --outPath=/tmp/docs --startdelimiter "/tmp/$4" --enddelimiter "/tmp/$5" --coversheet "/tmp/$6" "$HOST_URL" "$2" "$3"
 
 exit 0
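For reference, a hedged example of how the updated script would be invoked. The GitLab predefined variables stand in for the repository URL, project ID and merge request IID actually passed by the CI job; only onem2m_delimiter_start.docx is confirmed by the CI configuration above, so the end-delimiter and coversheet file names are assumptions:

# $1 = repository URL, $2 = project ID, $3 = merge request IID,
# $4 = start delimiter docx, $5 = end delimiter docx, $6 = CR coversheet docx
./generate_changemarks.sh "$CI_REPOSITORY_URL" "$CI_PROJECT_ID" "$CI_MERGE_REQUEST_IID" \
    onem2m_delimiter_start.docx onem2m_delimiter_end.docx CR_coversheet.docx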
##
# changemarks.py
#
# Script to generate a markdown file per clause modified in a merge request
#
# (c) 2023 by Miguel Angel Reina Ortega
# License: BSD 3-Clause License. See the LICENSE file for further details.
#
import argparse, os, re, sys
from rich import print
from rich.progress import Progress, TextColumn, TimeElapsedColumn
import logging
import requests
from unidiff import PatchSet


def fetch(url : str, expected_content_type : str = None) -> requests.Response:
    r = requests.get(url)
    logging.debug(f"Fetching {url}")
    if (r.status_code != 200):
        errorMessage = f"Failed attempting to retrieve {url}, status code {r.status_code}"
        logging.error(errorMessage)
        raise ValueError(errorMessage)
    if expected_content_type:
        if r.headers['Content-Type'] != expected_content_type:
            errorMessage = f"Unexpected content type retrieving {url}. Expected {expected_content_type}, got {r.headers['Content-Type']}"
            logging.error(errorMessage)
            raise ValueError(errorMessage)
    return r


def fetch_text(url : str, expected_content_type : str = None) -> str:
    r = fetch(url, expected_content_type)
    return r.text


def fetch_json(url : str, expected_content_type : str = None):
    r = fetch(url, expected_content_type)
    return r.json()


def readMDFile(progress:Progress, document:str) -> list[str]:
    """ Read the markdown file and return a list of lines.
    """
    _taskID = progress.add_task('[blue]Reading document', start=False, total=0)
    # Check if file exists
    if not os.path.exists(document):
        print(f'File {document} does not exist')
        exit(1)
    # Read the file
    with open(document, 'r', encoding='utf-8', errors = 'replace') as f:
        progress.stop_task(_taskID)
        return f.readlines()


def writeMDFile(progress:Progress, mdLines:list[str], document:str, outDirectory:str) -> None:
    """ Write the markdown file.
    """
    _taskID = progress.add_task('[blue]Writing document', start=False, total=0)
    # Write the file
    with open(f'{outDirectory}/{os.path.basename(document)}', 'w', encoding='utf-8', errors = 'replace') as f:
        f.writelines(mdLines)
    progress.stop_task(_taskID)


class Clause():
    '''
    Defines a clause of the base document
    '''

    def __init__(self, line, from_id, to_id, clause_nr):
        self.raw = line
        self.from_id = from_id
        self.to_id = to_id
        self.clause_nr = clause_nr


def find_all_clauses(progress:Progress, mdLines:list[str]):
    '''
    Scans the body of the document to find all clauses.
    Returns a list of Clauses, each holding its start and end line index.
    '''
    _taskID = progress.add_task('[blue]Find all available clauses', start=False, total=0)
    clauseregex = re.compile(r'^#+\s(\d(\.\d)*|Annex \w|\w*(\.\d)*).*')
    clauses:list[Clause] = []
    index = 1
    empty = ""
    clause = Clause(empty, 0, 0, empty)
    for line in mdLines:
        if line.startswith('#'):
            matches = re.findall(clauseregex, line)  # Match heading
            if matches:  # It may be the end of the clause or the start of a subclause
                if index - 2 == clause.from_id:  # It is a subclause
                    clause.from_id = index
                    clause.raw = line
                    clause.clause_nr = matches[0][0]
                else:  # It is the end of the clause
                    clause.to_id = index - 1
                    clauses.append(clause)
                    clause = Clause(line, index, index, matches[0][0])
            else:  # Heading that does not match the clause numbering pattern
                print("Unknown heading")
        index = index + 1
    # Append last clause (usually History)
    clause.to_id = index - 1
    clauses.append(clause)
    return clauses


class MR:

    def __init__(self, project_id, mr_id, root = "https://git.onem2m.org"):
        self.project_id = project_id
        self.mr_id = mr_id
        self.root = root
        self.raw_project_details = fetch_json(self.api_url())
        self.web_url = self.raw_project_details['web_url']
        self.raw_mr_details = fetch_json(self.api_url(f'merge_requests/{self.mr_id}'))
        self.author = self.raw_mr_details['author']['name']
        self.target_branch = self.raw_mr_details['target_branch']
        self.source_branch = self.raw_mr_details['source_branch']
        self.title = self.raw_mr_details['title']
        self.description = self.raw_mr_details['description']
        self.raw_diff = fetch_text(f'{self.web_url}/-/merge_requests/{self.mr_id}.diff', expected_content_type='text/plain')
        self.patch_set = PatchSet.from_string(self.raw_diff)

    def api_url(self, route : str = "") -> str:
        return f"{self.root}/api/v4/projects/{self.project_id}/{route}"

    def retrieve_text(self, branch: str, filename: str) -> str:
        return fetch_text(f"{self.web_url}/-/raw/{branch}/{filename}")


def find_changed_clauses(progress:Progress, mdLines:list[str], clauses:list[Clause], mr:MR, outDirectory:str):
    '''
    Determine the clauses that have been modified by the merge request.
    Returns the list of changed Clauses.
    '''
    _taskID = progress.add_task('[blue]Find changed clauses', start=False, total=0)
    changed_clauses:list[Clause] = []
    empty = ""
    changed_clause = Clause(empty, 0, 0, empty)
    for patched_file in mr.patch_set:
        if patched_file.source_file.startswith("a/TS"):
            logging.debug(f"Looking at changes in {patched_file.source_file}")
            for change in patched_file:
                # Check the previous changed_clause
                if (changed_clause.from_id <= change.target_start) and (changed_clause.to_id >= (change.target_start - 1 + change.target_length)):
                    generateMDforChange(progress, mdLines, changed_clause, change, outDirectory, True)
                    break
                i = 0
                # Check all clauses
                for clause in clauses:
                    if (clause.from_id <= change.target_start) and (clause.to_id >= (change.target_start - 1 + change.target_length)):
                        changed_clause = clauses.pop(i)
                        changed_clauses.append(clause)
                        generateMDforChange(progress, mdLines, changed_clause, change, outDirectory, False)
                        break
                    i = i + 1
    for clause in changed_clauses:
        logging.debug(f"Clause {clause.clause_nr} contains modifications")
    return changed_clauses


def generateMDforChange(progress:Progress, mdLines:list[str], changed_clause:Clause, change, outDirectory:str, existing_clause:bool):
    '''
    Generate the markdown file for a clause that has been modified by the merge request,
    marking added lines as underlined and removed lines as strikethrough.
    '''
    _taskID = progress.add_task('[blue]Generate MD for changed clauses', start=False, total=0)
    if not existing_clause:
        index = changed_clause.from_id - 1
        clauseMDlines: list[str] = []
        while index < changed_clause.to_id:
            clauseMDlines.append(mdLines[index] + '\n')
            index = index + 1
    else:
        # Re-read the clause file previously written to the output directory (file name as built by writeMDFile)
        clauseMDlines = readMDFile(progress, f'{outDirectory}/{changed_clause.clause_nr.replace(" ", "")}.md')
    j = change.target_start - changed_clause.from_id  # index gap
    for line in change:
        if (not (line.value.strip() == '') and (line.is_added)):
            clauseMDlines.insert(j, "<span class=\"underline\">" + line.value + "</span>\n\n")
            #clauseMDlines.insert(j, "<mark>" + line.value.strip("\n") + "</mark>\n\n")
            clauseMDlines.pop(j + 1)
        elif line.is_removed:
            clauseMDlines.insert(j, "~~" + line.value.strip() + "~~")
        j = j + 1
    writeMDFile(progress, clauseMDlines, changed_clause.clause_nr.replace(" ", "") + '.md', outDirectory)


def process(document:str, outDirectory:str, mr:MR) -> None:
    with Progress(TextColumn('{task.description}'), TimeElapsedColumn()) as progress:
        sourceText = mr.retrieve_text(mr.source_branch, document)
        sourceMdLines = sourceText.splitlines(keepends=False)
        clauses = find_all_clauses(progress, sourceMdLines)
        changed_clauses = find_changed_clauses(progress, sourceMdLines, clauses, mr, outDirectory)


def main(args=None):
    # Parse command line arguments
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--outdir', '-o', action='store', dest='outDirectory', default = 'out', metavar = '<output directory>', help = 'specify output directory')
    parser.add_argument('rootURL', help="Forge root URL")
    parser.add_argument('projectID', help="Forge project ID")
    parser.add_argument('mergeID', help="Merge IID")
    pargs = parser.parse_args()

    # Process documents and print output
    os.makedirs(pargs.outDirectory, exist_ok = True)
    mr = MR(pargs.projectID, pargs.mergeID, pargs.rootURL)
    for patched_file in mr.patch_set:
        if patched_file.source_file.startswith("a/TS"):
            filename = patched_file.source_file.split("/")[1]
            process(filename, pargs.outDirectory, mr)
        else:
            logging.debug(f"Cannot process file named {patched_file.source_file}")


if __name__ == '__main__':
    sys.exit(main())
@@ -12,3 +12,5 @@ pygments==2.15.1
     # via rich
 rich==13.3.5
     # via oneM2M-markdown-to-pandoc-filter (setup.py)
+requests==2.31.0
+unidiff==0.7.5
@@ -11,7 +11,8 @@ setup(
         'rich',
     ],
     entry_points= {
-        'console_scripts' : ['pandocFilter=pandocFilter:main']
+        'console_scripts' : ['pandocFilter=pandocFilter:main',
+                             'changemarks=changemarks:main']
     }
 )
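With the second console script registered, the changemark generator can also be exercised outside the docker pipeline once the package is installed; the project ID and merge IID below are placeholder values:

# from the pandocFilter/ directory
pip install .
# positional arguments: rootURL, projectID, mergeID (see changemarks.py)
changemarks -o out https://git.onem2m.org 1234 56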