diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index cecaed0b20302ca72266b1a7879a7fb8c15576e1..03517a61ea58999219623e0d19e54a80c4b9d69c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -28,7 +28,7 @@ Build pythonForPandocFilter docker image: - pandocFilter/setup.py - pandocFilter/requirements.txt - pandocFilter/pandocFilter.py - + - pandocFilter/changemarks.py Word CR text: stage: generation @@ -36,7 +36,7 @@ Word CR text: - merge_requests before_script: - | - curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/generate_changemarks%2Esh/raw?ref=master" >> generate_changemarks.sh + curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/generate_changemarks%2Esh/raw?ref=miguel" >> generate_changemarks.sh - chmod +x generate_changemarks.sh - | curl "${CI_API_V4_URL}/projects/$TOOLS_SCRIPTS_PROJECT_ID/repository/files/onem2m_delimiter_start%2Edocx/raw?ref=master" >> onem2m_delimiter_start.docx diff --git a/generate_changemarks.sh b/generate_changemarks.sh index 0861f57dfed4e4573838c9ced8da3c9375b09d82..2bf7be3bc61b6ab7e374e6bed3817553f2c61c89 100644 --- a/generate_changemarks.sh +++ b/generate_changemarks.sh @@ -1,6 +1,8 @@ #!/bin/bash -DOCKER_IMAGE=forge.3gpp.org:5050/tools/3gpp-scripts/forgelib:v2.2.0 +FORGELIB_DOCKER_IMAGE=forge.3gpp.org:5050/tools/3gpp-scripts/forgelib:miguel +PANDOC_FILTER_DOCKER_IMAGE=pandocfilter:latest +DOCKER_IMAGE=pandoc/core:3.1.1.0 echo "\n------ Checking for docker image --------" docker pull "$DOCKER_IMAGE" @@ -11,13 +13,26 @@ rm **/*.docx echo "------ Parsing repo URL --------" HOST_URL=$(echo $1 | cut -d'/' -f 1-3) -PROJECT_NAME=$(echo $1 | cut -d'/' -f 4- | cut -d'.' -f 1) +PROJECT_NAME=$(echo $1 | cut -d'/' -f 6- | cut -d'.' 
-f 1) echo "HOST URL:" $HOST_URL echo "PROJECT NAME:" $PROJECT_NAME echo "PROJECT ID:" $2 echo "MERGE IID:" $3 -echo "\n------ Generating change marks --------" -docker container run --rm -v $(pwd):/tmp/ "$DOCKER_IMAGE" forgelib-changedocs -vv --combine --diffs --preprocessor onem2m --outPath=/tmp/docs --startdelimiter "/tmp/$4" --enddelimiter "/tmp/$5" --coversheet "/tmp/$6" "$HOST_URL" "$2" "$PROJECT_NAME" "$3" +echo "\n------ Generating change marks MD --------" +#docker container run --rm -v $(pwd):/tmp/ "$DOCKER_IMAGE" forgelib-changedocs -vv --combine --diffs --preprocessor onem2m --outPath=/tmp/docs --startdelimiter "/tmp/$4" --enddelimiter "/tmp/$5" --coversheet "/tmp/$6" "$HOST_URL" "$2" "$PROJECT_NAME" "$3" +docker container run --rm -v $(pwd):/tmp/ -u $(id -u):$(id -g) "$PANDOC_FILTER_DOCKER_IMAGE" changemarks -o "/tmp/out" "$HOST_URL" "$2" "$3" + +echo "\n------ Generating changemarks docx --------" +for i in out/*.md ; do + DOCUMENT_NAME=$(echo $i | cut -d'/' -f 2) + echo "\n------ Preparing spec --------" + docker run --rm -v $(pwd):/tmp/ -u $(id -u):$(id -g) "$PANDOC_FILTER_DOCKER_IMAGE" pandocFilter -o "/tmp/out" "/tmp/$i" + echo "\n------ Publishing spec --------" + docker run --rm -v $(pwd):/data -u $(id -u):$(id -g) "$DOCKER_IMAGE" "/data/$i" -f markdown -t docx --reference-doc "Spec-template.docx" -o "/data/out/${DOCUMENT_NAME}.docx" +done + +echo "\n------ Combining docx --------" +docker container run --rm -v $(pwd):/tmp/ -u $(id -u):$(id -g) "$FORGELIB_DOCKER_IMAGE" forgelib-changedocs -vv -sf "/tmp/out/" --preprocessor onem2m --outPath=/tmp/docs --startdelimiter "/tmp/$4" --enddelimiter "/tmp/$5" --coversheet "/tmp/$6" "$HOST_URL" "$2" "$3" exit 0 diff --git a/onem2m_coversheet_template.docx b/onem2m_coversheet_template.docx index e93264f9c2751c408774ea4965565b0cec72a962..e59b7a310a864a161e843427276c01f11ee9516f 100644 Binary files a/onem2m_coversheet_template.docx and b/onem2m_coversheet_template.docx differ diff --git 
a/onem2m_delimiter_end.docx b/onem2m_delimiter_end.docx index 4e5bfed1ded1ae4614de8815b33a3ac60ba81044..c86f45b8583c94167a34463367014cdff113e749 100644 Binary files a/onem2m_delimiter_end.docx and b/onem2m_delimiter_end.docx differ diff --git a/onem2m_delimiter_start.docx b/onem2m_delimiter_start.docx index 5b786d7e9db7dcaff417f932cb010364ed502c4f..ec52d4eb637802df8f3514ce4ca68c39763e7f17 100644 Binary files a/onem2m_delimiter_start.docx and b/onem2m_delimiter_start.docx differ diff --git a/pandocFilter/changemarks.py b/pandocFilter/changemarks.py new file mode 100644 index 0000000000000000000000000000000000000000..9d4a0a52506711a10923f4a13c9645d06c231d55 --- /dev/null +++ b/pandocFilter/changemarks.py @@ -0,0 +1,236 @@ +## +# changemarks.py +# +# Script to generate a markdown file per clause modified in a merge request +# +# (c) 2023 by Miguel Angel Reina Ortega +# License: BSD 3-Clause License. See the LICENSE file for further details. +# + +import argparse, os, re, sys +from rich import print +from rich.progress import Progress, TextColumn, TimeElapsedColumn +import logging +import requests +from unidiff import PatchSet + +def fetch(url : str, expected_content_type : str = None) -> requests.Response: + r = requests.get(url) + logging.debug(f"Fetching {url}") + if (r.status_code != 200): + errorMessage = f"Failed attempting to retrieve {url}, status code {r.status_code}" + logging.error(errorMessage) + raise ValueError(errorMessage) + if expected_content_type: + if r.headers['Content-Type'] != expected_content_type: + errorMessage = f"Unexpected content type retrieving {url}. 
Expected {expected_content_type}, got {r.headers['Content-Type']}" + logging.error(errorMessage) + raise ValueError(errorMessage) + return r + +def fetch_text(url : str, expected_content_type : str = None) -> str: + r = fetch(url, expected_content_type) + return r.text + +def fetch_json(url : str, expected_content_type : str = None) -> requests.Response: + r = fetch(url, expected_content_type) + return r.json() + +def readMDFile(progress:Progress, document:str) -> list[str]: + """ Read the markdown file and return a list of lines. + """ + _taskID = progress.add_task('[blue]Reading document', start=False, total=0) + + # Check if file exists + if not os.path.exists(document): + print(f'File {document} does not exist') + exit(1) + + # Read the file + with open(document, 'r', encoding='utf-8', errors = 'replace') as f: + progress.stop_task(_taskID) + return f.readlines() + + +def writeMDFile(progress:Progress, mdLines:list[str], document:str, outDirectory:str) -> None: + """ Write the markdown file. 
+ + """ + _taskID = progress.add_task('[blue]Writing document', start=False, total=0) + + # Write the file + with open(f'{outDirectory}/{os.path.basename(document)}', 'w', encoding='utf-8', errors = 'replace') as f: + f.writelines(mdLines) + progress.stop_task(_taskID) + + +class Clause(): + ''' + Defines a clause of the base document + ''' + + def __init__(self, line, from_id, to_id, clause_nr): + self.raw = line + self.from_id = from_id + self.to_id = to_id + self.clause_nr = clause_nr + + + +def find_all_clauses(progress:Progress, mdLines:list[str]): + ''' + Scans the body of the document to find all clauses + Returns a list of Clauses, start index and end index + ''' + + _taskID = progress.add_task('[blue]Find all available clauses', start=False, total=0) + + clauseregex = re.compile('^#+\s(\d(\.\d)*|Annex \w|\w*(\.\d)*).*') + clauses:list[Clause] = [] + + index = 1 + empty = "" + clause = Clause(empty,0,0,empty) + + for line in mdLines: + if line.startswith('#'): + matches = re.findall(clauseregex, line) # Match heading + if matches: # It may be the end of the clause or the start of a subclause + if index - 2 == clause.from_id: # It is a subclause + clause.from_id = index + clause.raw = line + clause.clause_nr = matches[0][0] + else: # It is the end of the clause + clause.to_id = index - 1 + clauses.append(clause) + clause = Clause(line,index,index,matches[0][0]) + else: # it is last clause + print("Unknown heading") + + index = index + 1 + + # Append last clause (usually History) + clause.to_id = index - 1 + clauses.append(clause) + + return clauses + +class MR: + def __init__(self, project_id, mr_id, root = "https://git.onem2m.org"): + self.project_id = project_id + self.mr_id = mr_id + self.root = root + self.raw_project_details = fetch_json(self.api_url()) + self.web_url = self.raw_project_details['web_url'] + self.raw_mr_details = fetch_json(self.api_url(f'/merge_requests/{self.mr_id}')) + self.author = self.raw_mr_details['author']['name'] + 
self.target_branch = self.raw_mr_details['target_branch'] + self.source_branch = self.raw_mr_details['source_branch'] + self.title = self.raw_mr_details['title'] + self.description = self.raw_mr_details['description'] + self.raw_diff = fetch_text(f'{self.web_url}/-/merge_requests/{self.mr_id}.diff', expected_content_type='text/plain') + self.patch_set = PatchSet.from_string(self.raw_diff) + + def api_url(self, route : str = "") -> str: + return f"{self.root}/api/v4/projects/{self.project_id}/{route}" + def retrieve_text(self, branch: str, filename: str) -> str: + return fetch_text(f"{self.web_url}/-/raw/{branch}/{filename}") + +def find_changed_clauses(progress:Progress, mdLines:list[str], clauses:list[Clause], mr:MR, outDirectory:str ): + ''' + Determine the clauses that have been modified by the merge request +https://forge.etsi.org/rep/cdm/pipeline-scripts/-/blob/main/common/Dockerfile.stfubuntu Returns a list of Clauses, start index and end index + ''' + + _taskID = progress.add_task('[blue]Find changed clauses', start=False, total=0) + + changed_clauses:list[Clause] = [] + empty = "" + changed_clause = Clause(empty,0,0,empty) + + for patched_file in mr.patch_set: + if patched_file.source_file.startswith("a/TS"): + logging.debug(f"Looking at changes in {patched_file.source_file}") + for change in patched_file: + # Check the previous changed_clause + if (changed_clause.from_id <= change.target_start) and (changed_clause.to_id >= (change.target_start - 1 + change.target_length)): + generateMDforChange(progress, mdLines, changed_clause, change, outDirectory, True) + break + i = 0 + # Check all clauses + for clause in clauses: + if (clause.from_id <= change.target_start) and (clause.to_id >= (change.target_start - 1 + change.target_length)): + changed_clause = clauses.pop(i) + changed_clauses.append(clause) + generateMDforChange(progress, mdLines, changed_clause, change, outDirectory, False) + break + i = i + 1 + + for clause in changed_clauses: + 
logging.debug(f"Clause {clause.clause_nr} contains modifications") + + return changed_clauses + +def generateMDforChange(progress:Progress, mdLines:list[str],changed_clause:Clause, change, outDirectory:str, existing_clause:bool): + ''' + Generate the MD for the clauses that have been modified by the merge request +https://forge.etsi.org/rep/cdm/pipeline-scripts/-/blob/main/common/Dockerfile.stfubuntu Returns a list of Clauses, start index and end index + ''' + + _taskID = progress.add_task('[blue]Generate MD for changed clauses', start=False, total=0) + + if not existing_clause: + index = changed_clause.from_id - 1 + clauseMDlines: list[str] = [] + while index < changed_clause.to_id: + clauseMDlines.append(mdLines[index]+'\n') + index = index + 1 + else: + clauseMDlines = readMDFile(progress, changed_clause.clause_nr +'.md') + + j = change.target_start - changed_clause.from_id # index gap + for line in change: + if (not (line.value.strip() == '') and (line.is_added)): + clauseMDlines.insert(j, "<span class=\"underline\">" + line.value + "</span>\n\n") + #clauseMDlines.insert(j, "<mark>" + line.value.strip("\n") + "</mark>\n\n") + clauseMDlines.pop(j+1) + elif line.is_removed: + clauseMDlines.insert(j, "~~" + line.value.strip() + "~~") + + j = j + 1 + + writeMDFile(progress, clauseMDlines, changed_clause.clause_nr.replace(" ","") + '.md', outDirectory) + +def process(document:str, outDirectory:str, mr:MR) -> None: + with Progress(TextColumn('{task.description}'), TimeElapsedColumn()) as progress: + sourceText = mr.retrieve_text(mr.source_branch, document) + sourceMdLines = sourceText.splitlines(keepends=False) + clauses = find_all_clauses(progress, sourceMdLines) + changed_clauses = find_changed_clauses(progress, sourceMdLines, clauses, mr, outDirectory) + +def main(args=None): + # Parse command line arguments + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--outdir', '-o', action='store', 
dest='outDirectory', default = 'out', metavar = '<output directory>', help = 'specify output directory') + parser.add_argument('rootURL', help="Forge root URL") + parser.add_argument('projectID', help="Forge project ID") + parser.add_argument('mergeID', help="Merge IID") + + pargs = parser.parse_args() + + # Process documents and print output + + os.makedirs(pargs.outDirectory, exist_ok = True) + + mr = MR(pargs.projectID, pargs.mergeID, pargs.rootURL) + + for patched_file in mr.patch_set: + if patched_file.source_file.startswith("a/TS"): + filename = patched_file.source_file.split("/")[1] + process(filename, pargs.outDirectory, mr) + else: + logging.debug(f"Cannot process file named {patched_file.source_file}") + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/pandocFilter/requirements.txt b/pandocFilter/requirements.txt index 4c6b8069e2580480ca25a7320da52845b3579d4c..986b28ef5633586a033c1c37b136288807d8d0c1 100644 --- a/pandocFilter/requirements.txt +++ b/pandocFilter/requirements.txt @@ -12,3 +12,5 @@ pygments==2.15.1 # via rich rich==13.3.5 # via oneM2M-markdown-to-pandoc-filter (setup.py) +requests==2.31.0 +unidiff==0.7.5 diff --git a/pandocFilter/setup.py b/pandocFilter/setup.py index 7d47e526d3ec3967009e32f6287050c8cc1ef884..066d12e78189d90038e6379113aa0de4a600bf75 100644 --- a/pandocFilter/setup.py +++ b/pandocFilter/setup.py @@ -11,7 +11,8 @@ setup( 'rich', ], entry_points= { - 'console_scripts' : ['pandocFilter=pandocFilter:main'] + 'console_scripts' : ['pandocFilter=pandocFilter:main', + 'changemarks=changemarks:main'] } )