# Reconstructed from a git format-patch whose line breaks had been collapsed.
#   Commit:  805e55dc9635ad880017d32b2cb27a3933fd5a41
#   Author:  Miguel Angel Reina Ortega <miguelangel.reinaortega@etsi.org>
#   Date:    Tue, 27 Feb 2024 10:51:22 +0100
#   Subject: Rewrite the way changes are extracted into md files
#   File:    generateChangemarks/changemarks.py
#
# The patch adds the `typing.Tuple` import and the functions below, and rewires
# `process()` to use them. Unchanged context around the hunks (the other
# imports, the tail of the previous clause-export code, `def main(args=None):`)
# is not repeated here. `logging`, `re`, `Progress`, `TextColumn`,
# `TimeElapsedColumn`, `MR`, `Clause` and `writeMDFile` come from the rest of
# the module.
from typing import Tuple


def _unchanged_entry(text: str) -> Tuple[str, bool]:
	'''
		Wrap an unchanged line as a `(text, False)` entry.
		Headings (lines starting with '#') are followed by a blank line,
		any other line just gets a line break appended.
	'''
	return (text + ("\n\n" if text.startswith("#") else "\n"), False)


def integrate_changes(progress: Progress, mdLines: list[str], mr: MR) -> list[Tuple[str, bool]]:
	'''
		Integrate changes from merge request to the target document.

		Only patched files whose source path starts with "a/TS" or "a/TR" are
		considered. Lines outside the hunks are copied from `mdLines`, lines
		inside the hunks are taken from the patch: added and removed lines are
		marked up with `addedLine()` / `removedLine()`.

		Returns a list of `(text, changed)` tuples, where `changed` is True for
		added and removed lines and False for everything else.

		NOTE(review): every matching patched file is merged against the same
		`mdLines`; this presumably expects exactly one matching file per merge
		request - confirm against the caller.
	'''

	progress.add_task('[blue]Integrate changes', start=False, total=0)

	spec_with_changes: list[Tuple[str, bool]] = []

	for patched_file in mr.patch_set:
		if not patched_file.source_file.startswith(("a/TS", "a/TR")):
			continue
		logging.debug(f"Looking at changes in {patched_file.source_file}")

		index_source = 1  # 1-based number of the next source line to consume
		lines_added = 0
		lines_removed = 0
		for change in patched_file:
			logging.debug(f'Change from patch details: source_start: {change.source_start} - target_start: {change.target_start}')

			# Copy the untouched lines located before this hunk. The upper bound
			# keeps a hunk that points past the end of the document from raising;
			# the sanity check below reports it instead.
			while index_source < change.source_start and index_source <= len(mdLines):
				spec_with_changes.append(_unchanged_entry(mdLines[index_source - 1]))
				index_source += 1

			# Sanity check: the target position must equal the source position
			# shifted by what has been added/removed so far.
			expected_target_start = index_source + lines_added - lines_removed
			if change.target_start == expected_target_start:
				logging.debug(
					f'Change applied correctly, indexes on track. Added {lines_added} lines and removed {lines_removed} lines')
			else:
				logging.warning(
					f'Something is wrong ... expected target_start {expected_target_start}, got {change.target_start}. '
					f'Skipping the remaining changes of {patched_file.source_file}')
				break

			change_lines_added = 0
			change_lines_removed = 0
			for line in change:
				if line.is_added:
					# Added lines do not exist in the source, so index_source stays
					spec_with_changes.append((addedLine(line), True))
					change_lines_added += 1
				elif line.is_removed:
					spec_with_changes.append((removedLine(line), True))
					index_source += 1
					change_lines_removed += 1
				else:
					spec_with_changes.append(_unchanged_entry(line.value))
					index_source += 1

			lines_added += change_lines_added
			lines_removed += change_lines_removed

		# Copy what is left of the document after the last hunk. Without this the
		# merged document (and with it the last changed clause) is cut short.
		while index_source <= len(mdLines):
			spec_with_changes.append(_unchanged_entry(mdLines[index_source - 1]))
			index_source += 1

		logging.debug(f'Applied changes. Total added lines: {lines_added}.Total removed lines {lines_removed}')

	return spec_with_changes


def _mark_table_row(row: str, opening: str, closing: str) -> str:
	'''
		Wrap the content of every non-empty cell of a Markdown table row in
		`opening` / `closing`. Empty cells become a single space.
		A delimiter row (all cells made of '-' and ':' only) is returned
		without markup, because markup inside it would break the table.
	'''
	contents = [cell.strip() for cell in row.strip().split("|")]
	filled = [content for content in contents if content != '']
	is_delimiter_row = bool(filled) and all(
		"-" in content and set(content) <= {"-", ":"} for content in filled)

	cells: list[str] = []
	for content in contents:
		if content == '':
			cells.append(" ")
		elif is_delimiter_row:
			cells.append(content)
		else:
			cells.append(opening + content + closing)
	return "|".join(cells) + "\n"


def addedLine(line) -> str:
	'''
		Return the Markdown for a line added by the merge request.

		`line` is a patch line object; only its `value` attribute (the text of
		the line) is read. Table rows are underlined cell by cell, figures
		(lines starting with "![") are returned unmarked, other non-empty lines
		are underlined as a whole, and empty lines are returned as they are.
	'''
	text = line.value.strip()
	if text.startswith("|"):  # It is a table
		return _mark_table_row(line.value, "<span class=\"underline\">", "</span>")
	if text == '':
		return line.value  # Add an extra line not marked as added
	if line.value.startswith("!["):  # It is a figure
		return text + "\n\n"  # TODO How a figure should be marked if it is modified
	# It works for simple lines, not for lines in a list
	return "<span class=\"underline\">" + text + "</span>" + "\n\n"


def removedLine(line) -> str:
	'''
		Return the Markdown for a line removed by the merge request.

		`line` is a patch line object; only its `value` attribute (the text of
		the line) is read. Table rows are struck through cell by cell, other
		non-empty lines are struck through as a whole, and empty lines are
		returned as they are.
	'''
	text = line.value.strip()
	if text.startswith("|"):  # It is a table
		return _mark_table_row(line.value, "~~", "~~ ")
	if text != '':
		return "~~" + text + "~~" + "\n\n"
	return line.value


def find_clauses_with_changes(progress: Progress, mdLines_changes: list[Tuple[str, bool]]) -> Tuple[list[Clause], list[Clause]]:
	'''
		Scans the merged document to find all clauses and the changed ones.

		`mdLines_changes` is a list of `(text, changed)` tuples. A clause is
		reported as changed when at least one of its entries has `changed` set.
		Clause indexes (`from_id`, `to_id`) are 1-based positions in
		`mdLines_changes`; `to_id` is the last entry of the clause.

		Returns a tuple `(all clauses, changed clauses)`.
	'''

	progress.add_task('[blue]Find clauses with changes', start=False, total=0)

	clauseregex = re.compile(r'^#+\s(\d+(\.\d+)*|Annex \w(\.\d+)*|\w*(\.\d+)*).*')
	clauses: list[Clause] = []
	changed_clauses: list[Clause] = []
	changeInClause = False
	# Clause "0": everything from the start of the document to the first heading
	clause = Clause("", 1, 1, "0")

	for index, (line, change) in enumerate(mdLines_changes, start=1):
		if change:
			changeInClause = True
		if not line.startswith('#'):
			continue

		matches = clauseregex.findall(line)  # Match heading
		if not matches:
			print("Unknown heading")
			continue

		# It may be the end of the clause or the start of a subclause
		if index - 2 == clause.from_id:  # It is a subclause
			clause.from_id = index
			clause.raw = line
			clause.clause_nr = matches[0][0]
		else:  # It is the end of the clause
			clause.to_id = index - 1
			clauses.append(clause)
			if changeInClause:
				changed_clauses.append(clause)
			clause = Clause(line, index, index, matches[0][0])
			changeInClause = False

	# Append last clause (usually History)
	clause.to_id = len(mdLines_changes)
	clauses.append(clause)
	if changeInClause:
		changed_clauses.append(clause)

	logging.debug(f"Number of clauses: {len(clauses)}")
	for found_clause in clauses:
		logging.debug(found_clause.clause_nr)
		logging.debug(found_clause.from_id)
		logging.debug(found_clause.to_id)

	return (clauses, changed_clauses)


def _clause_lines(clause: Clause, mdLines: list[str]) -> list[str]:
	'''
		Return the lines of `clause`. `from_id` and `to_id` are 1-based and
		both inclusive, so the last line of the clause is part of the result.
	'''
	return mdLines[max(clause.from_id - 1, 0):clause.to_id]


def saveChangedClauses(progress: Progress, outDirectory:str, changed_clauses: list[Clause], mdLines: list[str]) -> None:
	'''
		Write every changed clause to its own file in `outDirectory`.
		The file name is the clause number without spaces plus ".md".
	'''
	for clause in changed_clauses:
		writeMDFile(progress, _clause_lines(clause, mdLines), clause.clause_nr.replace(" ", "") + '.md', outDirectory)


def process(document:str, outDirectory:str, mr:MR) -> None:
	'''
		Export the clauses of `document` that are changed by the merge request.

		Retrieves the document from the target branch of `mr`, integrates the
		changes of the merge request, writes the numbers of the changed clauses
		to "changedClauses.txt" in `outDirectory` and saves each changed clause
		as a separate Markdown file.
	'''
	with Progress(TextColumn('{task.description}'), TimeElapsedColumn()) as progress:
		# This replaces the former find_all_clauses() / find_changed_clauses() flow.
		sourceText = mr.retrieve_text(mr.target_branch, document)
		sourceMdLines = sourceText.splitlines(keepends=False)
		targetMdLines_changes = integrate_changes(progress, sourceMdLines, mr)
		_all_clauses, changed_clauses = find_clauses_with_changes(progress, targetMdLines_changes)

		# Export list of changed clauses
		with open(f'{outDirectory}/changedClauses.txt', "w", encoding='utf-8', errors='replace') as f:
			f.write("\n".join([clause.clause_nr for clause in changed_clauses]))

		saveChangedClauses(progress, outDirectory, changed_clauses, [text for text, _changed in targetMdLines_changes])