From ea350cbcb24d3552cab8ca56c98cc64e1b96623c Mon Sep 17 00:00:00 2001 From: Miguel Angel Reina Ortega <miguelangel.reinaortega@etsi.org> Date: Thu, 7 Dec 2023 05:07:25 +0100 Subject: [PATCH] Several fixes: - Get the indexes of the change - Detect the changed clause - Correct generation of the MD with change marks Some fixes: - the complete ToC clause is analysed (between clause marks) - figure caption regexp to catch figure caption in any part of the line --- .gitlab-ci.yml | 1 + generateChangemarks/changemarks.py | 120 ++++++++++++++++++++-------- generateChangemarks/pandocFilter.py | 7 +- 3 files changed, 91 insertions(+), 37 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0c11cd0..570d00f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -63,6 +63,7 @@ Word CR text: Protect branch: stage: generation when: on_success + needs: ["Word CR text"] only: - merge_requests script: diff --git a/generateChangemarks/changemarks.py b/generateChangemarks/changemarks.py index 7432ab0..600e551 100644 --- a/generateChangemarks/changemarks.py +++ b/generateChangemarks/changemarks.py @@ -90,8 +90,17 @@ def find_all_clauses(progress:Progress, mdLines:list[str]): index = 1 empty = "" - clause = Clause(empty,0,0,empty) - + clause = Clause(empty,0,0,"0") + + #for line in mdLines: + # if line.startswith('#'): + # # Clause 0 (from start to first clause) found + # clause.to_id = index - 1 + # clauses.append(clause) + # break + # index = index + 1 + + index = 1 for line in mdLines: if line.startswith('#'): matches = re.findall(clauseregex, line) # Match heading @@ -113,8 +122,13 @@ def find_all_clauses(progress:Progress, mdLines:list[str]): clause.to_id = index - 1 clauses.append(clause) - logging.debug("Number of clauses: {len(clauses)}") - return clauses + logging.debug(f"Number of clauses: {len(clauses)}") + for clause in clauses: + logging.debug(clause.clause_nr) + logging.debug(clause.from_id) + logging.debug(clause.to_id) + + return clauses class MR: def 
__init__(self, project_id, mr_id, root = "https://git.onem2m.org"): @@ -155,25 +169,28 @@ https://forge.etsi.org/rep/cdm/pipeline-scripts/-/blob/main/common/Dockerfile.st logging.debug(f"Looking at changes in {patched_file.source_file}") lines_added = 0 lines_removed = 0 + previous_change_lines_added = 0 for change in patched_file: + logging.debug(f'Change from patch details: source_start: {change.source_start} - target_start: {change.target_start}') change_start_line, change_end_line, change_lines_added, change_lines_removed = changeDetails(change) logging.debug(f"Change starting at line {change_start_line} and ending at line {change_end_line}") logging.debug(f"Change containing {change_lines_added} added lines and {change_lines_removed} lines removed") lines_added = lines_added + change_lines_added lines_removed = lines_removed + change_lines_removed # Check the previous changed_clause - if (changed_clause.from_id <= change_start_line) and (changed_clause.to_id + lines_added - lines_removed >= change_end_line): - generateMDforChange(progress, mdLines, changed_clause, change, outDirectory, True) + if (changed_clause.from_id <= change_start_line) and (changed_clause.to_id >= change_end_line): + generateMDforChange(progress, mdLines, changed_clause, change, outDirectory, True, previous_change_lines_added) continue i = 0 # Check all clauses for clause in clauses: - if (clause.from_id <= change_start_line) and (clause.to_id + lines_added - lines_removed >= change_end_line): + if (clause.from_id <= change_start_line) and (clause.to_id >= change_end_line): changed_clause = clauses.pop(i) changed_clauses.append(clause) - generateMDforChange(progress, mdLines, changed_clause, change, outDirectory, False) + generateMDforChange(progress, mdLines, changed_clause, change, outDirectory, False, previous_change_lines_added) break i = i + 1 + previous_change_lines_added = change_lines_added logging.info(f"Total number of lines added is {lines_added}") logging.info(f"Total number 
of lines removed is {lines_removed}") @@ -186,10 +203,17 @@ def changeDetails(change) -> (int, int, int, int): i = 0 lines_added = 0 lines_removed = 0 - change_start_line = change.target_start + change_start_line = change.source_start change_end_line = change_start_line for line in change: - #print(vars(line)) + if line.is_added: + logging.debug(f'Line of change: (ADDED) source_line_no: {line.source_line_no} - target_line_no: {line.target_line_no} - value: {line.value}') + elif line.is_removed: + logging.debug( + f'Line of change: (REMOVED) source_line_no: {line.source_line_no} - target_line_no: {line.target_line_no} - value: {line.value}') + else: + logging.debug(f'Line of change: source_line_no: {line.source_line_no} - target_line_no: {line.target_line_no} - value: {line.value}') + if line.is_added or line.is_removed: #if change_start_line == change.target_start: # change_start_line = change.target_start + i @@ -201,16 +225,21 @@ def changeDetails(change) -> (int, int, int, int): lines_added = lines_added + 1 elif line.is_removed: lines_removed = lines_removed + 1 - if change_start_line == change.target_start: + if change_start_line == change.source_start: if line.is_added: - change_start_line = line.target_line_no - change_end_line = change_start_line + if change.source_start > change.target_start: # there are more lines removed than lines added + change_start_line = line.target_line_no - 1 + change.source_start - change.target_start + else: + change_start_line = line.target_line_no - 1 + change.target_start - change.source_start # There are more lines added than removed lines elif line.is_removed: - change_start_line = line.source_line_no - change_end_line = change_start_line + change_start_line = line.source_line_no - 1 + change_end_line = change_start_line else: if line.is_added: - change_end_line = line.target_line_no + if change.source_start > change.target_start: + change_end_line = line.target_line_no + change.source_start - change.target_start + 
lines_removed - lines_added # There are more lines added than removed lines + else: + change_end_line = line.target_line_no + change.target_start - change.source_start + lines_removed - lines_added elif line.is_removed: change_end_line = line.source_line_no #i = i + 1 @@ -218,7 +247,7 @@ def changeDetails(change) -> (int, int, int, int): #change_end_line = change_end_line - lines_removed return change_start_line, change_end_line, lines_added, lines_removed -def generateMDforChange(progress:Progress, mdLines:list[str],changed_clause:Clause, change, outDirectory:str, existing_clause:bool): +def generateMDforChange(progress:Progress, mdLines:list[str],changed_clause:Clause, change, outDirectory:str, existing_clause:bool, previous_change_lines_added:int): ''' Generate the MD for the clauses that have been modified by the merge request https://forge.etsi.org/rep/cdm/pipeline-scripts/-/blob/main/common/Dockerfile.stfubuntu Returns a list of Clauses, start index and end index @@ -235,17 +264,25 @@ https://forge.etsi.org/rep/cdm/pipeline-scripts/-/blob/main/common/Dockerfile.st else: clauseMDlines = readMDFile(progress, outDirectory + '/' + changed_clause.clause_nr +'.md') + logging.info(f"Lines in clause {changed_clause.clause_nr} is {len(clauseMDlines)}") if change.source_start >= changed_clause.from_id: + j = change.source_start - changed_clause.from_id index_gap = change.target_start - change.source_start elif change.source_start < changed_clause.from_id: + j = changed_clause.from_id - change.source_start index_gap = change.source_start - change.target_start - j = change.target_start - changed_clause.from_id + index_gap + if existing_clause: + j = j + previous_change_lines_added + logging.info(f"Change.source_start {change.source_start}") + logging.info(f"Changed_clause.from_id {changed_clause.from_id}") + for line in change: - if line.source_line_no != None and line.source_line_no < changed_clause.from_id: - j = 0 - continue - else: + # It should deal with the case 
where source line number is not within the changed clause index (too many removed lines in other clauses, i.e.) + #if line.source_line_no != None and line.source_line_no < changed_clause.from_id: + # j = j + # continue + #else: #if (not (line.value.strip() == '') and (line.is_added)): #print(vars(line)) if line.is_added: @@ -254,19 +291,25 @@ https://forge.etsi.org/rep/cdm/pipeline-scripts/-/blob/main/common/Dockerfile.st modifiedElements:list[str] = [] for element in tableElements: if not element.strip() == '': - modifiedElements.append("<span class=\"underline\">" + element.strip() + "</span>") + modifiedElements.append("<span class=\"underline\">" + element.strip() + "</span> ") #modifiedRow = "|" + "|".join(modifiedElements) + "|" + "\n" else: - modifiedElements.append(element) - modifiedRow = "|".join(modifiedElements) + "\n" - clauseMDlines.insert(j,modifiedRow) + modifiedElements.append(" ") + modifiedRow = "|".join(modifiedElements) + clauseMDlines.insert(j,modifiedRow + "\n") clauseMDlines.pop(j + 1) # Todo Check what happens when modifying last row of a table else: if not line.value.strip() == '': - clauseMDlines.insert(j, "<span class=\"underline\">" + line.value.strip() + "</span>\n\n") + if line.value.startswith("!["): # It is a figure + clauseMDlines.insert(j, line.value.strip() + "\n\n") + # clauseMDlines.insert(j, "<span class=\"underline\">" + line.value.strip() + "</span>\n") #Track change OK Caption Not OK + else: + clauseMDlines.insert(j, "<span class=\"underline\">" + line.value.strip() + "</span>\n\n") #it works for simple lines, not for lines in a list #clauseMDlines.insert(j, "<mark>" + line.value.strip("\n") + "</mark>\n\n") #if (j + 1) <= len(clauseMDlines): # clauseMDlines.pop(j+1) + else: + clauseMDlines.insert(j, "\n") #Add an extra line not marked as added #elif (not (line.value.strip() == '') and (line.is_removed)): elif line.is_removed: if line.value.strip().startswith("|"): # It is a table @@ -274,17 +317,25 @@ 
https://forge.etsi.org/rep/cdm/pipeline-scripts/-/blob/main/common/Dockerfile.st modifiedElements: list[str] = [] for element in tableElements: if not element.strip() == '': - modifiedElements.append("~~" + element.strip() + "~~") + modifiedElements.append("~~" + element.strip() + "~~ ") #modifiedRow = "|" + "|".join(modifiedElements) + "|" + "\n" else: - modifiedElements.append(element) - modifiedRow = "|".join(modifiedElements) + "\n" - clauseMDlines.insert(j, modifiedRow) + modifiedElements.append(" ") + modifiedRow = "|".join(modifiedElements) + clauseMDlines.insert(j, modifiedRow + "\n") + if (j + 1) <= len(clauseMDlines): + clauseMDlines.pop(j+1) else: + logging.debug(f"Index to delete is {j}") if not line.value.strip() == '': - clauseMDlines.insert(j, "~~" + line.value.strip() + "~~\n") - if (j + 1) <= len(clauseMDlines): - clauseMDlines.pop(j+1) + clauseMDlines.insert(j, "~~" + line.value.strip() + "~~\n\n") + #else: + # clauseMDlines.insert(j, "~~\t~~\n") + #clauseMDlines.insert(j, "\n") + if (j + 1) <= len(clauseMDlines): + logging.debug(f'Line marked as removed: {clauseMDlines[j]}') + logging.debug(f'Line being removed: {clauseMDlines[j+1]}') + clauseMDlines.pop(j+1) j = j + 1 #clauseMDlines.insert(j, "\n\n<br />") @@ -310,6 +361,7 @@ def main(args=None): parser.add_argument('mergeID', help="Merge IID") pargs = parser.parse_args() + logging.basicConfig(level=logging.INFO) # Process documents and print output diff --git a/generateChangemarks/pandocFilter.py b/generateChangemarks/pandocFilter.py index a07b778..41b6ca5 100644 --- a/generateChangemarks/pandocFilter.py +++ b/generateChangemarks/pandocFilter.py @@ -65,6 +65,8 @@ def correctTOC(progress:Progress, mdLines:list[str], tocSection:str = 'Contents' if matches: _lines.append(f'{matches[0]} \n') continue + else: + _lines.append(line) else: _lines.append(line) @@ -112,11 +114,10 @@ def replaceFigureCaptions(progress:Progress, mdLines:list[str]) -> list[str]: if matches: # Replace the previous figure 
markdown name with the captions _idx = len(_lines) - 1 - while _idx >= 0 and not _lines[_idx].startswith('!['): + while _idx >= 0 and _lines[_idx].count('![') == 0: _idx -= 1 if _idx > 0: - _lines[_idx] = re.sub(r'^.*?]', f'![{matches[0]}]', _lines[_idx]) - + _lines[_idx] = re.sub(r'!.*]', f'![{matches[0]}]', _lines[_idx]) elif re.findall(nocaptionfigureregex, line): _lines.append(re.sub(r'^.*?]', f'![]', line)) else: -- GitLab