From 54a6c6bf7f0a0610a780b85e2dc7e94cab1fed50 Mon Sep 17 00:00:00 2001 From: r-sharp Date: Fri, 30 Jan 2026 11:29:48 +0000 Subject: [PATCH] Tweaks to get VSCode's internal linters to quit whinging and make the ToDo plugin work nicely. --- __init__.py | 0 .../checker_dispatch_tables.py | 135 +++++++++---- script_umdp3_checker/fortran_keywords.py | 9 +- script_umdp3_checker/search_lists.py | 4 +- script_umdp3_checker/umdp3_checker_rules.py | 190 ++++++++++++------ script_umdp3_checker/umdp3_conformance.py | 103 +++++++--- 6 files changed, 304 insertions(+), 137 deletions(-) create mode 100644 __init__.py diff --git a/__init__.py b/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/script_umdp3_checker/checker_dispatch_tables.py b/script_umdp3_checker/checker_dispatch_tables.py index fcca7bc6..fe01d5a8 100644 --- a/script_umdp3_checker/checker_dispatch_tables.py +++ b/script_umdp3_checker/checker_dispatch_tables.py @@ -13,8 +13,11 @@ from umdp3_checker_rules import UMDP3Checker """ -ToDo : This list was checked to ensure it had something for each +TODO : This list was checked to ensure it had something for each test in the original. +TODO : This needs to be re-checked. +TODO : And the tests need to be compared to the original Perl tests + to ensure they are equivalent. """ # Declare version @@ -30,31 +33,55 @@ def __init__(self): def get_diff_dispatch_table_fortran(self) -> Dict[str, Callable]: """Get dispatch table for Fortran diff tests""" return { - # 'Captain Daves doomed test of destruction': self.umdp3_checker.capitulated_keywords, - "Lowercase Fortran keywords not permitted": self.umdp3_checker.capitalised_keywords, - "OpenMP sentinels not in column one": self.umdp3_checker.openmp_sentinels_in_column_one, - "Omitted optional space in keywords": self.umdp3_checker.unseparated_keywords, + # 'Captain Daves doomed test of destruction': + # self.umdp3_checker.capitulated_keywords, + "Lowercase Fortran keywords not permitted": + self.umdp3_checker.capitalised_keywords, + "OpenMP sentinels not in column one": + self.umdp3_checker.openmp_sentinels_in_column_one, + "Omitted optional space in keywords": + self.umdp3_checker.unseparated_keywords, "GO TO other than 9999": self.umdp3_checker.go_to_other_than_9999, - "WRITE without format": self.umdp3_checker.write_using_default_format, - "Lowercase or CamelCase variable names only": self.umdp3_checker.lowercase_variable_names, - "Use of dimension attribute": self.umdp3_checker.dimension_forbidden, - "Continuation lines shouldn't start with &": self.umdp3_checker.ampersand_continuation, - "Use of EQUIVALENCE or PAUSE": self.umdp3_checker.forbidden_keywords, - "Use of older form of relational operator (.GT. etc.)": self.umdp3_checker.forbidden_operators, - "Line longer than 80 characters": self.umdp3_checker.line_over_80chars, + "WRITE without format": + self.umdp3_checker.write_using_default_format, + "Lowercase or CamelCase variable names only": + self.umdp3_checker.lowercase_variable_names, + "Use of dimension attribute": + self.umdp3_checker.dimension_forbidden, + "Continuation lines shouldn't start with &": + self.umdp3_checker.ampersand_continuation, + "Use of EQUIVALENCE or PAUSE": + self.umdp3_checker.forbidden_keywords, + "Use of older form of relational operator (.GT. etc.)": + self.umdp3_checker.forbidden_operators, + "Line longer than 80 characters": + self.umdp3_checker.line_over_80chars, "Line includes tab character": self.umdp3_checker.tab_detection, - "USEd printstatus_mod instead of umPrintMgr": self.umdp3_checker.printstatus_mod, - "Used PRINT rather than umMessage and umPrint": self.umdp3_checker.printstar, - "Used WRITE(6) rather than umMessage and umPrint": self.umdp3_checker.write6, - "Used um_fort_flush rather than umPrintFlush": self.umdp3_checker.um_fort_flush, - "Used Subversion keyword substitution which is prohibited": self.umdp3_checker.svn_keyword_subst, - "Used !OMP instead of !$OMP": self.umdp3_checker.omp_missing_dollar, - "Used #ifdef or #ifndef rather than #if defined() or #if !defined()": self.umdp3_checker.cpp_ifdef, - "Presence of fortran comment in CPP directive": self.umdp3_checker.cpp_comment, - "Used an archaic fortran intrinsic function": self.umdp3_checker.obsolescent_fortran_intrinsic, - "EXIT statements should be labelled": self.umdp3_checker.exit_stmt_label, - "Intrinsic modules must be USEd with an INTRINSIC keyword specifier": self.umdp3_checker.intrinsic_modules, - "READ statements should have an explicit UNIT= as their first argument": self.umdp3_checker.read_unit_args, + "USEd printstatus_mod instead of umPrintMgr": + self.umdp3_checker.printstatus_mod, + "Used PRINT rather than umMessage and umPrint": + self.umdp3_checker.printstar, + "Used WRITE(6) rather than umMessage and umPrint": + self.umdp3_checker.write6, + "Used um_fort_flush rather than umPrintFlush": + self.umdp3_checker.um_fort_flush, + "Used Subversion keyword substitution which is prohibited": + self.umdp3_checker.svn_keyword_subst, + "Used !OMP instead of !$OMP": + self.umdp3_checker.omp_missing_dollar, + "Used #ifdef/#ifndef rather than #if defined() or #if !defined()": + self.umdp3_checker.cpp_ifdef, + "Presence of fortran comment in CPP directive": + self.umdp3_checker.cpp_comment, + "Used an archaic fortran intrinsic function": + self.umdp3_checker.obsolescent_fortran_intrinsic, + "EXIT statements should be labelled": + self.umdp3_checker.exit_stmt_label, + "Intrinsic modules must be USEd with an INTRINSIC " + + "keyword specifier": self.umdp3_checker.intrinsic_modules, + "READ statements should have an explicit UNIT= as " + + "their first argument": + self.umdp3_checker.read_unit_args, } def get_file_dispatch_table_fortran( @@ -62,40 +89,64 @@ def get_file_dispatch_table_fortran( ) -> Dict[str, Callable]: """Get dispatch table for Fortran file tests""" return { - "Warning - used an if-def due for retirement": self.umdp3_checker.retire_if_def, - "File is missing at least one IMPLICIT NONE": self.umdp3_checker.implicit_none, + "Warning - used an if-def due for retirement": + self.umdp3_checker.retire_if_def, + "File is missing at least one IMPLICIT NONE": + self.umdp3_checker.implicit_none, "Never use STOP or CALL abort": self.umdp3_checker.forbidden_stop, - "Use of Fortran function as a variable name": self.umdp3_checker.intrinsic_as_variable, - "File missing crown copyright statement or agreement reference": self.umdp3_checker.check_crown_copyright, - "File missing correct code owner comment": self.umdp3_checker.check_code_owner, - "Used (/ 1,2,3 /) form of array initialisation, rather than [1,2,3] form": self.umdp3_checker.array_init_form, + "Use of Fortran function as a variable name": + self.umdp3_checker.intrinsic_as_variable, + "File missing crown copyright statement or agreement reference": + self.umdp3_checker.check_crown_copyright, + "File missing correct code owner comment": + self.umdp3_checker.check_code_owner, + "Used (/ 1,2,3 /) form of array initialisation, rather than " + + "[1,2,3] form": self.umdp3_checker.array_init_form, } def get_diff_dispatch_table_c(self) -> Dict[str, Callable]: """Get dispatch table for C diff tests""" return { - "Line longer than 80 characters": self.umdp3_checker.line_over_80chars, + "Line longer than 80 characters": + self.umdp3_checker.line_over_80chars, "Line includes tab character": self.umdp3_checker.tab_detection, - 'Fixed-width Integer format specifiers must have a space between themselves and the string delimiter (the " character)': self.umdp3_checker.c_integral_format_specifiers, + "Fixed-width Integer format specifiers must have a space " + + "between themselves and the string delimiter (the \" character)": + self.umdp3_checker.c_integral_format_specifiers, } def get_file_dispatch_table_c(self) -> Dict[str, Callable]: """Get dispatch table for C file tests""" return { - "Warning - used an if-def due for retirement": self.umdp3_checker.retire_if_def, + "Warning - used an if-def due for retirement": + self.umdp3_checker.retire_if_def, "Used a deprecated C identifier": self.umdp3_checker.c_deprecated, - "File missing crown copyright statement or agreement reference": self.umdp3_checker.check_crown_copyright, - "File missing correct code owner comment": self.umdp3_checker.check_code_owner, - "Used an _OPENMP if-def without also testing against SHUM_USE_C_OPENMP_VIA_THREAD_UTILS. (Or _OPENMP does not come first in the test.)": self.umdp3_checker.c_openmp_define_pair_thread_utils, - "Used an _OPENMP && SHUM_USE_C_OPENMP_VIA_THREAD_UTILS if-def test in a logical combination with a third macro": self.umdp3_checker.c_openmp_define_no_combine, - "Used !defined(_OPENMP) rather than defined(_OPENMP) with #else branch": self.umdp3_checker.c_openmp_define_not, - "Used an omp #pragma (or #include ) without protecting it with an _OPENMP if-def": self.umdp3_checker.c_protect_omp_pragma, - "Used the #ifdef style of if-def, rather than the #if defined() style": self.umdp3_checker.c_ifdef_defines, - "C Unit does not end with a final newline character": self.umdp3_checker.c_final_newline, + "File missing crown copyright statement or agreement reference": + self.umdp3_checker.check_crown_copyright, + "File missing correct code owner comment": + self.umdp3_checker.check_code_owner, + "Used an _OPENMP if-def without also testing against " + + "SHUM_USE_C_OPENMP_VIA_THREAD_UTILS. (Or _OPENMP does " + + "not come first in the test.)": + self.umdp3_checker.c_openmp_define_pair_thread_utils, + "Used an _OPENMP && SHUM_USE_C_OPENMP_VIA_THREAD_UTILS if-def " + + "test in a logical combination with a third macro": + self.umdp3_checker.c_openmp_define_no_combine, + "Used !defined(_OPENMP) rather than defined(_OPENMP) " + + "with #else branch": self.umdp3_checker.c_openmp_define_not, + "Used an omp #pragma (or #include ) without " + + "protecting it with an _OPENMP if-def": + self.umdp3_checker.c_protect_omp_pragma, + "Used the #ifdef style of if-def, rather than the #if " + + "defined() style": + self.umdp3_checker.c_ifdef_defines, + "C Unit does not end with a final newline character": + self.umdp3_checker.c_final_newline, } def get_file_dispatch_table_all(self) -> Dict[str, Callable]: """Get dispatch table for universal file tests""" return { - "Line includes trailing whitespace character(s)": self.umdp3_checker.line_trail_whitespace, + "Line includes trailing whitespace character(s)": + self.umdp3_checker.line_trail_whitespace, } diff --git a/script_umdp3_checker/fortran_keywords.py b/script_umdp3_checker/fortran_keywords.py index 06a92945..d9b6be82 100644 --- a/script_umdp3_checker/fortran_keywords.py +++ b/script_umdp3_checker/fortran_keywords.py @@ -7,8 +7,13 @@ """ List of the Fortran Keywords for the UMDP3 checker to check against. Hopefully complete, but may need to be updated from time to time. -These have been 'ordered' in terms of frequency of occurrence within the UM codebase in order to improve efficiency. -ToDo: Current order may not be perfect, and could possibly be reviewed. However, it is probably 'good enough' for now. +These have been 'ordered' in terms of frequency of occurrence within +the UM codebase in order to improve efficiency. +""" + +""" +TODO: Current order may not be perfect, and could possibly be reviewed. +However, it is probably 'good enough' for now. """ fortran_keywords = ( diff --git a/script_umdp3_checker/search_lists.py b/script_umdp3_checker/search_lists.py index bda44e0c..7ee2c74b 100644 --- a/script_umdp3_checker/search_lists.py +++ b/script_umdp3_checker/search_lists.py @@ -5,7 +5,9 @@ # ----------------------------------------------------------------------------- """ -Lists of words for Fortran checks. Some to confirm they are found in the approved form, some to test for as the intention is that they should no longer appear in the code. +Lists of words for Fortran checks. Some to confirm they are found in the +approved form, some to test for as the intention is that they should no longer +appear in the code. """ # Obsolescent Fortran intrinsics : These should not be used in new code and diff --git a/script_umdp3_checker/umdp3_checker_rules.py b/script_umdp3_checker/umdp3_checker_rules.py index 5357f785..6a068f27 100644 --- a/script_umdp3_checker/umdp3_checker_rules.py +++ b/script_umdp3_checker/umdp3_checker_rules.py @@ -22,10 +22,11 @@ from dataclasses import dataclass, field """ -ToDo : Several of the test functions are poor shadows of the original +TODO : Several of the test functions are poor shadows of the original Perl versions. They would benefit from improving to catch more cases. - Equally, there could probably be more consistancly in how things like comments are stripped from the ends of lines + Equally, there could probably be more consistancly in how things + like comments are stripped from the ends of lines and/or full comment lines are skipped. """ @@ -37,7 +38,8 @@ class TestResult: """Result from running a single style checker test on a file.""" - """ToDo : unsure if both output and errors are required. + """ + TODO : unsure if both output and errors are required. They make a bit more sense in the 'external_checkers' where they hold stdout and stderr.""" checker_name: str = "Unnamed Checker" @@ -50,7 +52,8 @@ class TestResult: class UMDP3Checker: """UMDP3 compliance checker class""" - """ToDO : This class could possibly be abandoned, or replaced + """ + TODO : This class could possibly be abandoned, or replaced by a similar class at a different level. Presently only one instance is created in such a way that the original _extra_error_info can't be used to hold extra information @@ -63,29 +66,42 @@ class UMDP3Checker: def __init__(self): self._extra_error_info = {} self._lock = threading.Lock() - """ToDo: The Perl version had a dodgy looking subroutine to calculate + """ + TODO: The Perl version had a dodgy looking subroutine to calculate this, but I can't find where it was called from within the files in 'bin'. It used all args as a 'list' - searched them for '#include' and - then returned the count as well as adding 1 to this global var if any were found. - This is either redundant and needs removing, or needs implementing properly.""" + then returned the count as well as adding 1 to this global var if any + were found. + This is either redundant and needs removing, or needs implementing + properly.""" self._number_of_files_with_variable_declarations_in_includes = 0 def reset_extra_error_information(self): """Reset extra error information : - Appears to be used 'between' blocks of tests such as those on diffs and those on full files. + Appears to be used 'between' blocks of tests such as those on diffs and + those on full files. """ with self._lock: self._extra_error_info = {} def get_extra_error_information(self) -> Dict: - """Get extra error information. Dictionary with file names as the keys.""" - """ToDo: I presume this is what's used when creating the report of the actual failures and not just the count. However, this information doesn't seem to be output as yet and will need implementing.""" + """ + Get extra error information. Dictionary with file names as the keys. + """ + """ + + TODO: I presume this is what's used when creating the report of the + actual failures and not just the count. However, this information + doesn't seem to be output as yet and will need implementing. + """ with self._lock: return self._extra_error_info.copy() def add_extra_error(self, key: str, value: str = ""): """Add extra error information to the dictionary""" - """ToDo: The usefulness of the information added has not been assesed, nor does it appear to be reported as yet.""" + """ + TODO: The usefulness of the information added has not been assesed, + nor does it appear to be reported as yet.""" with self._lock: self._extra_error_info[key] = value @@ -93,7 +109,8 @@ def add_error_log( self, error_log: Dict, key: str = "no key", value: int = 0 ) -> Dict: """Add extra error information to the dictionary""" - """ToDo: This is a bodge to get more detailed info about + """ + TODO: This is a bodge to get more detailed info about the errors back to the calling program. The info is useful, but is currently presented on a per-test basis rather than a per-file which would be easier to read @@ -105,12 +122,18 @@ def add_error_log( def get_include_number(self) -> int: """Get number of files with variable declarations in includes""" - """ToDo: At present, this is hardwired to zero and I don't think anything alters it along the way. Plus it doesn't seem to be called from anywhere..... So this getter is probably very redundant.""" + """ + TODO: At present, this is hardwired to zero and I don't think + anything alters it along the way. Plus it doesn't seem to be called + from anywhere..... So this getter is probably very redundant.""" return self._number_of_files_with_variable_declarations_in_includes def remove_quoted(self, line: str) -> str: """Remove quoted strings from a line""" - """ToDo: The original version replaced the quoted sections with a "blessed reference", presumably becuase they were 're-inserted' at some stage. No idea if that capability is still required.""" + """ + TODO: The original version replaced the quoted sections with a + "blessed reference", presumably becuase they were 're-inserted' at some + stage. No idea if that capability is still required.""" # Simple implementation - remove single and double quoted strings result = line @@ -125,8 +148,15 @@ def remove_quoted(self, line: str) -> str: """Test functions : Each accepts a list of 'lines' to search and returns a TestResult object containing all the information.""" - """ToDo: One thought here is each test should also be told whether it's being passed the contents of a full file, or just a selection of lines involved in a change as some of the tests appear to really only be useful if run on a full file (e.g. the Implicit none checker). Thus if only passed a selection of lines, these tests could be skipped/return 'pass' regardless. - Although, a brief look seems to imply that there are two 'dispatch tables' one for full files and one for changed lines.""" + """ + TODO: One thought here is each test should also be told whether it's + being passed the contents of a full file, or just a selection of lines + involved in a change as some of the tests appear to really only be useful + if run on a full file (e.g. the Implicit none checker). Thus if only passed + a selection of lines, these tests could be skipped/return 'pass' + regardless. + Although, a brief look seems to imply that there are two 'dispatch tables' + one for full files and one for changed lines.""" def capitulated_keywords(self, lines: List[str]) -> TestResult: """A fake test, put in for testing purposes. @@ -141,7 +171,7 @@ def capitulated_keywords(self, lines: List[str]) -> TestResult: if line.lstrip(" ").startswith("!"): continue clean_line = self.remove_quoted(line) - clean_line = self.comment_line.sub("", clean_line) # Remove comments + clean_line = self.comment_line.sub("", clean_line) # Check for lowercase keywords for word in self.word_splitter.findall(clean_line): @@ -173,8 +203,7 @@ def capitalised_keywords(self, lines: List[str]) -> TestResult: if line.lstrip(" ").startswith("!"): continue clean_line = self.remove_quoted(line) - clean_line = self.comment_line.sub("", clean_line) # Remove comments - + clean_line = self.comment_line.sub("", clean_line) # Check for lowercase keywords for word in self.word_splitter.findall(clean_line): upcase = word.upper() @@ -225,7 +254,8 @@ def unseparated_keywords(self, lines: List[str]) -> TestResult: clean_line = self.remove_quoted(line) for pattern in [f"\\b{kw}\\b" for kw in unseparated_keywords_list]: if re.search(pattern, clean_line, re.IGNORECASE): - self.add_extra_error(f"unseparated keyword in line: {line.strip()}") + self.add_extra_error(f"unseparated keyword in line: " + f"{line.strip()}") failures += 1 error_log = self.add_error_log( error_log, @@ -250,7 +280,8 @@ def go_to_other_than_9999(self, lines: List[str]) -> TestResult: clean_line = self.remove_quoted(line) clean_line = re.sub(r"!.*$", "", clean_line) - if match := re.search(r"\bGO\s*TO\s+(\d+)", clean_line, re.IGNORECASE): + if match := re.search(r"\bGO\s*TO\s+(\d+)", clean_line, + re.IGNORECASE): label = match.group(1) if label != "9999": self.add_extra_error(f"GO TO {label}") @@ -276,10 +307,12 @@ def write_using_default_format(self, lines: List[str]) -> TestResult: clean_line = self.remove_quoted(line) clean_line = re.sub(r"!.*$", "", clean_line) - if re.search(r"\bWRITE\s*\(\s*\*\s*,\s*\*\s*\)", clean_line, re.IGNORECASE): + if re.search(r"\bWRITE\s*\(\s*\*\s*,\s*\*\s*\)", clean_line, + re.IGNORECASE): self.add_extra_error("WRITE(*,*) found") failures += 1 - error_log = self.add_error_log(error_log, "WRITE(*,*) found", count + 1) + error_log = self.add_error_log(error_log, "WRITE(*,*) found", + count + 1) output = f"Checked {count+1} lines, found {failures} failures." return TestResult( checker_name="WRITE using default format", @@ -291,8 +324,12 @@ def write_using_default_format(self, lines: List[str]) -> TestResult: def lowercase_variable_names(self, lines: List[str]) -> TestResult: """Check for lowercase or CamelCase variable names only""" - """ToDo: This is a very simplistic check and will not detect many - cases which break UMDP3. I suspect the Perl Predeccessor concattenated continuation lines prior to 'cleaning' and checking. Having identified a declaration, it also then scanned the rest of the file for that variable name in any case.""" + """ + TODO: This is a very simplistic check and will not detect many + cases which break UMDP3. I suspect the Perl Predeccessor concattenated + continuation lines prior to 'cleaning' and checking. Having identified + a declaration, it also then scanned the rest of the file for that + variable name in any case.""" failures = 0 error_log = {} count = -1 @@ -306,14 +343,16 @@ def lowercase_variable_names(self, lines: List[str]) -> TestResult: clean_line, re.IGNORECASE, ): - # print(f"Debug: Found variable declaration line: {clean_line}") + # print("Debug: Found variable declaration line: " + # f"{clean_line}") clean_line = re.sub( r"^\s*(INTEGER|REAL|LOGICAL|CHARACTER|TYPE)\s*.*::\s*", "", clean_line, ) if re.search(r"[A-Z]{2,}", clean_line): - # print(f"Debug: Found UPPERCASE variable name: {clean_line}") + # print(f"Debug: Found UPPERCASE variable name: " + # f"{clean_line}") self.add_extra_error("UPPERCASE variable name") failures += 1 error_log = self.add_error_log( @@ -377,7 +416,8 @@ def ampersand_continuation(self, lines: List[str]) -> TestResult: def forbidden_keywords(self, lines: List[str]) -> TestResult: """Check for use of EQUIVALENCE or PAUSE""" - """ToDo: Can't believe this will allow a COMMON BLOCK.... + """ + TODO: Can't believe this will allow a COMMON BLOCK.... Need to check against what the original did..""" failures = 0 error_log = {} @@ -386,7 +426,8 @@ def forbidden_keywords(self, lines: List[str]) -> TestResult: clean_line = self.remove_quoted(line) clean_line = re.sub(r"!.*$", "", clean_line) - if re.search(r"\b(EQUIVALENCE|PAUSE)\b", clean_line, re.IGNORECASE): + if re.search(r"\b(EQUIVALENCE|PAUSE)\b", clean_line, + re.IGNORECASE): self.add_extra_error("forbidden keyword") failures += 1 error_log = self.add_error_log( @@ -421,7 +462,8 @@ def forbidden_operators(self, lines: List[str]) -> TestResult: ) return TestResult( - checker_name="Use of older form of relational operator (.GT. etc.)", + checker_name="Use of older form of relational operator " + + "(.GT. etc.)", failure_count=failures, passed=(failures == 0), output=f"Checked {count+1} lines, found {failures} failures.", @@ -437,7 +479,8 @@ def line_over_80chars(self, lines: List[str]) -> TestResult: if len(line.rstrip()) > 80: self.add_extra_error("line too long") failures += 1 - error_log = self.add_error_log(error_log, "line too long", count + 1) + error_log = self.add_error_log(error_log, "line too long", + count + 1) return TestResult( checker_name="Line longer than 80 characters", @@ -501,7 +544,8 @@ def printstar(self, lines: List[str]) -> TestResult: if re.search(r"\bPRINT\s*\*", clean_line, re.IGNORECASE): self.add_extra_error("PRINT * used") failures += 1 - error_log = self.add_error_log(error_log, "PRINT * used", count + 1) + error_log = self.add_error_log(error_log, "PRINT * used", + count + 1) return TestResult( checker_name="Use of PRINT rather than umMessage and umPrint", @@ -523,7 +567,8 @@ def write6(self, lines: List[str]) -> TestResult: if re.search(r"\bWRITE\s*\(\s*6\s*,", clean_line, re.IGNORECASE): self.add_extra_error("WRITE(6) used") failures += 1 - error_log = self.add_error_log(error_log, "WRITE(6) used", count + 1) + error_log = self.add_error_log(error_log, "WRITE(6) used", + count + 1) return TestResult( checker_name="Use of WRITE(6) rather than umMessage and umPrint", @@ -579,10 +624,12 @@ def omp_missing_dollar(self, lines: List[str]) -> TestResult: error_log = {} count = -1 for count, line in enumerate(lines): - if re.search(r"!\s*OMP\b", line) and not re.search(r"!\$OMP", line): + if (re.search(r"!\s*OMP\b", line) and + not re.search(r"!\$OMP", line)): self.add_extra_error("!OMP without $") failures += 1 - error_log = self.add_error_log(error_log, "!OMP without $", count + 1) + error_log = self.add_error_log(error_log, "!OMP without $", + count + 1) return TestResult( checker_name="!OMP without $", @@ -615,7 +662,8 @@ def cpp_ifdef(self, lines: List[str]) -> TestResult: def cpp_comment(self, lines: List[str]) -> TestResult: """Check for Fortran comments in CPP directives""" - """Todo: This looks like it will incorrectly fail # if !defined(X) + """ + TODO: This looks like it will incorrectly fail # if !defined(X) How did the original do this test?""" failures = 0 error_log = {} @@ -629,7 +677,8 @@ def cpp_comment(self, lines: List[str]) -> TestResult: self.add_extra_error("Fortran comment in CPP directive") failures += 1 error_log = self.add_error_log( - error_log, "Fortran comment in CPP directive", count + 1 + error_log, "Fortran comment in CPP directive", + count + 1 ) return TestResult( @@ -654,7 +703,8 @@ def obsolescent_fortran_intrinsic(self, lines: List[str]) -> TestResult: self.add_extra_error(f"obsolescent intrinsic: {intrinsic}") failures += 1 error_log = self.add_error_log( - error_log, f"obsolescent intrinsic: {intrinsic}", count + 1 + error_log, f"obsolescent intrinsic: {intrinsic}", + count + 1 ) return TestResult( @@ -702,8 +752,10 @@ def intrinsic_modules(self, lines: List[str]) -> TestResult: for module in intrinsic_modules: if re.search( rf"\bUSE\s+(::)*\s*{module}\b", clean_line, re.IGNORECASE - ) and not re.search(r"\bINTRINSIC\b", clean_line, re.IGNORECASE): - self.add_extra_error(f"intrinsic module {module} without INTRINSIC") + ) and not re.search(r"\bINTRINSIC\b", clean_line, + re.IGNORECASE): + self.add_extra_error( + f"intrinsic module {module} without INTRINSIC") failures += 1 error_log = self.add_error_log( error_log, @@ -728,7 +780,8 @@ def read_unit_args(self, lines: List[str]) -> TestResult: clean_line = self.remove_quoted(line) clean_line = re.sub(r"!.*$", "", clean_line) - if match := re.search(r"\bREAD\s*\(\s*([^,)]+)", clean_line, re.IGNORECASE): + if match := re.search(r"\bREAD\s*\(\s*([^,)]+)", clean_line, + re.IGNORECASE): first_arg = match.group(1).strip() if not first_arg.upper().startswith("UNIT="): self.add_extra_error("READ without explicit UNIT=") @@ -747,7 +800,8 @@ def read_unit_args(self, lines: List[str]) -> TestResult: def retire_if_def(self, lines: List[str]) -> TestResult: """Check for if-defs due for retirement""" - # retired_ifdefs = ['VATPOLES', 'A12_4A', 'A12_3A', 'UM_JULES', 'A12_2A',] + # retired_ifdefs = ['VATPOLES', 'A12_4A', 'A12_3A', 'UM_JULES', + # 'A12_2A',] failures = 0 error_log = {} count = -1 @@ -761,13 +815,16 @@ def retire_if_def(self, lines: List[str]) -> TestResult: line, re.IGNORECASE, ): - # # The above match either returns [None, SYMBOL] or [SYMBOL, None] - # SYMBOL = [x for x in match.groups() if x] # reduce to a list of 1 element + # # The above match either returns [None, SYMBOL] or + # [SYMBOL, None] + # SYMBOL = [x for x in match.groups() if x] # reduce to a + # list of 1 element if match.group(1) in retired_ifdefs: self.add_extra_error(f"retired if-def: {match.group(1)}") failures += 1 error_log = self.add_error_log( - error_log, f"retired if-def: {match.group(1)}", count + 1 + error_log, f"retired if-def: {match.group(1)}", + count + 1 ) return TestResult( checker_name="retired if-def", @@ -809,7 +866,8 @@ def forbidden_stop(self, lines: List[str]) -> TestResult: clean_line = self.remove_quoted(line) clean_line = re.sub(r"!.*$", "", clean_line) - if re.search(r"\b(STOP|CALL\s+abort)\b", clean_line, re.IGNORECASE): + if re.search(r"\b(STOP|CALL\s+abort)\b", clean_line, + re.IGNORECASE): self.add_extra_error("STOP or CALL abort used") failures += 1 error_log = self.add_error_log( @@ -835,11 +893,9 @@ def intrinsic_as_variable(self, lines: List[str]) -> TestResult: # as I doubt this does anything near what that did... for count, line in enumerate(lines): clean_line = self.remove_quoted(line) - if re.search( - r"^\s*(INTEGER|REAL|LOGICAL|CHARACTER)\s*.*::\s*(SIN|COS|LOG|EXP|TAN)\b", - clean_line, - re.IGNORECASE, - ): + check = (r"^\s*(INTEGER|REAL|LOGICAL|CHARACTER)\s*.*:" + + r":\s*(SIN|COS|LOG|EXP|TAN)\b") + if re.search(check, clean_line, re.IGNORECASE, ): self.add_extra_error("intrinsic function used as variable") failures += 1 error_log = self.add_error_log( @@ -856,7 +912,8 @@ def intrinsic_as_variable(self, lines: List[str]) -> TestResult: def check_crown_copyright(self, lines: List[str]) -> TestResult: """Check for crown copyright statement""" - """ToDo: This is a very simplistic check and will not detect many + """ + TODO: This is a very simplistic check and will not detect many cases which break UMDP3. I suspect the Perl Predeccessor did much more convoluted tests""" comment_lines = [ @@ -869,7 +926,8 @@ def check_crown_copyright(self, lines: List[str]) -> TestResult: found_copyright = True if not found_copyright: - self.add_extra_error("missing copyright or crown copyright statement") + self.add_extra_error( + "missing copyright or crown copyright statement") error_log = self.add_error_log( error_log, "missing copyright or crown copyright statement", 0 ) @@ -883,20 +941,27 @@ def check_crown_copyright(self, lines: List[str]) -> TestResult: def check_code_owner(self, lines: List[str]) -> TestResult: """Check for correct code owner comment""" - """ToDo: oh wow is this test worthless. We don't even guarentee to put the wrds "code owner" in a file. Plus, that's before you take into account both returns were '0' - so it couldn't possibly fail (picard.gif) - The Perl looks to have been designed to check the whole file, and turns various logicals on/off dependent on previously processed lines.""" + """ + TODO: oh wow is this test worthless. We don't even guarentee to put + the wrds "code owner" in a file. Plus, that's before you take into + account both returns were '0' - so it couldn't possibly fail + (picard.gif) + The Perl looks to have been designed to check the whole file, and turns + various logicals on/off dependent on previously processed lines.""" # Simplified check for code owner information file_content = "\n".join(lines) found_code_owner = False error_log = {} - if "Code Owner:" in file_content or "code owner" in file_content.lower(): + if ("Code Owner:" in file_content or + "code owner" in file_content.lower()): # print(f"Debug: Found {file_content.lower()}") found_code_owner = True # This is often a warning rather than an error if not found_code_owner: self.add_extra_error("missing code owner comment") - error_log = self.add_error_log(error_log, "missing code owner comment", 0) + error_log = self.add_error_log(error_log, + "missing code owner comment", 0) return TestResult( checker_name="Code Owner Comment", failure_count=0 if found_code_owner else 1, @@ -907,7 +972,10 @@ def check_code_owner(self, lines: List[str]) -> TestResult: def array_init_form(self, lines: List[str]) -> TestResult: """Check for old array initialization form""" - """ToDo: Another instance that assumes continuation lines are concatenated prior to executing the actual test to ensure both forward slashes are on the same line.""" + """ + TODO: Another instance that assumes continuation lines are + concatenated prior to executing the actual test to ensure both forward + slashes are on the same line.""" failures = 0 error_log = {} count = -1 @@ -965,7 +1033,8 @@ def c_deprecated(self, lines: List[str]) -> int: for line in lines: for identifier in deprecated_c_identifiers: if re.search(rf"\b{identifier}\b", line): - self.add_extra_error(f"deprecated C identifier: {identifier}") + self.add_extra_error( + f"deprecated C identifier: {identifier}") failures += 1 return failures @@ -992,7 +1061,8 @@ def c_openmp_define_no_combine(self, lines: List[str]) -> int: ) or re.search( r"&&.*_OPENMP.*&&.*SHUM_USE_C_OPENMP_VIA_THREAD_UTILS", line ): - self.add_extra_error("OpenMP defines combined with third macro") + self.add_extra_error( + "OpenMP defines combined with third macro") failures += 1 return failures diff --git a/script_umdp3_checker/umdp3_conformance.py b/script_umdp3_checker/umdp3_conformance.py index 56a98980..7d8b6f25 100644 --- a/script_umdp3_checker/umdp3_conformance.py +++ b/script_umdp3_checker/umdp3_conformance.py @@ -4,17 +4,14 @@ from typing import Callable, List, Dict, Set from dataclasses import dataclass, field import argparse +from checker_dispatch_tables import CheckerDispatchTables +from umdp3_checker_rules import TestResult +import concurrent.futures # Add custom modules to Python path if needed # Add the repository root to access fcm_bdiff and git_bdiff packages import sys - sys.path.insert(0, str(Path(__file__).parent.parent)) -from github_scripts import git_bdiff -import fcm_bdiff -from checker_dispatch_tables import CheckerDispatchTables -from umdp3_checker_rules import TestResult -import concurrent.futures """ Framework and Classes to generate a list of files to check for style @@ -55,6 +52,8 @@ class GitBdiffWrapper(CMSSystem): """Wrapper around git_bdiff to get changed files.""" def __init__(self, repo_path: Path = Path(".")): + from github_scripts import git_bdiff + self.repo_path = repo_path self.bdiff_obj = git_bdiff.GitBDiff(repo=self.repo_path) self.info_obj = git_bdiff.GitInfo(repo=self.repo_path) @@ -77,6 +76,8 @@ class FCMBdiffWrapper(CMSSystem): """Wrapper around fcm_bdiff to get changed files.""" def __init__(self, repo_path: Path = Path(".")): + from fcm_bdiff import fcm_bdiff + self.repo_path = repo_path self.bdiff_obj = fcm_bdiff.FCMBDiff(repo=self.repo_path) @@ -96,7 +97,8 @@ def get_branch_name(self) -> str: class StyleChecker(ABC): """Abstract base class for style checkers.""" - """ ToDo: This is where it might be good to set up a threadsafe + """ + TODO: This is where it might be good to set up a threadsafe class instance to hold the 'expanded' check outputs. One for each file being checked in parallel. Curently the UMDP3 class holds "_extra_error_info" which @@ -112,6 +114,22 @@ class instance to hold the 'expanded' check outputs. check_functions: Dict[str, Callable] files_to_check: List[Path] + def __init__( + self, + name: str, + file_extensions: Set[str], + check_functions: Dict[str, Callable], + changed_files: List[Path] = [], + ): + self.name = name + self.file_extensions = file_extensions or set() + self.check_functions = check_functions or {} + self.files_to_check = ( + self.filter_files(changed_files, self.file_extensions) + if changed_files + else [] + ) + @abstractmethod def get_name(self) -> str: """Return the name of this checker.""" @@ -181,7 +199,8 @@ def check(self, file_path: Path) -> CheckResult: for check_name, check_function in self.check_functions.items(): file_results.append(check_function(lines)) - tests_failed = sum([0 if result.passed else 1 for result in file_results]) + tests_failed = sum([0 if result.passed else 1 for result in + file_results]) return CheckResult( file_path=str(file_path), tests_failed=tests_failed, @@ -193,7 +212,11 @@ def check(self, file_path: Path) -> CheckResult: class ExternalChecker(StyleChecker): """Wrapper for external style checking tools.""" - """ToDo : This is overriding the 'syle type hint from the base class. As we're currently passing in a list of strings to pass to 'subcommand'. Ideally we should be making callable functions for each check, but that would require more refactoring of the code. + """ + TODO : This is overriding the 'syle type hint from the base class. + As we're currently passing in a list of strings to pass to 'subcommand'. + Ideally we should be making callable functions for each check, but that + would require more refactoring of the code. Is that a 'factory' method?""" check_commands: Dict[str, List[str]] @@ -229,7 +252,8 @@ def check(self, file_path: Path) -> CheckResult: for test_name, command in self.check_commands.items(): try: cmd = command + [str(file_path)] - result = subprocess.run(cmd, capture_output=True, text=True, timeout=60) + result = subprocess.run(cmd, capture_output=True, + text=True, timeout=60) except subprocess.TimeoutExpired: file_results.append( TestResult( @@ -300,7 +324,9 @@ def check_files(self) -> None: However, given that the number of files is likely to be small, and the number of checkers is also small, this should be acceptable for now. - ToDo : Might be good to have a threadsafe object for each file and + """ + """ + TODO : Might be good to have a threadsafe object for each file and allow multiple checks to be run at once on that file.""" results = [] @@ -321,15 +347,16 @@ def check_files(self) -> None: def print_results(self, print_volume: int = 3) -> bool: """Print results and return True if all checks passed. - ======================================================== - ToDo: If an object encapsulating the data for each file is created" + ========================================================""" + """ + TODO: If an object encapsulating the data for each file is created" it should contain the "in depth" printing method for file data. With this method presenting the summary and then looping over each file object to print its details at the desired verbosity.""" all_passed = True for result in self.results: file_status = "✓ PASS" if result.all_passed else "✗ FAIL" - # Lousy variable names here - 'result' is the CheckResult for a file + # Lousy variable names here: 'result' is the CheckResult for a file # which had multiple tests, so result.all_passed is for that file. all_passed = all_passed and result.all_passed if print_volume >= 2: @@ -337,7 +364,7 @@ def print_results(self, print_volume: int = 3) -> bool: if print_volume < 4 and result.all_passed: continue for test_result in result.test_results: - """ToDo : The output logic here is a bit of a mess.""" + # TODO : The output logic here is a bit of a mess. if print_volume < 5 and test_result.passed: continue if print_volume >= 4: @@ -381,19 +408,23 @@ def process_arguments(): default=["Fortran"], help="File types to check, comma-separated", ) - """ ToDo : I /think/ the old version also checked '.h' files as Fortran. + """ + TODO : I /think/ the old version also checked '.h' files as Fortran. Not sure if that is still needed.""" parser.add_argument( "-p", "--path", type=str, default="./", help="path to repository" ) parser.add_argument( - "--max-workers", type=int, default=8, help="Maximum number of parallel workers" + "--max-workers", type=int, default=8, + help="Maximum number of parallel workers" ) parser.add_argument( - "-v", "--verbose", action="count", default=0, help="Increase output verbosity" + "-v", "--verbose", action="count", default=0, + help="Increase output verbosity" ) parser.add_argument( - "-q", "--quiet", action="count", default=0, help="Decrease output verbosity" + "-q", "--quiet", action="count", default=0, + help="Decrease output verbosity" ) # The following are not yet implemented, but may become useful # branch and base branch could be used to configure the CMS diff @@ -416,7 +447,8 @@ def which_cms_is_it(path: str) -> CMSSystem: if (repo_path / ".git").is_dir(): return GitBdiffWrapper(repo_path) elif (repo_path / ".svn").is_dir(): - """ToDo : If we still want this to work reliably with FCM, it will need + """ + TODO : If we still want this to work reliably with FCM, it will need to also accept URLs and not just local paths.""" return FCMBdiffWrapper(repo_path) else: @@ -430,30 +462,34 @@ def create_style_checkers( dispatch_tables = CheckerDispatchTables() checkers = [] if "Fortran" in file_types: - file_extensions = {".f", ".for", ".f90", ".f95", ".f03", ".f08", ".F90"} + file_extensions = {".f", ".for", ".f90", ".f95", + ".f03", ".f08", ".F90"} fortran_diff_table = dispatch_tables.get_diff_dispatch_table_fortran() fortran_file_table = dispatch_tables.get_file_dispatch_table_fortran() print("Configuring Fortran checkers:") combined_checkers = fortran_diff_table | fortran_file_table fortran_file_checker = UMDP3_checker.from_full_list( - "Fortran Checker", file_extensions, combined_checkers, changed_files + "Fortran Checker", file_extensions, + combined_checkers, changed_files ) checkers.append(fortran_file_checker) if "Python" in file_types: print("Setting up Python external checkers.") file_extensions = {".py"} python_checkers = { - "flake 8": ["flake8", "-q"], - "black": ["black", "--check"], - "pylint": ["pylint", "-E"], - # "ruff" : ["ruff", "check"], + # "flake 8": ["flake8", "-q"], + # "black": ["black", "--check"], + # "pylint": ["pylint", "-E"], + "ruff": ["ruff", "check"], } python_file_checker = ExternalChecker( - "Python External Checkers", file_extensions, python_checkers, changed_files + "Python External Checkers", file_extensions, + python_checkers, changed_files ) checkers.append(python_file_checker) - """ ToDo : Puting this here, with no file type filtering, + """ + TODO : Puting this here, with no file type filtering, means it will always run on all changed files. It might be better to add the dispatch table to all the other checkers so it's only running on 'code' files.""" @@ -489,15 +525,17 @@ def create_style_checkers( print(f" {changed_file}") # Configure checkers - """ ToDo : Uncertain as to how flexible this needs to be. + """ + TODO : Uncertain as to how flexible this needs to be. For now, just configure checkers based on file type requested. Later, could add configuration files to specify which checkers to use for each file type.""" checkers = [] - active_checkers = create_style_checkers(args.file_types, cms.get_changed_files()) + active_checkers = create_style_checkers(args.file_types, + cms.get_changed_files()) - # ToDo : Could create a conformance checker for each + # TODO : Could create a conformance checker for each # file type. # Currently, just create a single conformance checker # with all active checkers. @@ -512,6 +550,7 @@ def create_style_checkers( all_passed = checker.print_results(print_volume=args.volume) print(f"Total files checked: {len(checker.results)}") - print(f"Total files failed: {sum(1 for r in checker.results if not r.all_passed)}") + print(f"Total files failed: " + f"{sum(1 for r in checker.results if not r.all_passed)}") exit(0 if all_passed else 1)