@@ -908,44 +908,67 @@ def print_checkers(glob_pattern:str):
908908};
909909""" )
910910
# SEI CERT is available as markdown files at
# https://github.com/cmu-sei/secure-coding-standards
CERT_REPO = 'cmu-sei/secure-coding-standards'
CERT_BRANCH = 'main'

# Cached git tree of CERT_REPO (the 'tree' list returned by the GitHub API).
# It is fetched at most once per run: every listCertFiles() call needs the
# same listing, so there is no reason to repeat the API request.
_cert_tree = None


def _fetchCertTree():
    """Downloads the recursive git tree listing of CERT_REPO at CERT_BRANCH.

    Returns the list stored under the 'tree' key of the API response.
    Raises an HTTP error from requests on a non-success status, and
    RuntimeError if the response has no 'tree' or is marked as truncated.
    """
    url = 'https://api.github.com/repos/%s/git/trees/%s?recursive=1' % (CERT_REPO, CERT_BRANCH)
    response = requests.get(url, timeout=50)
    # Fail with the real HTTP error instead of a KeyError on 'tree' below.
    response.raise_for_status()
    data = response.json()
    if not isinstance(data, dict) or 'tree' not in data:
        raise RuntimeError('Unexpected response from %s' % url)
    if data.get('truncated'):
        # An incomplete listing would silently produce an incomplete table.
        raise RuntimeError('Git tree listing from %s is truncated' % url)
    return data['tree']


def listCertFiles(content_subdir:str):
    """Lists the rules and recommendation files for one part of the standard.

    content_subdir is a directory below 'content/' in CERT_REPO; a trailing
    '/' is optional. Returns the sorted repository paths of all '.md' blobs
    below that directory, skipping files whose name contains 'index'
    (case-insensitive).
    """
    global _cert_tree
    if _cert_tree is None:
        _cert_tree = _fetchCertTree()

    # Anchor the prefix on a directory boundary so that e.g. '03.rules' does
    # not also match a sibling directory such as '03.rules-old/'.
    prefix = 'content/' + content_subdir
    if not prefix.endswith('/'):
        prefix += '/'

    files = []
    for entry in _cert_tree:
        path = entry['path']
        if entry['type'] != 'blob':
            continue
        if not (path.startswith(prefix) and path.endswith('.md')):
            continue
        # NOTE(review): substring match; this would also skip a rule file
        # whose name merely contains 'index' - confirm against the repo.
        if 'index' in path.rsplit('/', 1)[-1].lower():
            continue
        files.append(path)
    return sorted(files)
935+
def _parseCertRule(text:str):
    """Extracts (rule id, level) from the markdown text of one CERT page.

    Returns None if the text has no '# XXXnn-C' / '# XXXnn-CPP' heading.
    Otherwise returns a tuple (rule_id, level) where level is 'L1', 'L2' or
    'L3', or None if no level is found in the 'Risk Assessment' section.
    """
    res = re.search(r'^#\s+([A-Z]{3}\d{2}-C(?:PP)?)\b', text, re.MULTILINE)
    if res is None:
        return None
    rule_id = res.group(1)
    # Find risk assessment section. The pattern also accepts the title
    # without any leading '#'.
    head = re.search(r'^#{0,4}\s*Risk Assessments?\s*$', text, re.MULTILINE)
    if head is None:
        return rule_id, None
    # Only look inside that section: cut at the next heading, if any.
    section = text[head.end():]
    nexthead = re.search(r'^#{1,4}\s+\S', section, re.MULTILINE)
    if nexthead:
        section = section[:nexthead.start()]
    # The first L1/L2/L3 token in the section is taken as the level.
    level = re.search(r'\bL[1-3]\b', section)
    return rule_id, (level.group(0) if level else None)


def printCertCInfo(content_subdir:str):
    """Prints the CERT rule levels found below content_subdir.

    Downloads every file returned by listCertFiles(content_subdir) from
    raw.githubusercontent.com and prints one ' {"<rule id>", "<level>"},'
    line per rule to stdout, sorted by rule id. Files without a rule heading
    are ignored. Rules whose level cannot be determined are not printed;
    they are reported on stderr so that they are not lost silently.
    Raises an HTTP error from requests if a file cannot be downloaded.
    """
    import sys  # local import: only needed for the stderr diagnostics

    rules = {}
    for path in listCertFiles(content_subdir):
        raw = 'https://raw.githubusercontent.com/%s/%s/%s' % (CERT_REPO, CERT_BRANCH, path)
        response = requests.get(raw, timeout=30)
        # Do not parse an error page as if it were a rule.
        response.raise_for_status()
        parsed = _parseCertRule(response.text)
        if parsed is None:
            continue
        rule_id, level = parsed
        if level is None:
            print('warning: no risk level found for %s (%s)' % (rule_id, path), file=sys.stderr)
            continue
        rules[rule_id] = level
    for rule_id, level in sorted(rules.items()):
        print(' {"%s", "%s"},' % (rule_id, level))
961+
939962
940963
# Emit the CERT C table: rules first, then recommendations.
# The arguments are directories below 'content/' in the SEI CERT repository.
print('std::vector<checkers::Info> checkers::certCInfo{')
printCertCInfo('4.sei-cert-c-coding-standard/03.rules/')
print(' // Recommendations')
printCertCInfo('4.sei-cert-c-coding-standard/08.recommendations/')
print('};')
print('')
# Emit the CERT C++ table (rules only).
print('std::vector<checkers::Info> checkers::certCppInfo{')
printCertCInfo('5.sei-cert-cpp-coding-standard/3.rules/')
print('};')
print('')
951974
0 commit comments