Inline the CodeQL multi-repo scan helper scripts into the workflow.

* codeql-multiple-repo-scan.yml: removes the `concurrency` group and replaces
  the calls to scripts/workflow/parse_repos.sh, checkout_repos.sh and
  recategorize_guidelines.sh with inline shell in the corresponding steps.
* recategorize_guidelines.sh: captures the exit status of the
  recategorization script and exits before replacing the SARIF file when it
  is non-zero; a jq-based SARIF path filter is added commented-out.

diff --git a/.github/workflows/codeql-multiple-repo-scan.yml b/.github/workflows/codeql-multiple-repo-scan.yml
index a22531153b2..cfdb7969791 100644
--- a/.github/workflows/codeql-multiple-repo-scan.yml
+++ b/.github/workflows/codeql-multiple-repo-scan.yml
@@ -24,10 +24,6 @@ on:
   workflow_dispatch:
 permissions:
   contents: write
-# Do not flood CI with unneeded previous runs in PR
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
-  cancel-in-progress: ${{ github.ref_name != 'main' && !startsWith(github.ref_name, 'release/') }}
 jobs:
   analyze-repos:
     name: Analyze Multiple Repositories
@@ -54,11 +50,100 @@ jobs:
       - name: Parse known_good.json and create repos.json
         id: parse-repos
         run: |
-          scripts/workflow/parse_repos.sh
+          sudo apt-get update && sudo apt-get install -y jq
+          JSON_FILE="./known_good.json"
+
+          # Check if the file exists
+          if [ ! -f "$JSON_FILE" ]; then
+            echo "Error file not found '$JSON_FILE' " >&2
+            ls -la .
+            exit 1
+          fi
+
+          # Create repos.json from known_good.json
+          # This jq command transforms the 'modules.target_sw' object into an array of repository objects
+          # with 'name', 'url', 'version' (branch/tag/hash), and 'path'.
+          jq '[.modules.target_sw | to_entries[] | {
+            name: .key,
+            url: .value.repo,
+            version: (.value.branch // .value.hash // .value.version),
+            path: ("repos/" + .key)
+          }]' "$JSON_FILE" > repos.json
+
+          echo "Generated repos.json:"
+          cat repos.json
+          echo "" # Add a newline for better readability
+
+          # The following GITHUB_OUTPUT variables are set for each module.
+          # These might be useful for other steps, but are not directly used by the 'checkout-repos' step
+          # which now reads 'repos.json' directly.
+          echo "MODULE_COUNT=$(jq '.modules.target_sw | length' "$JSON_FILE")" >> "$GITHUB_OUTPUT"
+
+          jq -c '.modules.target_sw | to_entries[]' "$JSON_FILE" | while IFS= read -r module_entry; do
+            module_name=$(echo "$module_entry" | jq -r '.key')
+            repo_url=$(echo "$module_entry" | jq -r '.value.repo // empty')
+            version=$(echo "$module_entry" | jq -r '.value.version // empty')
+            branch=$(echo "$module_entry" | jq -r '.value.branch // empty')
+            hash=$(echo "$module_entry" | jq -r '.value.hash // empty')
+
+            echo "${module_name}_url=$repo_url" >> "$GITHUB_OUTPUT"
+
+            if [ -n "$version" ]; then
+              echo "${module_name}_version=$version" >> "$GITHUB_OUTPUT"
+            fi
+
+            if [ -n "$branch" ]; then
+              echo "${module_name}_branch=$branch" >> "$GITHUB_OUTPUT"
+            fi
+
+            if [ -n "$hash" ]; then
+              echo "${module_name}_hash=$hash" >> "$GITHUB_OUTPUT"
+            fi
+          done
       - name: Checkout all pinned repositories
         id: checkout-repos
         run: |
-          scripts/workflow/checkout_repos.sh
+          # jq is already installed by the previous step.
+
+          # Read repositories from the repos.json file created by the previous step
+          repos=$(cat repos.json)
+          repo_count=$(echo "$repos" | jq length)
+
+          # Initialize an empty string for paths to be outputted
+          repo_paths_output=""
+
+          for ((i = 0; i < repo_count; i++)); do
+            name=$(echo "$repos" | jq -r ".[$i].name")
+            url=$(echo "$repos" | jq -r ".[$i].url")
+            ref=$(echo "$repos" | jq -r ".[$i].version") # This can be a branch, tag, or commit hash
+            path=$(echo "$repos" | jq -r ".[$i].path") # e.g., "repos/score_baselibs"
+
+            echo "Checking out $name ($ref) to $path"
+
+            # Create the parent directory if it doesn't exist
+            mkdir -p "$(dirname "$path")"
+
+            # Check if 'ref' looks like a commit hash (e.g., 40 hex characters)
+            # This is a heuristic; a more robust check might involve fetching refs first.
+            if [[ "$ref" =~ ^[0-9a-fA-F]{40}$ ]]; then
+              echo " Detected commit hash. Cloning and then checking out."
+              git clone "$url" "$path"
+              (cd "$path" && git checkout "$ref")
+            else
+              echo " Detected branch/tag. Cloning with --branch."
+              git clone --depth 1 --branch "$ref" "$url" "$path"
+            fi
+
+            # Append the path to the list, separated by commas
+            if [ -z "$repo_paths_output" ]; then
+              repo_paths_output="$path"
+            else
+              repo_paths_output="$repo_paths_output,$path"
+            fi
+          done
+
+          # Output all paths as a single variable
+          echo "repo_paths=$repo_paths_output" >> "$GITHUB_OUTPUT"
       - name: Initialize CodeQL for all repositories
         uses: github/codeql-action/init@v4
         with:
@@ -75,7 +160,26 @@ jobs:
       - name: Recategorize Guidelines
         if: always()
         run: |
-          scripts/workflow/recategorize_guidelines.sh
+          RECATEGORIZE_SCRIPT="codeql-coding-standards-repo/scripts/guideline_recategorization/recategorize.py"
+          CODING_STANDARDS_CONFIG="./.github/codeql/coding-standards.yml"
+
+          CODING_STANDARDS_SCHEMA="codeql-coding-standards-repo/schemas/coding-standards-schema-1.0.0.json"
+          SARIF_SCHEMA="codeql-coding-standards-repo/schemas/sarif-schema-2.1.0.json"
+
+
+          SARIF_FILE="sarif-results/cpp.sarif"
+
+          mkdir -p sarif-results-recategorized
+          echo "Processing $SARIF_FILE for recategorization..."
+          python3 "$RECATEGORIZE_SCRIPT" \
+            --coding-standards-schema-file "$CODING_STANDARDS_SCHEMA" \
+            --sarif-schema-file "$SARIF_SCHEMA" \
+            "$CODING_STANDARDS_CONFIG" \
+            "$SARIF_FILE" \
+            "sarif-results-recategorized/$(basename "$SARIF_FILE")"
+
+          rm "$SARIF_FILE"
+          mv "sarif-results-recategorized/$(basename "$SARIF_FILE")" "$SARIF_FILE"
       - name: Generate HTML Report from SARIF
         run: |
           SARIF_FILE="sarif-results/cpp.sarif"
diff --git a/scripts/workflow/recategorize_guidelines.sh b/scripts/workflow/recategorize_guidelines.sh
index 8fa4b736020..83d612f70ce 100755
--- a/scripts/workflow/recategorize_guidelines.sh
+++ b/scripts/workflow/recategorize_guidelines.sh
@@ -15,8 +15,9 @@ RECATEGORIZE_SCRIPT="codeql-coding-standards-repo/scripts/guideline_recategoriza
 CODING_STANDARDS_CONFIG="./.github/codeql/coding-standards.yml"
 CODING_STANDARDS_SCHEMA="codeql-coding-standards-repo/schemas/coding-standards-schema-1.0.0.json"
 SARIF_SCHEMA="codeql-coding-standards-repo/schemas/sarif-schema-2.1.0.json"
-SARIF_FILE="sarif-results/cpp.sarif"
+SARIF_FILE="sarif-results/cpp.sarif"
 mkdir -p sarif-results-recategorized
+
 echo "Processing $SARIF_FILE for recategorization..."
 python3 "$RECATEGORIZE_SCRIPT" \
   --coding-standards-schema-file "$CODING_STANDARDS_SCHEMA" \
@@ -24,5 +25,27 @@ python3 "$RECATEGORIZE_SCRIPT" \
   "$CODING_STANDARDS_CONFIG" \
   "$SARIF_FILE" \
   "sarif-results-recategorized/$(basename "$SARIF_FILE")"
-  rm "$SARIF_FILE"
-  mv "sarif-results-recategorized/$(basename "$SARIF_FILE")" "$SARIF_FILE"
+PY_EXIT=$?
+if [ "$PY_EXIT" -ne 0 ]; then
+  echo "Recategorization failed (exit code $PY_EXIT). SARIF file not updated." >&2
+  exit "$PY_EXIT"
+fi
+rm "$SARIF_FILE"
+mv "sarif-results-recategorized/$(basename "$SARIF_FILE")" "$SARIF_FILE"
+
+# # Ensure jq is available
+# if ! command -v jq >/dev/null 2>&1; then
+#   echo "Error: jq is required but not installed. Please install jq and rerun this script." >&2
+#   exit 1
+# fi
+
+# # Filter SARIF to only include results from repos/* (relative or absolute)
+# echo "Filtering SARIF results to only include entries with paths matching (^|/)repos/ ..."
+# jq '(.runs) |= map(.results |= map(select((.locations // [] | length > 0) and ((.locations[0].physicalLocation.artifactLocation.uri // "") | test("(^|/)repos/")))) )' "$SARIF_FILE" > "${SARIF_FILE}.filtered"
+# if [ $? -eq 0 ]; then
+#   mv "${SARIF_FILE}.filtered" "$SARIF_FILE"
+# else
+#   echo "jq filtering failed. SARIF file was not modified." >&2
+#   rm -f "${SARIF_FILE}.filtered"
+#   exit 1
+# fi