Skip to content

Commit ead3652

Browse files
authored
Fix the Clang static analyser ingestion (#772)
The GitHub team changed their validation when accepting static analysis reports, so I wrote a quick adapter script to fix up the issues.
1 parent ddb0df4 commit ead3652

File tree

2 files changed

+73
-24
lines changed

2 files changed

+73
-24
lines changed

.github/scripts/merge_sarif.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#! /usr/bin/env python3
2+
3+
# The purpose of this file is to adapt the output from
4+
# Clang's static analyzer into a format suitable for GitHub
5+
# Actions. The problem is that Clang outputs a separate "run"
6+
# per file in its SARIF output, but GitHub requires a single
7+
# run per tool (Clang is wrong here). It also clamps any region whose end precedes its start.
8+
9+
import sys
10+
import json
11+
12+
if len(sys.argv) < 2:
    # The usage hint names this script as it is checked in (merge_sarif.py).
    print("Usage: merge_sarif.py INPUT", file=sys.stderr)
    sys.exit(1)

# Load the SARIF log; the same path is rewritten in place at the end of the script.
with open(sys.argv[1], 'rb') as f:
    data = json.load(f)

runs = data['runs']

# A log without any runs has nothing to merge; leave it untouched instead of
# failing with an IndexError.
if runs:
    # Arbitrarily pick the first run as the one from which to copy all the properties
    base_run = runs[0]

    # We don't need these; GitHub ignores them
    base_run['artifacts'] = []

    # Concatenate results. A run is not guaranteed to carry a 'results' array
    # (it may be missing or null), so treat that case as "no results".
    merged_results = list(base_run.get('results') or [])
    for r in runs[1:]:
        merged_results.extend(r.get('results') or [])
    base_run['results'] = merged_results

    data['runs'] = [base_run]
31+
32+
def fix_region(region):
    """Clamp a SARIF region in place so that its end never precedes its start.

    Two repairs are applied to the ``region`` dictionary:

    * If ``endLine`` is smaller than ``startLine``, ``endLine`` is set to
      ``startLine`` and any ``endColumn`` is dropped, since it referred to
      the discarded end line.
    * If the region ends on its start line (explicitly, or because it has
      no ``endLine``) and ``endColumn`` is smaller than ``startColumn``,
      ``endColumn`` is set to ``startColumn``. A missing ``startColumn``
      is treated as 1.

    Args:
        region: Mutable mapping holding the region's properties.

    Returns:
        None. ``region`` is modified in place.

    Raises:
        ValueError: If ``region`` has no ``startLine``.
    """
    start_line = region.get('startLine')
    if start_line is None:
        raise ValueError("Region must have startLine")

    start_column = region.get('startColumn', 1)
    end_line = region.get('endLine')
    end_column = region.get('endColumn')

    if end_line is not None and end_line < start_line:
        region['endLine'] = start_line
        # pop() rather than del: a region may have a bad endLine and no
        # endColumn at all, which must not raise KeyError.
        region.pop('endColumn', None)
        end_line = start_line
        end_column = None

    ends_on_start_line = end_line is None or end_line == start_line
    if end_column is not None and ends_on_start_line and end_column < start_column:
        region['endColumn'] = start_column
47+
48+
# Recursively scan the data dictionary, and apply the fix_region() function
49+
# to all "region":Region key-value pairs.
50+
def fix_regions(data):
    """Walk a decoded JSON value and repair every region found in it.

    Whenever a dictionary has a 'region' key, its value is passed to
    fix_region() first; the walk then descends into every value of that
    dictionary (the region itself included). Lists are descended into
    element by element. Any other kind of value is ignored.
    """
    if isinstance(data, dict):
        if 'region' in data:
            fix_region(data['region'])
        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        # Scalars (strings, numbers, booleans, None) have nothing to visit.
        return

    for child in children:
        fix_regions(child)
59+
60+
# Repair every region in the merged log before it is serialised.
fix_regions(data)

# Overwrite the input file with the adapted log, pretty-printed with a
# two-space indent.
serialised = json.dumps(data, indent=2)
with open(sys.argv[1], 'w') as f:
    f.write(serialised)

.github/workflows/clang-analyzer.yml

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,6 @@ jobs:
1818
security-events: write
1919
contents: read
2020

21-
env:
22-
# The @microsoft/sarif-multitool tool actually uses DotnetCore, which in
23-
# turn aborts when it finds that GitHub's CI machine doesn't have ICU.
24-
# Just turn off localisation. A future version of the ubuntu-24.04 or
25-
# ubuntu-latest runners might not need this workaround.
26-
# https://github.com/actions/runner-images/issues/10989
27-
DOTNET_SYSTEM_GLOBALIZATION_INVARIANT: 1
28-
2921
steps:
3022
- name: Setup
3123
run: |
@@ -37,36 +29,30 @@ jobs:
3729
with:
3830
submodules: true
3931

40-
- name: Configure
32+
- name: Configure & Build
4133
run: |
34+
LLVM_VER=`clang --version | head -n1 | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+' | cut -d. -f1`
35+
echo "Using LLVM version $LLVM_VER"
36+
4237
mkdir build
4338
cd build
44-
scan-build cmake -G Ninja -DPCRE2_SUPPORT_JIT=ON -DCMAKE_BUILD_TYPE=Debug ..
4539
46-
- name: Build
47-
run: |
48-
# Inefficiently run clang scan twice; once to generate HTML, and secondly
49-
# to generate SARIF files. Ideally we would have some way to scan once and
50-
# generate one of those outputs from the other, but I don't know a good way
51-
# to do that.
52-
cd build
53-
scan-build -o clang-report/ ninja
40+
scan-build-py-$LLVM_VER -o clang-sarif-root/ --sarif-html sh -c "cmake -G Ninja -DPCRE2_SUPPORT_JIT=ON -DCMAKE_BUILD_TYPE=Debug .. && ninja"
41+
rm clang-sarif-root/*/result-*.sarif
42+
mv clang-sarif-root/* ../clang-report
5443
55-
ninja clean
56-
scan-build -o clang-sarif -sarif ninja
57-
# Work around issue in GitHub's SARIF ingestion - merge all SARIF files into one
58-
npx -y @microsoft/sarif-multitool merge clang-sarif/*/*.sarif --output-file=clang.sarif
44+
../.github/scripts/merge_sarif.py ../clang-report/results-merged.sarif
5945
6046
# Upload the browsable HTML report as an artifact.
6147
- name: Upload report
6248
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
6349
with:
6450
name: "Clang Static Analyzer report"
65-
path: './build/clang-report'
51+
path: './clang-report'
6652

6753
# Upload the results to GitHub's code scanning dashboard.
6854
- name: "Upload to code-scanning"
6955
uses: github/codeql-action/upload-sarif@51f77329afa6477de8c49fc9c7046c15b9a4e79d # v3.29.5
7056
with:
71-
sarif_file: build/clang.sarif
57+
sarif_file: ./clang-report/results-merged.sarif
7258
category: clang-analyzer

0 commit comments

Comments
 (0)