[scan-build-py] Update scan-build-py to allow outputting as SARIF

Clang static analysis reports can be generated in HTML, plist, or SARIF
format. This updates scan-build-py so that SARIF can be specified as the
desired output format; previously only the plist and HTML formats were
supported.

Differential Revision: https://reviews.llvm.org/D94251
Author:     Daniel Hwang
Date:       2021-02-07 18:22:03 -08:00
Committer:  Haowei Wu
Parent:     d3e13b58cd
Commit:     d72859ffa2

5 changed files with 657 additions and 11 deletions
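For context, a minimal usage sketch of the new flag (the paths and the
timestamped report directory name are illustrative; the tool prints the
actual report directory it creates, and analyze-build assumes an existing
compile_commands.json in the working directory):

import json
import subprocess

# Hypothetical invocation: analyze-build with the new --sarif flag writes
# per-translation-unit result-*.sarif files and merges them on completion.
subprocess.run(['analyze-build', '--sarif', '--output', '/tmp/scan-out'],
               check=True)

# The merged document lands in the report directory created under --output
# (the directory name below is made up for the example).
with open('/tmp/scan-out/scan-build-2021-02-07-0/results-merged.sarif') as fp:
    print('merged runs:', len(json.load(fp)['runs']))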

clang/tools/scan-build-py/libscanbuild/analyze.py

@@ -52,7 +52,8 @@ def scan_build():
args = parse_args_for_scan_build()
# will re-assign the report directory as new output
- with report_directory(args.output, args.keep_empty) as args.output:
+ with report_directory(
+     args.output, args.keep_empty, args.output_format) as args.output:
# Run against a build command. There are cases when an analyzer run
# is not required. But we need to set up everything for the
# wrappers, because 'configure' needs to capture the CC/CXX values
@@ -79,7 +80,7 @@ def analyze_build():
args = parse_args_for_analyze_build()
# will re-assign the report directory as new output
- with report_directory(args.output, args.keep_empty) as args.output:
+ with report_directory(args.output, args.keep_empty, args.output_format) as args.output:
# Run the analyzer against a compilation db.
govern_analyzer_runs(args)
# Cover report generation and bug counting.
@@ -336,7 +337,7 @@ def analyze_compiler_wrapper_impl(result, execution):
@contextlib.contextmanager
- def report_directory(hint, keep):
+ def report_directory(hint, keep, output_format):
""" Responsible for the report directory.
hint -- could specify the parent directory of the output directory.
@@ -355,7 +356,11 @@ def report_directory(hint, keep):
yield name
finally:
if os.listdir(name):
msg = "Run 'scan-view %s' to examine bug reports."
if output_format != 'sarif':
# 'scan-view' currently does not support sarif format.
msg = "Run 'scan-view %s' to examine bug reports."
else:
msg = "View result at %s/results-merged.sarif."
keep = True
else:
if keep:
@@ -433,7 +438,7 @@ def require(required):
'direct_args', # arguments from command line
'force_debug', # kill non debug macros
'output_dir', # where generated report files shall go
- 'output_format', # it's 'plist', 'html', both or plist-multi-file
+ 'output_format', # it's 'plist', 'html', 'plist-html', 'plist-multi-file', or 'sarif'
'output_failures', # generate crash reports or not
'ctu']) # ctu control options
def run(opts):
@@ -537,6 +542,12 @@ def run_analyzer(opts, continuation=report_failure):
dir=opts['output_dir'])
os.close(handle)
return name
elif opts['output_format'] == 'sarif':
(handle, name) = tempfile.mkstemp(prefix='result-',
suffix='.sarif',
dir=opts['output_dir'])
os.close(handle)
return name
return opts['output_dir']
try:
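A side note on the new sarif branch above: it mirrors the existing plist
handling, reserving a uniquely named result file up front so that concurrent
analyzer invocations writing into the same report directory cannot collide.
The pattern in isolation (function name is illustrative):

import os
import tempfile

def reserve_result_file(output_dir, suffix='.sarif'):
    # mkstemp atomically creates a unique file and returns (fd, path); the
    # descriptor is closed right away because only the reserved name is used.
    handle, name = tempfile.mkstemp(prefix='result-', suffix=suffix,
                                    dir=output_dir)
    os.close(handle)
    return name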

clang/tools/scan-build-py/libscanbuild/arguments.py

@@ -244,6 +244,14 @@ def create_analyze_parser(from_build_command):
action='store_const',
help="""Cause the results as a set of .plist files with extra
information on related files.""")
format_group.add_argument(
'--sarif',
'-sarif',
dest='output_format',
const='sarif',
default='html',
action='store_const',
help="""Cause the results as a result.sarif file.""")
advanced = parser.add_argument_group('advanced options')
advanced.add_argument(
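All of the format flags, including the new one, funnel into the shared
output_format destination via store_const, so whichever flag is given
determines the format. A reduced sketch of that wiring (the real parser also
groups these options, which is omitted here):

import argparse

parser = argparse.ArgumentParser()
for flag, value in [('--plist', 'plist'), ('--sarif', 'sarif')]:
    # each flag stores its constant into the shared dest when present
    parser.add_argument(flag, dest='output_format', const=value,
                        default='html', action='store_const')

print(parser.parse_args(['--sarif']).output_format)  # -> sarif
print(parser.parse_args([]).output_format)           # -> html (the default)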

clang/tools/scan-build-py/libscanbuild/report.py

@@ -27,6 +27,7 @@ def document(args):
""" Generates cover report and returns the number of bugs/crashes. """
html_reports_available = args.output_format in {'html', 'plist-html'}
sarif_reports_available = args.output_format in {'sarif'}
logging.debug('count crashes and bugs')
crash_count = sum(1 for _ in read_crashes(args.output))
@@ -57,6 +58,11 @@ def document(args):
finally:
for fragment in fragments:
os.remove(fragment)
if sarif_reports_available:
logging.debug('merging sarif files')
merge_sarif_files(args.output)
return result
@@ -277,6 +283,98 @@ def read_bugs(output_dir, html):
if not duplicate(bug):
yield bug
def merge_sarif_files(output_dir, sort_files=False):
""" Reads and merges all .sarif files in the given output directory.
Each sarif file in the output directory is understood as a single run
and thus appears as a separate entry in the top-level runs array. This
requires modifying the run index of any embedded links in messages.
"""
def empty(file_name):
return os.stat(file_name).st_size == 0
def update_sarif_object(sarif_object, runs_count_offset):
"""
Given a SARIF object, checks its dictionary entries for a 'message' property.
If one exists, updates the run index of any embedded links in the message.
Recursively looks through entries in the dictionary.
"""
if not isinstance(sarif_object, dict):
return sarif_object
if 'message' in sarif_object:
sarif_object['message'] = match_and_update_run(sarif_object['message'], runs_count_offset)
for key in sarif_object:
if isinstance(sarif_object[key], list):
# iterate through sub-objects and update each of them.
arr = [update_sarif_object(entry, runs_count_offset) for entry in sarif_object[key]]
sarif_object[key] = arr
elif isinstance(sarif_object[key], dict):
sarif_object[key] = update_sarif_object(sarif_object[key], runs_count_offset)
else:
# do nothing
pass
return sarif_object
def match_and_update_run(message, runs_count_offset):
"""
Given a SARIF message object, checks if the text property contains an embedded link and
updates the run index if necessary.
"""
if 'text' not in message:
return message
# we only merge runs, so we only need to update the run index
pattern = re.compile(r'sarif:/runs/(\d+)')
text = message['text']
matches = re.finditer(pattern, text)
matches_list = list(matches)
# update matches from right to left, since a replacement can grow in length
# (e.g. 9 -> 10) and would otherwise invalidate the offsets of earlier matches
for idx in range(len(matches_list) - 1, -1, -1):
match = matches_list[idx]
new_run_count = str(runs_count_offset + int(match.group(1)))
text = text[0:match.start(1)] + new_run_count + text[match.end(1):]
message['text'] = text
return message
sarif_files = (file for file in glob.iglob(os.path.join(output_dir, '*.sarif')) if not empty(file))
# exposed for testing since the order of files returned by glob is not guaranteed to be sorted
if sort_files:
sarif_files = list(sarif_files)
sarif_files.sort()
runs_count = 0
merged = {}
for sarif_file in sarif_files:
with open(sarif_file) as fp:
sarif = json.load(fp)
if 'runs' not in sarif:
continue
# start with the first file
if not merged:
merged = sarif
else:
# extract the run and append it to the merged output
for run in sarif['runs']:
new_run = update_sarif_object(run, runs_count)
merged['runs'].append(new_run)
runs_count += len(sarif['runs'])
with open(os.path.join(output_dir, 'results-merged.sarif'), 'w') as out:
json.dump(merged, out, indent=4, sort_keys=True)
def parse_bug_plist(filename):
""" Returns the generator of bugs from a single .plist file. """

clang/tools/scan-build-py/tests/unit/test_analyze.py

@@ -128,7 +128,7 @@ class Spy(object):
class RunAnalyzerTest(unittest.TestCase):
@staticmethod
- def run_analyzer(content, failures_report):
+ def run_analyzer(content, failures_report, output_format='plist'):
with libear.TemporaryDirectory() as tmpdir:
filename = os.path.join(tmpdir, 'test.cpp')
with open(filename, 'w') as handle:
@@ -141,31 +141,46 @@ class RunAnalyzerTest(unittest.TestCase):
'direct_args': [],
'file': filename,
'output_dir': tmpdir,
- 'output_format': 'plist',
+ 'output_format': output_format,
'output_failures': failures_report
}
spy = Spy()
result = sut.run_analyzer(opts, spy.call)
- return (result, spy.arg)
+ output_files = []
+ for entry in os.listdir(tmpdir):
+     output_files.append(entry)
+ return (result, spy.arg, output_files)
def test_run_analyzer(self):
content = "int div(int n, int d) { return n / d; }"
- (result, fwds) = RunAnalyzerTest.run_analyzer(content, False)
+ (result, fwds, _) = RunAnalyzerTest.run_analyzer(content, False)
self.assertEqual(None, fwds)
self.assertEqual(0, result['exit_code'])
def test_run_analyzer_crash(self):
content = "int div(int n, int d) { return n / d }"
- (result, fwds) = RunAnalyzerTest.run_analyzer(content, False)
+ (result, fwds, _) = RunAnalyzerTest.run_analyzer(content, False)
self.assertEqual(None, fwds)
self.assertEqual(1, result['exit_code'])
def test_run_analyzer_crash_and_forwarded(self):
content = "int div(int n, int d) { return n / d }"
- (_, fwds) = RunAnalyzerTest.run_analyzer(content, True)
+ (_, fwds, _) = RunAnalyzerTest.run_analyzer(content, True)
self.assertEqual(1, fwds['exit_code'])
self.assertTrue(len(fwds['error_output']) > 0)
+ def test_run_analyzer_with_sarif(self):
+     content = "int div(int n, int d) { return n / d; }"
+     (result, fwds, output_files) = RunAnalyzerTest.run_analyzer(content, False, output_format='sarif')
+     self.assertEqual(None, fwds)
+     self.assertEqual(0, result['exit_code'])
+     pattern = re.compile(r'^result-.+\.sarif$')
+     for f in output_files:
+         if re.match(pattern, f):
+             return
+     self.fail('no result sarif files found in output')
class ReportFailureTest(unittest.TestCase):

clang/tools/scan-build-py/tests/unit/test_report.py

@@ -3,6 +3,7 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
import json
import libear
import libscanbuild.report as sut
import unittest
@@ -145,3 +146,516 @@ class GetPrefixFromCompilationDatabaseTest(unittest.TestCase):
def test_empty(self):
self.assertEqual(
sut.commonprefix([]), '')
class MergeSarifTest(unittest.TestCase):
def test_merging_sarif(self):
sarif1 = {
'$schema': 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
'runs': [
{
'artifacts': [
{
'length': 100,
'location': {
'uri': '//clang/tools/scan-build-py/tests/unit/test_report.py'
},
'mimeType': 'text/plain',
'roles': [
'resultFile'
]
}
],
'columnKind': 'unicodeCodePoints',
'results': [
{
'codeFlows': [
{
'threadFlows': [
{
'locations': [
{
'importance': 'important',
'location': {
'message': {
'text': 'test message 1'
},
'physicalLocation': {
'artifactLocation': {
'index': 0,
'uri': '//clang/tools/scan-build-py/tests/unit/test_report.py'
},
'region': {
'endColumn': 5,
'startColumn': 1,
'startLine': 2
}
}
}
}
]
}
]
}
]
},
{
'codeFlows': [
{
'threadFlows': [
{
'locations': [
{
'importance': 'important',
'location': {
'message': {
'text': 'test message 2'
},
'physicalLocation': {
'artifactLocation': {
'index': 0,
'uri': '//clang/tools/scan-build-py/tests/unit/test_report.py'
},
'region': {
'endColumn': 23,
'startColumn': 9,
'startLine': 10
}
}
}
}
]
}
]
}
]
}
],
'tool': {
'driver': {
'fullName': 'clang static analyzer',
'language': 'en-US',
'name': 'clang',
'rules': [
{
'fullDescription': {
'text': 'test rule for merge sarif test'
},
'helpUrl': '//clang/tools/scan-build-py/tests/unit/test_report.py',
'id': 'testId',
'name': 'testName'
}
],
'version': 'test clang'
}
}
}
],
'version': '2.1.0'
}
sarif2 = {
'$schema': 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
'runs': [
{
'artifacts': [
{
'length': 1523,
'location': {
'uri': '//clang/tools/scan-build-py/tests/unit/test_report.py'
},
'mimeType': 'text/plain',
'roles': [
'resultFile'
]
}
],
'columnKind': 'unicodeCodePoints',
'results': [
{
'codeFlows': [
{
'threadFlows': [
{
'locations': [
{
'importance': 'important',
'location': {
'message': {
'text': 'test message 3'
},
'physicalLocation': {
'artifactLocation': {
'index': 0,
'uri': '//clang/tools/scan-build-py/tests/unit/test_report.py'
},
'region': {
'endColumn': 99,
'startColumn': 99,
'startLine': 17
}
}
}
}
]
}
]
}
]
},
{
'codeFlows': [
{
'threadFlows': [
{
'locations': [
{
'importance': 'important',
'location': {
'message': {
'text': 'test message 4'
},
'physicalLocation': {
'artifactLocation': {
'index': 0,
'uri': '//clang/tools/scan-build-py/tests/unit/test_report.py'
},
'region': {
'endColumn': 305,
'startColumn': 304,
'startLine': 1
}
}
}
}
]
}
]
}
]
}
],
'tool': {
'driver': {
'fullName': 'clang static analyzer',
'language': 'en-US',
'name': 'clang',
'rules': [
{
'fullDescription': {
'text': 'test rule for merge sarif test'
},
'helpUrl': '//clang/tools/scan-build-py/tests/unit/test_report.py',
'id': 'testId',
'name': 'testName'
}
],
'version': 'test clang'
}
}
}
],
'version': '2.1.0'
}
contents = [sarif1, sarif2]
with libear.TemporaryDirectory() as tmpdir:
for idx, content in enumerate(contents):
file_name = os.path.join(tmpdir, 'results-{}.sarif'.format(idx))
with open(file_name, 'w') as handle:
json.dump(content, handle)
sut.merge_sarif_files(tmpdir, sort_files=True)
self.assertIn('results-merged.sarif', os.listdir(tmpdir))
with open(os.path.join(tmpdir, 'results-merged.sarif')) as f:
merged = json.load(f)
self.assertEqual(len(merged['runs']), 2)
self.assertEqual(len(merged['runs'][0]['results']), 2)
self.assertEqual(len(merged['runs'][1]['results']), 2)
expected = sarif1
for run in sarif2['runs']:
expected['runs'].append(run)
self.assertEqual(merged, expected)
def test_merge_updates_embedded_link(self):
sarif1 = {
'runs': [
{
'results': [
{
'codeFlows': [
{
'message': {
'text': 'test message 1-1 [link](sarif:/runs/1/results/0) [link2](sarif:/runs/1/results/0)'
},
'threadFlows': [
{
'message': {
'text': 'test message 1-2 [link](sarif:/runs/1/results/0)'
}
}
]
}
]
}
]
},
{
'results': [
{
'codeFlows': [
{
'message': {
'text': 'test message 2-1 [link](sarif:/runs/0/results/0)'
},
'threadFlows': [
{
'message': {
'text': 'test message 2-2 [link](sarif:/runs/0/results/0)'
}
}
]
}
]
}
]
}
]
}
sarif2 = {
'runs': [
{
'results': [
{
'codeFlows': [
{
'message': {
'text': 'test message 3-1 [link](sarif:/runs/1/results/0) [link2](sarif:/runs/1/results/0)'
},
'threadFlows': [
{
'message': {
'text': 'test message 3-2 [link](sarif:/runs/1/results/0)'
}
}
]
}
]
}
],
},
{
'results': [
{
'codeFlows': [
{
'message': {
'text': 'test message 4-1 [link](sarif:/runs/0/results/0)'
},
'threadFlows': [
{
'message': {
'text': 'test message 4-2 [link](sarif:/runs/0/results/0)'
}
}
]
}
]
}
]
}
]
}
sarif3 = {
'runs': [
{
'results': [
{
'codeFlows': [
{
'message': {
'text': 'test message 5-1 [link](sarif:/runs/1/results/0) [link2](sarif:/runs/1/results/0)'
},
'threadFlows': [
{
'message': {
'text': 'test message 5-2 [link](sarif:/runs/1/results/0)'
}
}
]
}
]
}
],
},
{
'results': [
{
'codeFlows': [
{
'message': {
'text': 'test message 6-1 [link](sarif:/runs/0/results/0)'
},
'threadFlows': [
{
'message': {
'text': 'test message 6-2 [link](sarif:/runs/0/results/0)'
}
}
]
}
]
}
]
}
]
}
contents = [sarif1, sarif2, sarif3]
with libear.TemporaryDirectory() as tmpdir:
for idx, content in enumerate(contents):
file_name = os.path.join(tmpdir, 'results-{}.sarif'.format(idx))
with open(file_name, 'w') as handle:
json.dump(content, handle)
sut.merge_sarif_files(tmpdir, sort_files=True)
self.assertIn('results-merged.sarif', os.listdir(tmpdir))
with open(os.path.join(tmpdir, 'results-merged.sarif')) as f:
merged = json.load(f)
self.assertEqual(len(merged['runs']), 6)
code_flows = [merged['runs'][x]['results'][0]['codeFlows'][0]['message']['text'] for x in range(6)]
thread_flows = [merged['runs'][x]['results'][0]['codeFlows'][0]['threadFlows'][0]['message']['text'] for x in range(6)]
# The run index should be updated for the second and third sets of runs
self.assertEqual(code_flows,
[
'test message 1-1 [link](sarif:/runs/1/results/0) [link2](sarif:/runs/1/results/0)',
'test message 2-1 [link](sarif:/runs/0/results/0)',
'test message 3-1 [link](sarif:/runs/3/results/0) [link2](sarif:/runs/3/results/0)',
'test message 4-1 [link](sarif:/runs/2/results/0)',
'test message 5-1 [link](sarif:/runs/5/results/0) [link2](sarif:/runs/5/results/0)',
'test message 6-1 [link](sarif:/runs/4/results/0)'
])
self.assertEqual(thread_flows,
[
'test message 1-2 [link](sarif:/runs/1/results/0)',
'test message 2-2 [link](sarif:/runs/0/results/0)',
'test message 3-2 [link](sarif:/runs/3/results/0)',
'test message 4-2 [link](sarif:/runs/2/results/0)',
'test message 5-2 [link](sarif:/runs/5/results/0)',
'test message 6-2 [link](sarif:/runs/4/results/0)'
])
def test_overflow_run_count(self):
sarif1 = {
'runs': [
{'results': [{
'message': {'text': 'run 1-0 [link](sarif:/runs/1/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 1-1 [link](sarif:/runs/2/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 1-2 [link](sarif:/runs/3/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 1-3 [link](sarif:/runs/4/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 1-4 [link](sarif:/runs/5/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 1-5 [link](sarif:/runs/6/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 1-6 [link](sarif:/runs/7/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 1-7 [link](sarif:/runs/8/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 1-8 [link](sarif:/runs/9/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 1-9 [link](sarif:/runs/0/results/0)'}
}]}
]
}
sarif2 = {
'runs': [
{'results': [{
'message': {'text': 'run 2-0 [link](sarif:/runs/1/results/0) [link2](sarif:/runs/2/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 2-1 [link](sarif:/runs/2/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 2-2 [link](sarif:/runs/3/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 2-3 [link](sarif:/runs/4/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 2-4 [link](sarif:/runs/5/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 2-5 [link](sarif:/runs/6/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 2-6 [link](sarif:/runs/7/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 2-7 [link](sarif:/runs/8/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 2-8 [link](sarif:/runs/9/results/0)'}
}]},
{'results': [{
'message': {'text': 'run 2-9 [link](sarif:/runs/0/results/0)'}
}]}
]
}
contents = [sarif1, sarif2]
with libear.TemporaryDirectory() as tmpdir:
for idx, content in enumerate(contents):
file_name = os.path.join(tmpdir, 'results-{}.sarif'.format(idx))
with open(file_name, 'w') as handle:
json.dump(content, handle)
sut.merge_sarif_files(tmpdir, sort_files=True)
self.assertIn('results-merged.sarif', os.listdir(tmpdir))
with open(os.path.join(tmpdir, 'results-merged.sarif')) as f:
merged = json.load(f)
self.assertEqual(len(merged['runs']), 20)
messages = [merged['runs'][x]['results'][0]['message']['text'] for x in range(20)]
self.assertEqual(messages,
[
'run 1-0 [link](sarif:/runs/1/results/0)',
'run 1-1 [link](sarif:/runs/2/results/0)',
'run 1-2 [link](sarif:/runs/3/results/0)',
'run 1-3 [link](sarif:/runs/4/results/0)',
'run 1-4 [link](sarif:/runs/5/results/0)',
'run 1-5 [link](sarif:/runs/6/results/0)',
'run 1-6 [link](sarif:/runs/7/results/0)',
'run 1-7 [link](sarif:/runs/8/results/0)',
'run 1-8 [link](sarif:/runs/9/results/0)',
'run 1-9 [link](sarif:/runs/0/results/0)',
'run 2-0 [link](sarif:/runs/11/results/0) [link2](sarif:/runs/12/results/0)',
'run 2-1 [link](sarif:/runs/12/results/0)',
'run 2-2 [link](sarif:/runs/13/results/0)',
'run 2-3 [link](sarif:/runs/14/results/0)',
'run 2-4 [link](sarif:/runs/15/results/0)',
'run 2-5 [link](sarif:/runs/16/results/0)',
'run 2-6 [link](sarif:/runs/17/results/0)',
'run 2-7 [link](sarif:/runs/18/results/0)',
'run 2-8 [link](sarif:/runs/19/results/0)',
'run 2-9 [link](sarif:/runs/10/results/0)'
])
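The overflow test exists because adding the offset can grow an index's digit
count (sarif:/runs/9 with an offset of 10 becomes sarif:/runs/19), which
changes the string length. That is why merge_sarif_files splices replacements
from right to left: earlier match offsets, computed against the original
string, remain valid. The splice in isolation:

import re

text = 'a [x](sarif:/runs/9/results/0) and [y](sarif:/runs/9/results/0)'
matches = list(re.finditer(r'sarif:/runs/(\d+)', text))
for match in reversed(matches):  # right to left keeps earlier offsets valid
    new_index = str(int(match.group(1)) + 10)  # 9 -> 19, one digit longer
    text = text[:match.start(1)] + new_index + text[match.end(1):]
print(text)
# -> a [x](sarif:/runs/19/results/0) and [y](sarif:/runs/19/results/0)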