#!/usr/bin/env python

"""
CmpRuns - A simple tool for comparing two static analyzer runs to determine
which reports have been added, removed, or changed.

This is designed to support automated testing using the static analyzer, from
two perspectives:

  1. To monitor changes in the static analyzer's reports on real code bases,
     for regression testing.

  2. For use by end users who want to integrate regular static analyzer
     testing into a buildbot-like environment.

Usage:

    # Load the results of both runs, to obtain lists of the corresponding
    # AnalysisDiagnostic objects.
    #
    resultsA = loadResultsFromSingleRun(singleRunInfoA, deleteEmpty)
    resultsB = loadResultsFromSingleRun(singleRunInfoB, deleteEmpty)

    # Generate a relation from diagnostics in run A to diagnostics in run B
    # to obtain a list of triples (a, b, confidence).
    diff = compareResults(resultsA, resultsB)

"""

import os
import plistlib


# Information about analysis run:
# path - the analysis output directory
# root - the name of the root directory, which will be disregarded when
#        determining the source file name
class SingleRunInfo:
    def __init__(self, path, root="", verboseLog=None):
        self.path = path
        self.root = root
        self.verboseLog = verboseLog

class AnalysisDiagnostic:
    def __init__(self, data, report, htmlReport):
        self._data = data
        self._loc = self._data['location']
        self._report = report
        self._htmlReport = htmlReport

    def getFileName(self):
        root = self._report.run.root
        fileName = self._report.files[self._loc['file']]
        if fileName.startswith(root):
            return fileName[len(root):]
        return fileName

    def getLine(self):
        return self._loc['line']

    def getColumn(self):
        return self._loc['col']

    def getCategory(self):
        return self._data['category']

    def getDescription(self):
        return self._data['description']

    def getIssueIdentifier(self):
        id = self.getFileName() + "+"
        if 'issue_context' in self._data:
            id += self._data['issue_context'] + "+"
        if 'issue_hash' in self._data:
            id += str(self._data['issue_hash'])
        return id
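
    # Illustrative note (hypothetical values): for a report in 'lib/foo.c'
    # whose plist data carries issue_context 'foo' and issue_hash 1, the
    # identifier assembled above would be 'lib/foo.c+foo+1'.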

    def getReport(self):
        if self._htmlReport is None:
            return " "
        return os.path.join(self._report.run.path, self._htmlReport)

    def getReadableName(self):
        return '%s:%d:%d, %s: %s' % (self.getFileName(), self.getLine(),
                                     self.getColumn(), self.getCategory(),
                                     self.getDescription())

    # Note, the data format is not an API and may change from one analyzer
    # version to another.
    def getRawData(self):
        return self._data

class multidict:
    def __init__(self, elts=()):
        self.data = {}
        for key, value in elts:
            self[key] = value

    def __getitem__(self, item):
        return self.data[item]
    def __setitem__(self, key, value):
        if key in self.data:
            self.data[key].append(value)
        else:
            self.data[key] = [value]
    def items(self):
        return self.data.items()
    def values(self):
        return self.data.values()
    def keys(self):
        return self.data.keys()
    def __len__(self):
        return len(self.data)
    def get(self, key, default=None):
        return self.data.get(key, default)
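
# A quick sketch of how multidict behaves (illustrative only): repeated
# assignment to the same key accumulates values in a list instead of
# overwriting them.
#
#     m = multidict()
#     m['k'] = 1
#     m['k'] = 2
#     m['k']   # -> [1, 2]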

class CmpOptions:
    def __init__(self, verboseLog=None, rootA="", rootB=""):
        self.rootA = rootA
        self.rootB = rootB
        self.verboseLog = verboseLog

class AnalysisReport:
    def __init__(self, run, files):
        self.run = run
        self.files = files
        self.diagnostics = []

class AnalysisRun:
    def __init__(self, info):
        self.path = info.path
        self.root = info.root
        self.info = info
        self.reports = []
        # Cumulative list of all diagnostics from all the reports.
        self.diagnostics = []
        self.clang_version = None

    def getClangVersion(self):
        return self.clang_version


# Backward compatibility API.
def loadResults(path, opts, root="", deleteEmpty=True):
    return loadResultsFromSingleRun(SingleRunInfo(path, root, opts.verboseLog),
                                    deleteEmpty)

# Load results of the analyses from a given output folder.
# - info is the SingleRunInfo object
# - deleteEmpty specifies if the empty plist files should be deleted
def loadResultsFromSingleRun(info, deleteEmpty=True):
    path = info.path
    run = AnalysisRun(info)

    for (dirpath, dirnames, filenames) in os.walk(path):
        for f in filenames:
            if not f.endswith('plist'):
                continue

            p = os.path.join(dirpath, f)
            data = plistlib.readPlist(p)

            # We want to retrieve the clang version even if there are no
            # reports. Assume that all reports were created using the same
            # clang version (this is always true and is more efficient).
            if 'clang_version' in data:
                if run.clang_version is None:
                    run.clang_version = data.pop('clang_version')
                else:
                    data.pop('clang_version')

            # Ignore/delete empty reports.
            if not data['files']:
                if deleteEmpty:
                    os.remove(p)
                continue

            # Extract the HTML reports, if they exist.
            if 'HTMLDiagnostics_files' in data['diagnostics'][0]:
                htmlFiles = []
                for d in data['diagnostics']:
                    # FIXME: Why is this named files, when does it have
                    # multiple files?
                    assert len(d['HTMLDiagnostics_files']) == 1
                    htmlFiles.append(d.pop('HTMLDiagnostics_files')[0])
            else:
                htmlFiles = [None] * len(data['diagnostics'])

            report = AnalysisReport(run, data.pop('files'))
            diagnostics = [AnalysisDiagnostic(d, report, h)
                           for d, h in zip(data.pop('diagnostics'),
                                           htmlFiles)]

            assert not data

            report.diagnostics.extend(diagnostics)
            run.reports.append(report)
            run.diagnostics.extend(diagnostics)

    return run
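
# A minimal usage sketch for loading a single run programmatically (the
# directory path below is a hypothetical example):
#
#     info = SingleRunInfo('/tmp/scan-build-out', root='/home/me/project/')
#     run = loadResultsFromSingleRun(info, deleteEmpty=False)
#     for d in run.diagnostics:
#         print d.getReadableName()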

def cmpAnalysisDiagnostic(d):
    return d.getIssueIdentifier()

def compareResults(A, B):
    """
    compareResults - Generate a relation from diagnostics in run A to
    diagnostics in run B.

    The result is the relation as a list of triples (a, b, confidence) where
    each element {a,b} is None or an element from the respective run, and
    confidence is a measure of the match quality (where 0 indicates equality,
    and None is used if either element is None).
    """

    res = []

    # Quickly eliminate equal elements.
    neqA = []
    neqB = []
    eltsA = list(A.diagnostics)
    eltsB = list(B.diagnostics)
    eltsA.sort(key=cmpAnalysisDiagnostic)
    eltsB.sort(key=cmpAnalysisDiagnostic)
    while eltsA and eltsB:
        a = eltsA.pop()
        b = eltsB.pop()
        if a.getIssueIdentifier() == b.getIssueIdentifier():
            res.append((a, b, 0))
        elif a.getIssueIdentifier() > b.getIssueIdentifier():
            eltsB.append(b)
            neqA.append(a)
        else:
            eltsA.append(a)
            neqB.append(b)
    neqA.extend(eltsA)
    neqB.extend(eltsB)

    # FIXME: Add fuzzy matching. One simple and possibly effective idea would
    # be to bin the diagnostics, print them in a normalized form (based solely
    # on the structure of the diagnostic), compute the diff, then use that as
    # the basis for matching. This has the nice property that we don't depend
    # in any way on the diagnostic format.

    for a in neqA:
        res.append((a, None, None))
    for b in neqB:
        res.append((None, b, None))

    return res

def dumpScanBuildResultsDiff(dirA, dirB, opts, deleteEmpty=True):
    # Load the run results.
    resultsA = loadResults(dirA, opts, opts.rootA, deleteEmpty)
    resultsB = loadResults(dirB, opts, opts.rootB, deleteEmpty)

    # Open the verbose log, if given.
    if opts.verboseLog:
        auxLog = open(opts.verboseLog, "wb")
    else:
        auxLog = None

    diff = compareResults(resultsA, resultsB)
    foundDiffs = 0
    for res in diff:
        a, b, confidence = res
        if a is None:
            print "ADDED: %r" % b.getReadableName()
            foundDiffs += 1
            if auxLog:
                print >>auxLog, ("('ADDED', %r, %r)" % (b.getReadableName(),
                                                        b.getReport()))
        elif b is None:
            print "REMOVED: %r" % a.getReadableName()
            foundDiffs += 1
            if auxLog:
                print >>auxLog, ("('REMOVED', %r, %r)" % (a.getReadableName(),
                                                          a.getReport()))
        elif confidence:
            print "CHANGED: %r to %r" % (a.getReadableName(),
                                         b.getReadableName())
            foundDiffs += 1
            if auxLog:
                print >>auxLog, ("('CHANGED', %r, %r, %r, %r)"
                                 % (a.getReadableName(),
                                    b.getReadableName(),
                                    a.getReport(),
                                    b.getReport()))
        else:
            pass

    TotalReports = len(resultsB.diagnostics)
    print "TOTAL REPORTS: %r" % TotalReports
    print "TOTAL DIFFERENCES: %r" % foundDiffs
    if auxLog:
        print >>auxLog, "('TOTAL NEW REPORTS', %r)" % TotalReports
        print >>auxLog, "('TOTAL DIFFERENCES', %r)" % foundDiffs

    return foundDiffs

def main():
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] [dir A] [dir B]")
    parser.add_option("", "--rootA", dest="rootA",
                      help="Prefix to ignore on source files for directory A",
                      action="store", type=str, default="")
    parser.add_option("", "--rootB", dest="rootB",
                      help="Prefix to ignore on source files for directory B",
                      action="store", type=str, default="")
    parser.add_option("", "--verbose-log", dest="verboseLog",
                      help="Write additional information to LOG [default=None]",
                      action="store", type=str, default=None,
                      metavar="LOG")
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("invalid number of arguments")

    dirA, dirB = args

    dumpScanBuildResultsDiff(dirA, dirB, opts)

if __name__ == '__main__':
    main()