[analyzer][scan-build-py] subprocess output handling reviewed in clang module

llvm-svn: 282317
This commit is contained in:
Laszlo Nagy 2016-09-24 00:20:59 +00:00
parent 0800b81a21
commit 4f6a175f10
3 changed files with 165 additions and 109 deletions

View File

@ -269,6 +269,9 @@ def validate(parser, args, from_build_command):
""" Validation done by the parser itself, but semantic check still
needs to be done. This method is doing that. """
# Make plugins always a list. (It might be None when not specified.)
args.plugins = args.plugins if args.plugins else []
if args.help_checkers_verbose:
print_checkers(get_checkers(args.clang, args.plugins))
parser.exit()

View File

@ -15,142 +15,143 @@ from libscanbuild.shell import decode
# Public names exported by a star-import of this module.
__all__ = ['get_version', 'get_arguments', 'get_checkers']
# Regex for an argument which activates a checker; the first group captures
# the checker name, which is the text after '-analyzer-checker='.
ACTIVE_CHECKER_PATTERN = re.compile(r'^-analyzer-checker=(.*)$')
def get_version(clang):
    """ Returns the compiler version as string.

    :param clang:   the compiler we are using
    :return:        the version string printed to stderr

    Raises OSError when the compiler can not be executed and
    subprocess.CalledProcessError when it exits with non-zero status. """

    # The '-v' output is captured together with stderr.
    output = subprocess.check_output([clang, '-v'], stderr=subprocess.STDOUT)
    # Only the first line is used, therefore undecodable bytes anywhere in
    # the output shall not make the whole query fail.
    return output.decode('utf-8', 'replace').splitlines()[0]
def get_arguments(command, cwd):
    """ Capture Clang invocation.

    :param command: the compilation command
    :param cwd:     the current working directory
    :return:        the detailed front-end invocation command

    This method returns the front-end invocation that would be executed as
    a result of the given driver invocation.

    Raises OSError when the command can not be executed,
    subprocess.CalledProcessError when it exits with non-zero status and
    Exception when there is no output or the last line is a Clang error. """

    # Work on a copy, the caller's command list shall stay untouched.
    cmd = command[:]
    cmd.insert(1, '-###')

    logging.debug('exec command in %s: %s', cwd, ' '.join(cmd))
    output = subprocess.check_output(cmd, cwd=cwd, stderr=subprocess.STDOUT)
    # The relevant information is in the last line of the output.
    lines = output.decode('utf-8').splitlines()
    if not lines:
        # Report the problem explicitly instead of a bare IndexError.
        raise Exception("output not found")
    last_line = lines[-1]
    if re.search(r'clang(.*): error:', last_line):
        raise Exception(last_line)
    return decode(last_line)
def get_active_checkers(clang, plugins):
    """ Get the active checker list.

    :param clang:   the compiler we are using
    :param plugins: list of plugins which was requested by the user
    :return:        frozenset of checker names which are active

    To get the default checkers we execute Clang to print how this
    compilation would be called. And take out the enabled checker from the
    arguments. For input file we specify stdin and pass only language
    information. """

    # The plugin load arguments are the same for every language.
    load_args = [arg
                 for plugin in plugins
                 for arg in ['-Xclang', '-load', '-Xclang', plugin]]

    def get_active_checkers_for(language):
        """ Returns a list of active checkers for the given language. """

        cmd = [clang, '--analyze'] + load_args + ['-x', language, '-']
        # Match every argument only once and keep the successful ones.
        matches = (ACTIVE_CHECKER_PATTERN.match(arg)
                   for arg in get_arguments(cmd, '.'))
        return [match.group(1) for match in matches if match]

    result = set()
    for language in ['c', 'c++', 'objective-c', 'objective-c++']:
        result.update(get_active_checkers_for(language))
    return frozenset(result)
def is_active(checkers):
    """ Returns a method, which classifies the checker active or not,
    based on the received checker name list.

    :param checkers:    iterable of active checker names (or name prefixes)
    :return:            a predicate which takes a checker name

    The active names are specific checker names or prefix of some names.
    A prefix matches only on a '.' boundary: 'unix' matches 'unix' and
    'unix.API', but does not match 'unixAPI'. """

    def predicate(checker):
        """ Returns True if the given checker is active. """
        return any(pattern.match(checker) for pattern in predicate.patterns)

    # The names are literal text and not regular expressions. Escape them,
    # otherwise the '.' inside a name would match any character.
    predicate.patterns = [re.compile(r'^' + re.escape(a) + r'(\.|$)')
                          for a in checkers]
    return predicate
def parse_checkers(stream):
    """ Parse clang -analyzer-checker-help output.

    Below the line 'CHECKERS:' are there the name description pairs.
    Many of them are in one line, but some long named checker has the
    name and the description in separate lines.

    The checker name is always prefixed with two space character. The
    name contains no whitespaces. Then followed by newline (if it's
    too long) or other space characters comes the description of the
    checker. The description ends with a newline character.

    Lines which are not starting with a checker name, and are not the
    description of a name seen in the previous line, are ignored.

    :param stream:  list of lines to parse
    :return:        generator of tuples

    (<checker name>, <checker description>) """

    # Compile the patterns once and not for every line.
    entry_start = re.compile(r'^\s\s\S')
    name_only = re.compile(r'^\s\s\S+$')
    # The key must not be empty, otherwise an over-indented line would be
    # reported as a checker with an empty name.
    name_and_description = re.compile(r'^\s\s(?P<key>\S+)\s*(?P<value>.*)')

    lines = iter(stream)
    # find checkers header
    for line in lines:
        if line.startswith('CHECKERS:'):
            break
    # find entries
    state = None
    for line in lines:
        if state and not entry_start.match(line):
            # This line is the description of the name seen before.
            yield (state, line.strip())
            state = None
        elif name_only.match(line.rstrip()):
            # Long name: the description comes in the next line.
            state = line.strip()
        else:
            match = name_and_description.match(line.rstrip())
            if match:
                yield (match.group('key'), match.group('value'))
def get_checkers(clang, plugins):
    """ Get all the available checkers from default and from the plugins.

    :param clang:   the compiler we are using
    :param plugins: list of plugins which was requested by the user
                    (None is accepted and means no plugins)
    :return:        a dictionary of all available checkers and its status

    {<checker name>: (<checker description>, <is active by default>)}

    Raises OSError when the compiler can not be executed,
    subprocess.CalledProcessError when it exits with non-zero status and
    Exception when no checker was found in the output. """

    # Make plugins always a list. (It might be None when not specified.)
    plugins = plugins if plugins else []

    load = [elem for plugin in plugins for elem in ['-load', plugin]]
    cmd = [clang, '-cc1'] + load + ['-analyzer-checker-help']

    logging.debug('exec command: %s', ' '.join(cmd))
    output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    lines = output.decode('utf-8').splitlines()

    is_active_checker = is_active(get_active_checkers(clang, plugins))

    checkers = {
        name: (description, is_active_checker(name))
        for name, description in parse_checkers(lines)
    }
    if not checkers:
        raise Exception('Could not query Clang for available checkers.')

    return checkers

View File

@ -8,9 +8,19 @@ import libear
import libscanbuild.clang as sut
import unittest
import os.path
import sys
class GetClangArgumentsTest(unittest.TestCase):
class ClangGetVersion(unittest.TestCase):
    """ Checks of the compiler version query. """

    def test_get_version_is_not_empty(self):
        # The query runs the 'clang' executable.
        version = sut.get_version('clang')
        self.assertTrue(version)

    def test_get_version_throws(self):
        # A compiler name which can not be executed shall raise OSError.
        self.assertRaises(OSError, sut.get_version, 'notexists')
class ClangGetArgumentsTest(unittest.TestCase):
def test_get_clang_arguments(self):
with libear.TemporaryDirectory() as tmpdir:
filename = os.path.join(tmpdir, 'test.c')
@ -25,18 +35,60 @@ class GetClangArgumentsTest(unittest.TestCase):
self.assertTrue('var="this is it"' in result)
def test_get_clang_arguments_fails(self):
self.assertRaises(
Exception, sut.get_arguments,
['clang', '-###', '-fsyntax-only', '-x', 'c', 'notexist.c'], '.')
with self.assertRaises(Exception):
sut.get_arguments(['clang', '-x', 'c', 'notexist.c'], '.')
def test_get_clang_arguments_fails_badly(self):
with self.assertRaises(OSError):
sut.get_arguments(['notexist'], '.')
class GetCheckersTest(unittest.TestCase):
class ClangGetCheckersTest(unittest.TestCase):
    """ Checks of the checker query, classification and parsing methods. """

    def test_get_checkers(self):
        # this test is only to see is not crashing
        result = sut.get_checkers('clang', [])
        self.assertTrue(len(result))
        # do check result types
        string_type = unicode if sys.version_info < (3,) else str
        for key, value in result.items():
            self.assertEqual(string_type, type(key))
            self.assertEqual(string_type, type(value[0]))
            self.assertEqual(bool, type(value[1]))

    def test_get_active_checkers(self):
        # this test is only to see is not crashing
        result = sut.get_active_checkers('clang', [])
        self.assertTrue(len(result))
        # do check result types
        for value in result:
            self.assertEqual(str, type(value))

    def test_is_active(self):
        test = sut.is_active(['a', 'b.b', 'c.c.c'])

        # exact names and names below an active prefix are active
        for name in ('a', 'a.b', 'b.b', 'b.b.c', 'c.c.c.p'):
            self.assertTrue(test(name))

        # a prefix counts only when it ends on a '.' boundary
        for name in ('ab', 'ba', 'bb', 'c.c', 'b', 'd'):
            self.assertFalse(test(name))

    def test_parse_checkers(self):
        # The parser recognizes a checker name only after exactly two
        # leading spaces; the description of a long name comes in the
        # next line with a deeper indent.
        lines = [
            'OVERVIEW: Clang Static Analyzer Checkers List',
            '',
            'CHECKERS:',
            '  checker.one       Checker One description',
            '  checker.two',
            '                    Checker Two description']
        result = dict(sut.parse_checkers(lines))
        self.assertIn('checker.one', result)
        self.assertEqual('Checker One description', result.get('checker.one'))
        self.assertIn('checker.two', result)
        self.assertEqual('Checker Two description', result.get('checker.two'))