[analyzer][PlistMacroExpansion] Part 3.: Macro arguments are expanded

This part focuses on expanding macro arguments.

Differential Revision: https://reviews.llvm.org/D52795

llvm-svn: 347629
This commit is contained in:
Kristof Umann 2018-11-27 02:28:23 +00:00
parent 0b35afd79d
commit 08d92e4a10
3 changed files with 4111 additions and 111 deletions

View File

@ -723,15 +723,24 @@ void PlistDiagnostics::FlushDiagnosticsImpl(
namespace {
struct MacroNameAndInfo {
std::string Name;
const MacroInfo *MI = nullptr;
using ExpArgTokens = llvm::SmallVector<Token, 2>;
MacroNameAndInfo(std::string N, const MacroInfo *MI)
: Name(std::move(N)), MI(MI) {}
/// Maps unexpanded macro arguments to expanded arguments. A macro argument may
/// need to be expanded further when it is nested inside another macro.
class MacroArgMap : public std::map<const IdentifierInfo *, ExpArgTokens> {
public:
/// For every argument stored in this map, replaces each identifier token that
/// is a key of \p Super with the token sequence \p Super maps it to. Tokens
/// that are not keys of \p Super are left untouched.
void expandFromPrevMacro(const MacroArgMap &Super);
};
/// Bundles the results of looking up a single macro expansion: the macro's
/// name, its MacroInfo, and the map of its arguments.
struct MacroNameAndArgs {
  /// Name of the macro.
  std::string Name;
  /// The MacroInfo belonging to the macro; defaults to null.
  const MacroInfo *MI = nullptr;
  /// The macro's arguments mapped to the tokens they expand into.
  MacroArgMap Args;

  MacroNameAndArgs(std::string MacroName, const MacroInfo *Info,
                   MacroArgMap ArgMap)
      : Name(std::move(MacroName)), MI(Info), Args(std::move(ArgMap)) {}
};
/// Helper class for printing tokens.
class TokenPrinter {
llvm::raw_ostream &OS;
const Preprocessor &PP;
@ -751,12 +760,51 @@ public:
} // end of anonymous namespace
/// The implementation method of getMacroExpansion: It prints the expansion of
/// a macro to \p Printer, and returns with the name of the macro.
///
/// Since macros can be nested in one another, this function may call itself
/// recursively.
///
/// Unfortunately, macro arguments have to be expanded manually. To understand
/// why, observe the following example:
///
/// #define PRINT(x) print(x)
/// #define DO_SOMETHING(str) PRINT(str)
///
/// DO_SOMETHING("Cute panda cubs.");
///
/// As we expand the last line, we'll immediately replace PRINT(str) with
/// print(x). The fact that both 'str' and 'x' refer to the same string is
/// information we have to forward, hence the argument \p PrevArgs.
static std::string getMacroNameAndPrintExpansion(TokenPrinter &Printer,
SourceLocation MacroLoc,
const Preprocessor &PP);
const Preprocessor &PP,
const MacroArgMap &PrevArgs);
/// Retrieves the name of the macro and its MacroInfo.
static MacroNameAndInfo getMacroNameAndInfo(SourceLocation ExpanLoc,
/// Retrieves the name of the macro and what its arguments expand into
/// at \p ExpanLoc.
///
/// For example, for the following macro expansion:
///
/// #define SET_TO_NULL(x) x = 0
/// #define NOT_SUSPICIOUS(a) \
/// { \
/// int b = 0; \
/// } \
/// SET_TO_NULL(a)
///
/// int *ptr = new int(4);
/// NOT_SUSPICIOUS(&ptr);
/// *ptr = 5;
///
/// When \p ExpanLoc references the last line, the macro name "NOT_SUSPICIOUS"
/// and the MacroArgMap map { (a, &ptr) } will be returned.
///
/// When \p ExpanLoc references "SET_TO_NULL(a)" within the definition of
/// "NOT_SUSPICIOUS", the macro name "SET_TO_NULL" and the MacroArgMap map
/// { (x, a) } will be returned.
static MacroNameAndArgs getMacroNameAndArgs(SourceLocation ExpanLoc,
const Preprocessor &PP);
/// Retrieves the ')' token that matches '(' \p It points to.
@ -781,21 +829,26 @@ static ExpansionInfo getExpandedMacro(SourceLocation MacroLoc,
llvm::SmallString<200> ExpansionBuf;
llvm::raw_svector_ostream OS(ExpansionBuf);
TokenPrinter Printer(OS, PP);
std::string MacroName = getMacroNameAndPrintExpansion(Printer, MacroLoc, PP);
std::string MacroName =
getMacroNameAndPrintExpansion(Printer, MacroLoc, PP, MacroArgMap{});
return { MacroName, OS.str() };
}
static std::string getMacroNameAndPrintExpansion(TokenPrinter &Printer,
SourceLocation MacroLoc,
const Preprocessor &PP) {
const Preprocessor &PP,
const MacroArgMap &PrevArgs) {
const SourceManager &SM = PP.getSourceManager();
MacroNameAndInfo Info = getMacroNameAndInfo(SM.getExpansionLoc(MacroLoc), PP);
const MacroInfo *MI = Info.MI;
MacroNameAndArgs Info = getMacroNameAndArgs(SM.getExpansionLoc(MacroLoc), PP);
// Manually expand its arguments from the previous macro.
Info.Args.expandFromPrevMacro(PrevArgs);
// Iterate over the macro's tokens and stringify them.
for (auto It = MI->tokens_begin(), E = MI->tokens_end(); It != E; ++It) {
for (auto It = Info.MI->tokens_begin(), E = Info.MI->tokens_end(); It != E;
++It) {
Token T = *It;
// If this token is not an identifier, we only need to print it.
@ -812,7 +865,7 @@ static std::string getMacroNameAndPrintExpansion(TokenPrinter &Printer,
// macro.
if (const MacroInfo *MI =
getMacroInfoForLocation(PP, SM, II, T.getLocation())) {
getMacroNameAndPrintExpansion(Printer, T.getLocation(), PP);
getMacroNameAndPrintExpansion(Printer, T.getLocation(), PP, Info.Args);
// If this is a function-like macro, skip its arguments, as
// getExpandedMacro() already printed them. If this is the case, let's
@ -822,15 +875,46 @@ static std::string getMacroNameAndPrintExpansion(TokenPrinter &Printer,
continue;
}
// If control reached here, then this token isn't a macro identifier, print
// it.
// If this token is the current macro's argument, we should expand it.
auto ArgMapIt = Info.Args.find(II);
if (ArgMapIt != Info.Args.end()) {
for (MacroInfo::tokens_iterator ArgIt = ArgMapIt->second.begin(),
ArgEnd = ArgMapIt->second.end();
ArgIt != ArgEnd; ++ArgIt) {
// These tokens may still be macros, if that is the case, handle it the
// same way we did above.
const auto *ArgII = ArgIt->getIdentifierInfo();
if (!ArgII) {
Printer.printToken(*ArgIt);
continue;
}
const auto *MI = PP.getMacroInfo(ArgII);
if (!MI) {
Printer.printToken(*ArgIt);
continue;
}
getMacroNameAndPrintExpansion(Printer, ArgIt->getLocation(), PP,
Info.Args);
if (MI->getNumParams() != 0)
ArgIt = getMatchingRParen(++ArgIt, ArgEnd);
}
continue;
}
// TODO: Handle tok::hash and tok::hashhash.
// If control reached here, then this token isn't a macro identifier, nor an
// unexpanded macro argument that we need to handle, print it.
Printer.printToken(T);
}
return Info.Name;
}
static MacroNameAndInfo getMacroNameAndInfo(SourceLocation ExpanLoc,
static MacroNameAndArgs getMacroNameAndArgs(SourceLocation ExpanLoc,
const Preprocessor &PP) {
const SourceManager &SM = PP.getSourceManager();
@ -857,7 +941,66 @@ static MacroNameAndInfo getMacroNameAndInfo(SourceLocation ExpanLoc,
const MacroInfo *MI = getMacroInfoForLocation(PP, SM, II, ExpanLoc);
assert(MI && "The macro must've been defined at it's expansion location!");
return { MacroName, MI };
// Acquire the macro's arguments.
//
// The rough idea here is to lex from the first left parenthesis to the last
// right parenthesis, and map the macro's unexpanded arguments to what they
// will be expanded to. An expanded macro argument may contain several tokens
// (like '3 + 4'), so we'll lex until we find a tok::comma or tok::r_paren, at
// which point we start lexing the next argument or finish.
ArrayRef<const IdentifierInfo *> MacroArgs = MI->params();
if (MacroArgs.empty())
return { MacroName, MI, {} };
RawLexer.LexFromRawLexer(TheTok);
assert(TheTok.is(tok::l_paren) &&
"The token after the macro's identifier token should be '('!");
MacroArgMap Args;
// When the macro's argument is a function call, like
// CALL_FN(someFunctionName(param1, param2))
// we will find tok::l_paren, tok::r_paren, and tok::comma that do not divide
// actual macro arguments, or do not represent the macro argument's closing
// parentheses, so we'll count how many parentheses aren't closed yet.
int ParenthesesDepth = 1;
for (const IdentifierInfo *UnexpArgII : MacroArgs) {
MacroArgMap::mapped_type ExpandedArgTokens;
// Lex the first token of the next macro parameter.
RawLexer.LexFromRawLexer(TheTok);
while (TheTok.isNot(tok::comma) || ParenthesesDepth != 1) {
assert(TheTok.isNot(tok::eof) &&
"EOF encountered while looking for expanded macro args!");
if (TheTok.is(tok::l_paren))
++ParenthesesDepth;
if (TheTok.is(tok::r_paren))
--ParenthesesDepth;
if (ParenthesesDepth == 0)
break;
if (TheTok.is(tok::raw_identifier))
PP.LookUpIdentifierInfo(TheTok);
ExpandedArgTokens.push_back(TheTok);
RawLexer.LexFromRawLexer(TheTok);
}
Args.emplace(UnexpArgII, std::move(ExpandedArgTokens));
}
// TODO: The condition really should be TheTok.is(tok::r_paren), but variadic
// macro arguments are not handled yet.
assert(TheTok.isOneOf(tok::r_paren, tok::comma) &&
"Expanded macro argument acquisition failed! After the end of the loop"
" this token should be ')'!");
return { MacroName, MI, Args };
}
static MacroInfo::tokens_iterator getMatchingRParen(
@ -867,8 +1010,8 @@ static MacroInfo::tokens_iterator getMatchingRParen(
assert(It->is(tok::l_paren) && "This token should be '('!");
// Skip until we find the closing ')'.
int ParanthesesDepth = 1;
while (ParanthesesDepth != 0) {
int ParenthesesDepth = 1;
while (ParenthesesDepth != 0) {
++It;
assert(It->isNot(tok::eof) &&
@ -877,10 +1020,10 @@ static MacroInfo::tokens_iterator getMatchingRParen(
"End of the macro definition reached before finding ')'!");
if (It->is(tok::l_paren))
++ParanthesesDepth;
++ParenthesesDepth;
if (It->is(tok::r_paren))
--ParanthesesDepth;
--ParenthesesDepth;
}
return It;
}
@ -897,6 +1040,38 @@ static const MacroInfo *getMacroInfoForLocation(const Preprocessor &PP,
return MD->findDirectiveAtLoc(Loc, SM).getMacroInfo();
}
void MacroArgMap::expandFromPrevMacro(const MacroArgMap &Super) {
for (value_type &Pair : *this) {
ExpArgTokens &CurrExpArgTokens = Pair.second;
// For each token in the expanded macro argument.
auto It = CurrExpArgTokens.begin();
while (It != CurrExpArgTokens.end()) {
if (It->isNot(tok::identifier)) {
++It;
continue;
}
const auto *II = It->getIdentifierInfo();
assert(II);
// Is this an argument that "Super" expands further?
if (!Super.count(II)) {
++It;
continue;
}
const ExpArgTokens &SuperExpArgTokens = Super.at(II);
It = CurrExpArgTokens.insert(
It, SuperExpArgTokens.begin(), SuperExpArgTokens.end());
std::advance(It, SuperExpArgTokens.size());
It = CurrExpArgTokens.erase(It);
}
}
}
void TokenPrinter::printToken(const Token &Tok) {
// If the tokens were already space separated, or if they must be to avoid
// them being implicitly pasted, add a space between them.

View File

@ -60,9 +60,8 @@ void functionLikeMacroTest() {
*ptr = 5; // expected-warning{{Dereference of null pointer}}
}
// TODO: Expand arguments.
// CHECK: <key>name</key><string>TO_NULL</string>
// CHECK: <key>expansion</key><string>setToNull(x)</string>
// CHECK-NEXT: <key>expansion</key><string>setToNull(&amp;ptr)</string>
#define DOES_NOTHING(x) \
{ \
@ -81,13 +80,11 @@ void functionLikeNestedMacroTest() {
DEREF(a) = 5; // expected-warning{{Dereference of null pointer}}
}
// TODO: Expand arguments.
// CHECK: <key>name</key><string>TO_NULL</string>
// CHECK-NEXT: <key>expansion</key><string>setToNull(x)</string>
// CHECK-NEXT: <key>expansion</key><string>setToNull(&amp;a)</string>
// TODO: Expand arguments.
// CHECK: <key>name</key><string>DEREF</string>
// CHECK-NEXT: <key>expansion</key><string>{ int b; b = 5; } print(x); *x</string>
// CHECK-NEXT: <key>expansion</key><string>{ int b; b = 5; } print(a); *a</string>
//===----------------------------------------------------------------------===//
// Tests for undefining and/or redefining macros.
@ -104,7 +101,6 @@ void undefinedMacroByTheEndOfParsingTest() {
#undef WILL_UNDEF_SET_NULL_TO_PTR
// TODO: Expand arguments.
// CHECK: <key>name</key><string>WILL_UNDEF_SET_NULL_TO_PTR</string>
// CHECK-NEXT: <key>expansion</key><string>ptr = nullptr;</string>
@ -125,7 +121,6 @@ void macroRedefinedMultipleTimesTest() {
print("This string shouldn't be in the plist file at all. Or anywhere, " \
"but here.");
// TODO: Expand arguments.
// CHECK: <key>name</key><string>WILL_REDIFINE_MULTIPLE_TIMES_SET_TO_NULL</string>
// CHECK-NEXT: <key>expansion</key><string>ptr = nullptr;</string>
@ -146,3 +141,276 @@ void undefinedMacroInsideAnotherMacroTest() {
// CHECK-NEXT: <key>expansion</key><string>ptr = nullptr;</string>
#undef WILL_UNDEF_SET_NULL_TO_PTR_2
//===----------------------------------------------------------------------===//
// Tests for macro arguments containing commas and parentheses.
//
// As of writing these tests, the algorithm expands macro arguments by lexing
// the macro's expansion location, and relies on finding tok::comma and
// tok::l_paren/tok::r_paren.
//===----------------------------------------------------------------------===//
// Note that commas and parentheses in strings aren't parsed as tok::comma or
// tok::l_paren/tok::r_paren, but why not test them.
#define TO_NULL_AND_PRINT(x, str) \
x = 0; \
print(str)
void macroArgContainsCommaInStringTest() {
int *a;
TO_NULL_AND_PRINT(a, "Will this , cause a crash?");
*a = 5; // expected-warning{{Dereference of null pointer}}
}
// CHECK: <key>name</key><string>TO_NULL_AND_PRINT</string>
// CHECK-NEXT: <key>expansion</key><string>a = 0; print( &quot;Will this , cause a crash?&quot;)</string>
void macroArgContainsLParenInStringTest() {
int *a;
TO_NULL_AND_PRINT(a, "Will this ( cause a crash?");
*a = 5; // expected-warning{{Dereference of null pointer}}
}
// CHECK: <key>name</key><string>TO_NULL_AND_PRINT</string>
// CHECK-NEXT: <key>expansion</key><string>a = 0; print( &quot;Will this ( cause a crash?&quot;)</string>
void macroArgContainsRParenInStringTest() {
int *a;
TO_NULL_AND_PRINT(a, "Will this ) cause a crash?");
*a = 5; // expected-warning{{Dereference of null pointer}}
}
// CHECK: <key>name</key><string>TO_NULL_AND_PRINT</string>
// CHECK-NEXT: <key>expansion</key><string>a = 0; print( &quot;Will this ) cause a crash?&quot;)</string>
#define CALL_FUNCTION(funcCall) \
funcCall
// Function calls do contain both tok::comma and tok::l_paren/tok::r_paren.
void macroArgContainsLParenRParenTest() {
int *a;
CALL_FUNCTION(setToNull(&a));
*a = 5; // expected-warning{{Dereference of null pointer}}
}
// CHECK: <key>name</key><string>CALL_FUNCTION</string>
// CHECK-NEXT: <key>expansion</key><string>setToNull(&amp;a)</string>
void setToNullAndPrint(int **vptr, const char *str) {
setToNull(vptr);
print(str);
}
void macroArgContainsCommaLParenRParenTest() {
int *a;
CALL_FUNCTION(setToNullAndPrint(&a, "Hello!"));
*a = 5; // expected-warning{{Dereference of null pointer}}
}
// CHECK: <key>name</key><string>CALL_FUNCTION</string>
// CHECK-NEXT: <key>expansion</key><string>setToNullAndPrint(&amp;a, &quot;Hello!&quot;)</string>
#define CALL_FUNCTION_WITH_TWO_PARAMS(funcCall, param1, param2) \
funcCall(param1, param2)
void macroArgContainsCommaLParenRParenTest2() {
int *a;
CALL_FUNCTION_WITH_TWO_PARAMS(setToNullAndPrint, &a, "Hello!");
*a = 5; // expected-warning{{Dereference of null pointer}}
}
// CHECK: <key>name</key><string>CALL_FUNCTION_WITH_TWO_PARAMS</string>
// CHECK-NEXT: <key>expansion</key><string>setToNullAndPrint( &amp;a, &quot;Hello!&quot;)</string>
#define CALL_LAMBDA(l) \
l()
void commaInBracketsTest() {
int *ptr;
const char str[] = "Hello!";
// You need to add parentheses around a lambda expression to compile this,
// else the comma in the capture will be parsed as divider of macro args.
CALL_LAMBDA(([&ptr, str] () mutable { TO_NULL(&ptr); }));
*ptr = 5; // expected-warning{{Dereference of null pointer}}
}
// CHECK: <key>name</key><string>CALL_LAMBDA</string>
// CHECK-NEXT: <key>expansion</key><string>([&amp;ptr, str] () mutable { setToNull(&amp;ptr); })()</string>
#define PASTE_CODE(code) \
code
void commaInBracesTest() {
PASTE_CODE({ // expected-warning{{Dereference of null pointer}}
// NOTE: If we were to add a new variable here after a comma, we'd get a
// compilation error, so this test is mainly here to show that this was also
// investigated.
// int *ptr = nullptr, a;
int *ptr = nullptr;
*ptr = 5;
})
}
// CHECK: <key>name</key><string>PASTE_CODE</string>
// CHECK-NEXT: <key>expansion</key><string>{ int *ptr = nullptr; *ptr = 5; }</string>
// Example taken from
// https://gcc.gnu.org/onlinedocs/cpp/Macro-Arguments.html#Macro-Arguments.
#define POTENTIALLY_EMPTY_PARAM(x, y) \
x; \
y = nullptr
void emptyParamTest() {
int *ptr;
POTENTIALLY_EMPTY_PARAM(,ptr);
*ptr = 5; // expected-warning{{Dereference of null pointer}}
}
// CHECK: <key>name</key><string>POTENTIALLY_EMPTY_PARAM</string>
// CHECK-NEXT: <key>expansion</key><string>;ptr = nullptr</string>
#define NESTED_EMPTY_PARAM(a, b) \
POTENTIALLY_EMPTY_PARAM(a, b);
void nestedEmptyParamTest() {
int *ptr;
NESTED_EMPTY_PARAM(, ptr);
*ptr = 5; // expected-warning{{Dereference of null pointer}}
}
// CHECK: <key>name</key><string>NESTED_EMPTY_PARAM</string>
// CHECK-NEXT: <key>expansion</key><string>; ptr = nullptr;</string>
#define CALL_FUNCTION_WITH_ONE_PARAM_THROUGH_MACRO(func, param) \
CALL_FUNCTION(func(param))
void lParenRParenInNestedMacro() {
int *ptr;
CALL_FUNCTION_WITH_ONE_PARAM_THROUGH_MACRO(setToNull, &ptr);
*ptr = 5; // expected-warning{{Dereference of null pointer}}
}
// CHECK: <key>name</key><string>CALL_FUNCTION_WITH_ONE_PARAM_THROUGH_MACRO</string>
// CHECK-NEXT: <key>expansion</key><string>setToNull( &amp;ptr)</string>
//===----------------------------------------------------------------------===//
// Tests for variadic macro arguments.
//===----------------------------------------------------------------------===//
template <typename ...Args>
void variadicFunc(Args ...args);
#define VARIADIC_SET_TO_NULL(ptr, ...) \
ptr = nullptr; \
variadicFunc(__VA_ARGS__)
void variadicMacroArgumentTest() {
int *ptr;
VARIADIC_SET_TO_NULL(ptr, 1, 5, "haha!");
*ptr = 5; // expected-warning{{Dereference of null pointer}}
}
// TODO: Should correctly display the rest of the parameters.
// CHECK: <key>name</key><string>VARIADIC_SET_TO_NULL</string>
// CHECK-NEXT: <key>expansion</key><string>ptr = nullptr; variadicFunc( 1)</string>
//===----------------------------------------------------------------------===//
// Tests for # and ##.
//===----------------------------------------------------------------------===//
#define DECLARE_FUNC_AND_SET_TO_NULL(funcName, ptr) \
void generated_##funcName(); \
ptr = nullptr;
void hashHashOperatorTest() {
int *ptr;
DECLARE_FUNC_AND_SET_TO_NULL(whatever, ptr);
*ptr = 5; // expected-warning{{Dereference of null pointer}}
}
// TODO: Should expand correctly.
// CHECK: <key>name</key><string>DECLARE_FUNC_AND_SET_TO_NULL</string>
// CHECK-NEXT: <key>expansion</key><string>void generated_##whatever(); ptr = nullptr;</string>
#define PRINT_STR(str, ptr) \
print(#str); \
ptr = nullptr
void hashOperatorTest() {
int *ptr;
PRINT_STR(Hello, ptr);
*ptr = 5; // expected-warning{{Dereference of null pointer}}
}
// TODO: Should expand correctly.
// CHECK: <key>name</key><string>PRINT_STR</string>
// CHECK-NEXT: <key>expansion</key><string>print(#Hello); ptr = nullptr</string>
//===----------------------------------------------------------------------===//
// Tests for more complex macro expansions.
//
// We won't cover anything that wasn't covered up to this point, but rather
// show more complex, macros with deeper nesting, more arguments (some unused)
// and so on.
//===----------------------------------------------------------------------===//
#define IF(Condition) \
if ( Condition )
#define L_BRACE {
#define R_BRACE }
#define LESS <
#define GREATER >
#define EQUALS =
#define SEMICOLON ;
#define NEGATIVE -
#define RETURN return
#define ZERO 0
#define EUCLIDEAN_ALGORITHM(A, B) \
IF(A LESS ZERO) L_BRACE \
A EQUALS NEGATIVE A SEMICOLON \
R_BRACE \
IF(B LESS ZERO) L_BRACE \
B EQUALS NEGATIVE B SEMICOLON \
R_BRACE \
\
/* This is where a while loop would be, but that seems to be too complex */ \
/* for the analyzer just yet. Let's just pretend that this algorithm */ \
/* works. */ \
\
RETURN B / (B - B) SEMICOLON
int getLowestCommonDenominator(int A, int B) {
EUCLIDEAN_ALGORITHM(A, B) // expected-warning{{Division by zero}}
}
void testVeryComplexAlgorithm() {
int tmp = 8 / (getLowestCommonDenominator(5, 7) - 1);
print(&tmp);
}
// CHECK: <key>name</key><string>EUCLIDEAN_ALGORITHM</string>
// CHECK-NEXT: <key>expansion</key><string>if (A&lt;0 ){A=-A;} if ( B&lt;0 ){ B=- B;}return B / ( B - B);</string>
#define YET_ANOTHER_SET_TO_NULL(x, y, z) \
print((void *) x); \
print((void *) y); \
z = nullptr;
#define DO_NOTHING(str) str
#define DO_NOTHING2(str2) DO_NOTHING(str2)
void test() {
int *ptr;
YET_ANOTHER_SET_TO_NULL(5, DO_NOTHING2("Remember the Vasa"), ptr);
*ptr = 5; // expected-warning{{Dereference of null pointer}}
}
// CHECK: <key>name</key><string>YET_ANOTHER_SET_TO_NULL</string>
// CHECK-NEXT: <key>expansion</key><string>print((void *)5); print((void *)&quot;Remember the Vasa&quot;); ptr = nullptr;</string>