Fix bugs in EOL marking in command line tokenizers

Add unit tests for this behavior, since the integration test for
clang-cl did not catch these bugs.

Fixes PR47604

Differential Revision: https://reviews.llvm.org/D90866
This commit is contained in:
Reid Kleckner 2020-11-05 09:58:02 -08:00
parent d2e7dca5ca
commit f55247456e
2 changed files with 74 additions and 38 deletions

View File

@ -832,7 +832,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
// Consume runs of whitespace.
if (Token.empty()) {
while (I != E && isWhitespace(Src[I])) {
// Mark the end of lines in response files
// Mark the end of lines in response files.
if (MarkEOLs && Src[I] == '\n')
NewArgv.push_back(nullptr);
++I;
@ -869,6 +869,9 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
if (isWhitespace(C)) {
if (!Token.empty())
NewArgv.push_back(Saver.save(StringRef(Token)).data());
// Mark the end of lines in response files.
if (MarkEOLs && C == '\n')
NewArgv.push_back(nullptr);
Token.clear();
continue;
}
@ -880,9 +883,6 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
// Append the last token after hitting EOF with no whitespace.
if (!Token.empty())
NewArgv.push_back(Saver.save(StringRef(Token)).data());
// Mark the end of response files
if (MarkEOLs)
NewArgv.push_back(nullptr);
}
/// Backslashes are interpreted in a rather complicated way in the Windows-style
@ -956,11 +956,11 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver,
++I;
StringRef NormalChars = Src.slice(Start, I);
if (I >= E || isWhitespaceOrNull(Src[I])) {
if (I < E && Src[I] == '\n')
MarkEOL();
// No special characters: slice out the substring and start the next
// token. Copy the string if the caller asks us to.
AddToken(AlwaysCopy ? Saver.save(NormalChars) : NormalChars);
if (I < E && Src[I] == '\n')
MarkEOL();
} else if (Src[I] == '\"') {
Token += NormalChars;
State = QUOTED;

View File

@ -199,14 +199,15 @@ typedef void ParserFunction(StringRef Source, StringSaver &Saver,
bool MarkEOLs);
void testCommandLineTokenizer(ParserFunction *parse, StringRef Input,
const char *const Output[], size_t OutputSize) {
ArrayRef<const char *> Output,
bool MarkEOLs = false) {
SmallVector<const char *, 0> Actual;
BumpPtrAllocator A;
StringSaver Saver(A);
parse(Input, Saver, Actual, /*MarkEOLs=*/false);
EXPECT_EQ(OutputSize, Actual.size());
parse(Input, Saver, Actual, MarkEOLs);
EXPECT_EQ(Output.size(), Actual.size());
for (unsigned I = 0, E = Actual.size(); I != E; ++I) {
if (I < OutputSize) {
if (I < Output.size()) {
EXPECT_STREQ(Output[I], Actual[I]);
}
}
@ -219,8 +220,7 @@ TEST(CommandLineTest, TokenizeGNUCommandLine) {
const char *const Output[] = {
"foo bar", "foo bar", "foo bar", "foo\\bar",
"-DFOO=bar()", "foobarbaz", "C:\\src\\foo.cpp", "C:srcfoo.cpp"};
testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output,
array_lengthof(Output));
testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output);
}
TEST(CommandLineTest, TokenizeWindowsCommandLine1) {
@ -228,75 +228,85 @@ TEST(CommandLineTest, TokenizeWindowsCommandLine1) {
R"(a\b c\\d e\\"f g" h\"i j\\\"k "lmn" o pqr "st \"u" \v)";
const char *const Output[] = { "a\\b", "c\\\\d", "e\\f g", "h\"i", "j\\\"k",
"lmn", "o", "pqr", "st \"u", "\\v" };
testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output,
array_lengthof(Output));
testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output);
}
TEST(CommandLineTest, TokenizeWindowsCommandLine2) {
const char Input[] = "clang -c -DFOO=\"\"\"ABC\"\"\" x.cpp";
const char *const Output[] = { "clang", "-c", "-DFOO=\"ABC\"", "x.cpp"};
testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output,
array_lengthof(Output));
testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output);
}
TEST(CommandLineTest, TokenizeWindowsCommandLineQuotedLastArgument) {
const char Input1[] = R"(a b c d "")";
const char *const Output1[] = {"a", "b", "c", "d", ""};
testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input1, Output1,
array_lengthof(Output1));
testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input1, Output1);
const char Input2[] = R"(a b c d ")";
const char *const Output2[] = {"a", "b", "c", "d"};
testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input2, Output2,
array_lengthof(Output2));
testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input2, Output2);
}
TEST(CommandLineTest, TokenizeAndMarkEOLs) {
  // Why EOL marking exists: some clang-cl options consume every remaining
  // argument on their own response-file line, but must not reach into the
  // lines that follow. Given a response file with these contents:
  //   /c /Zi /O2
  //   /Oy- /link /debug /opt:ref
  //   /Zc:ThreadsafeStatics-
  //
  // only "/debug /opt:ref" are linker flags; everything else is a compiler
  // flag. To let clang-cl locate the end of the current line, the tokenizer
  // emits a nullptr sentinel into its output for each line break.
  const char Source[] = "clang -Xclang foo\n\nfoo\"bar\"baz\n x.cpp\n";

  // Source contains four '\n' characters, so four sentinels are expected, each
  // placed after the tokens of the line it terminates.
  const char *const Expected[] = {
      "clang", "-Xclang", "foo", nullptr, // first line
      nullptr,                            // empty second line
      "foobarbaz",               nullptr, // quotes are stripped and joined
      "x.cpp",                   nullptr  // leading blank, trailing newline
  };

  // Both tokenizers must agree on where the sentinels go.
  const bool MarkEOLs = true;
  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Source, Expected,
                           MarkEOLs);
  testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Source, Expected,
                           MarkEOLs);
}
TEST(CommandLineTest, TokenizeConfigFile1) {
const char *Input = "\\";
const char *const Output[] = { "\\" };
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
array_lengthof(Output));
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile2) {
const char *Input = "\\abc";
const char *const Output[] = { "abc" };
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
array_lengthof(Output));
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile3) {
const char *Input = "abc\\";
const char *const Output[] = { "abc\\" };
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
array_lengthof(Output));
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile4) {
const char *Input = "abc\\\n123";
const char *const Output[] = { "abc123" };
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
array_lengthof(Output));
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile5) {
const char *Input = "abc\\\r\n123";
const char *const Output[] = { "abc123" };
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
array_lengthof(Output));
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile6) {
const char *Input = "abc\\\n";
const char *const Output[] = { "abc" };
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
array_lengthof(Output));
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile7) {
const char *Input = "abc\\\r\n";
const char *const Output[] = { "abc" };
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
array_lengthof(Output));
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile8) {
@ -318,15 +328,13 @@ TEST(CommandLineTest, TokenizeConfigFile9) {
TEST(CommandLineTest, TokenizeConfigFile10) {
const char *Input = "\\\nabc";
const char *const Output[] = { "abc" };
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
array_lengthof(Output));
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile11) {
const char *Input = "\\\r\nabc";
const char *const Output[] = { "abc" };
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
array_lengthof(Output));
testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, AliasesWithArguments) {
@ -962,6 +970,34 @@ TEST(CommandLineTest, ResponseFileRelativePath) {
testing::Pointwise(StringEquality(), {"test/test", "-flag"}));
}
TEST(CommandLineTest, ResponseFileEOLs) {
  // Expands a two-line response file held in an in-memory file system and
  // checks that the expanded argument vector carries a nullptr sentinel where
  // the line break was.
#ifdef _WIN32
  const char *TestRoot = "C:\\";
#else
  const char *TestRoot = "//net";
#endif

  vfs::InMemoryFileSystem FS;
  FS.setCurrentWorkingDirectory(TestRoot);
  // The file has one '\n' in the middle and none at the end.
  FS.addFile("eols.rsp", 0,
             MemoryBuffer::getMemBuffer("-Xclang -Wno-whatever\n input.cpp"));

  BumpPtrAllocator A;
  StringSaver Saver(A);
  SmallVector<const char *, 2> Argv = {"clang", "@eols.rsp"};

  // NOTE(review): the two bare `true` flags are presumably MarkEOLs and
  // RelativeNames -- confirm against the cl::ExpandResponseFiles declaration.
  // The nullptr expected below depends on EOL marking being enabled.
  const bool Expanded = cl::ExpandResponseFiles(
      Saver, cl::TokenizeWindowsCommandLine, Argv, true, true, FS,
      /*CurrentDir=*/StringRef(TestRoot));
  ASSERT_TRUE(Expanded);

  // "@eols.rsp" is replaced in place by the file's tokens; the single line
  // break shows up as one nullptr between "-Wno-whatever" and "input.cpp".
  const char *ExpectedArgv[] = {"clang", "-Xclang", "-Wno-whatever", nullptr,
                                "input.cpp"};
  const size_t NumExpected = array_lengthof(ExpectedArgv);
  ASSERT_EQ(NumExpected, Argv.size());

  for (size_t Idx = 0; Idx != NumExpected; ++Idx) {
    const char *Want = ExpectedArgv[Idx];
    if (Want != nullptr) {
      ASSERT_STREQ(Want, Argv[Idx]);
    } else {
      // Sentinel slot: compare pointers, there is no string to compare.
      ASSERT_EQ(Argv[Idx], nullptr);
    }
  }
}
TEST(CommandLineTest, SetDefautValue) {
cl::ResetCommandLineParser();