diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index d1901db7c68e..f043c112861b 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -1862,6 +1862,33 @@ using TokenizerCallback = void (*)(StringRef Source, StringSaver &Saver, SmallVectorImpl &NewArgv, bool MarkEOLs); +/// Tokenizes content of configuration file. +/// +/// \param [in] Source The string representing content of config file. +/// \param [in] Saver Delegates back to the caller for saving parsed strings. +/// \param [out] NewArgv All parsed strings are appended to NewArgv. +/// \param [in] MarkEOLs Added for compatibility with TokenizerCallback. +/// +/// It works like TokenizeGNUCommandLine with ability to skip comment lines. +/// +void tokenizeConfigFile(StringRef Source, StringSaver &Saver, + SmallVectorImpl &NewArgv, + bool MarkEOLs = false); + +/// Reads command line options from the given configuration file. +/// +/// \param [in] CfgFileName Path to configuration file. +/// \param [in] Saver Objects that saves allocated strings. +/// \param [out] Argv Array to which the read options are added. +/// \return true if the file was successfully read. +/// +/// It reads content of the specified file, tokenizes it and expands "@file" +/// commands resolving file names in them relative to the directory where +/// CfgFilename resides. +/// +bool readConfigFile(StringRef CfgFileName, StringSaver &Saver, + SmallVectorImpl &Argv); + /// \brief Expand response files on a command line recursively using the given /// StringSaver and tokenization strategy. Argv should contain the command line /// before expansion and will be modified in place. If requested, Argv will diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index 4caf4a4fdce0..d95b791972c8 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -873,6 +873,45 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver, NewArgv.push_back(nullptr); } +void cl::tokenizeConfigFile(StringRef Source, StringSaver &Saver, + SmallVectorImpl &NewArgv, + bool MarkEOLs) { + for (const char *Cur = Source.begin(); Cur != Source.end();) { + SmallString<128> Line; + // Check for comment line. + if (isWhitespace(*Cur)) { + while (Cur != Source.end() && isWhitespace(*Cur)) + ++Cur; + continue; + } + if (*Cur == '#') { + while (Cur != Source.end() && *Cur != '\n') + ++Cur; + continue; + } + // Find end of the current line. + const char *Start = Cur; + for (const char *End = Source.end(); Cur != End; ++Cur) { + if (*Cur == '\\') { + if (Cur + 1 != End) { + ++Cur; + if (*Cur == '\n' || + (*Cur == '\r' && (Cur + 1 != End) && Cur[1] == '\n')) { + Line.append(Start, Cur - 1); + if (*Cur == '\r') + ++Cur; + Start = Cur + 1; + } + } + } else if (*Cur == '\n') + break; + } + // Tokenize line. + Line.append(Start, Cur); + cl::TokenizeGNUCommandLine(Line, Saver, NewArgv, MarkEOLs); + } +} + // It is called byte order marker but the UTF-8 BOM is actually not affected // by the host system's endianness. static bool hasUTF8ByteOrderMark(ArrayRef S) { @@ -977,6 +1016,15 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, return AllExpanded; } +bool cl::readConfigFile(StringRef CfgFile, StringSaver &Saver, + SmallVectorImpl &Argv) { + if (!ExpandResponseFile(CfgFile, Saver, cl::tokenizeConfigFile, Argv, + /*MarkEOLs*/ false, /*RelativeNames*/ true)) + return false; + return ExpandResponseFiles(Saver, cl::tokenizeConfigFile, Argv, + /*MarkEOLs*/ false, /*RelativeNames*/ true); +} + /// ParseEnvironmentOptions - An alternative entry point to the /// CommandLine library, which allows you to read the program's name /// from the caller (as PROGNAME) and its command-line arguments from diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp index 1fb0213b4d18..36ff4e247e9f 100644 --- a/llvm/unittests/Support/CommandLineTest.cpp +++ b/llvm/unittests/Support/CommandLineTest.cpp @@ -207,6 +207,85 @@ TEST(CommandLineTest, TokenizeWindowsCommandLine) { array_lengthof(Output)); } +TEST(CommandLineTest, TokenizeConfigFile1) { + const char *Input = "\\"; + const char *const Output[] = { "\\" }; + testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output, + array_lengthof(Output)); +} + +TEST(CommandLineTest, TokenizeConfigFile2) { + const char *Input = "\\abc"; + const char *const Output[] = { "abc" }; + testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output, + array_lengthof(Output)); +} + +TEST(CommandLineTest, TokenizeConfigFile3) { + const char *Input = "abc\\"; + const char *const Output[] = { "abc\\" }; + testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output, + array_lengthof(Output)); +} + +TEST(CommandLineTest, TokenizeConfigFile4) { + const char *Input = "abc\\\n123"; + const char *const Output[] = { "abc123" }; + testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output, + array_lengthof(Output)); +} + +TEST(CommandLineTest, TokenizeConfigFile5) { + const char *Input = "abc\\\r\n123"; + const char *const Output[] = { "abc123" }; + testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output, + array_lengthof(Output)); +} + +TEST(CommandLineTest, TokenizeConfigFile6) { + const char *Input = "abc\\\n"; + const char *const Output[] = { "abc" }; + testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output, + array_lengthof(Output)); +} + +TEST(CommandLineTest, TokenizeConfigFile7) { + const char *Input = "abc\\\r\n"; + const char *const Output[] = { "abc" }; + testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output, + array_lengthof(Output)); +} + +TEST(CommandLineTest, TokenizeConfigFile8) { + SmallVector Actual; + BumpPtrAllocator A; + StringSaver Saver(A); + cl::tokenizeConfigFile("\\\n", Saver, Actual, /*MarkEOLs=*/false); + EXPECT_TRUE(Actual.empty()); +} + +TEST(CommandLineTest, TokenizeConfigFile9) { + SmallVector Actual; + BumpPtrAllocator A; + StringSaver Saver(A); + cl::tokenizeConfigFile("\\\r\n", Saver, Actual, /*MarkEOLs=*/false); + EXPECT_TRUE(Actual.empty()); +} + +TEST(CommandLineTest, TokenizeConfigFile10) { + const char *Input = "\\\nabc"; + const char *const Output[] = { "abc" }; + testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output, + array_lengthof(Output)); +} + +TEST(CommandLineTest, TokenizeConfigFile11) { + const char *Input = "\\\r\nabc"; + const char *const Output[] = { "abc" }; + testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output, + array_lengthof(Output)); +} + TEST(CommandLineTest, AliasesWithArguments) { static const size_t ARGC = 3; const char *const Inputs[][ARGC] = { @@ -648,4 +727,58 @@ TEST(CommandLineTest, SetDefautValue) { EXPECT_TRUE(Opt3 == 3); } +TEST(CommandLineTest, ReadConfigFile) { + llvm::SmallVector Argv; + + llvm::SmallString<128> TestDir; + std::error_code EC = + llvm::sys::fs::createUniqueDirectory("unittest", TestDir); + EXPECT_TRUE(!EC); + + llvm::SmallString<128> TestCfg; + llvm::sys::path::append(TestCfg, TestDir, "foo"); + std::ofstream ConfigFile(TestCfg.c_str()); + EXPECT_TRUE(ConfigFile.is_open()); + ConfigFile << "# Comment\n" + "-option_1\n" + "@subconfig\n" + "-option_3=abcd\n" + "-option_4=\\\n" + "cdef\n"; + ConfigFile.close(); + + llvm::SmallString<128> TestCfg2; + llvm::sys::path::append(TestCfg2, TestDir, "subconfig"); + std::ofstream ConfigFile2(TestCfg2.c_str()); + EXPECT_TRUE(ConfigFile2.is_open()); + ConfigFile2 << "-option_2\n" + "\n" + " # comment\n"; + ConfigFile2.close(); + + // Make sure the current directory is not the directory where config files + // resides. In this case the code that expands response files will not find + // 'subconfig' unless it resolves nested inclusions relative to the including + // file. + llvm::SmallString<128> CurrDir; + EC = llvm::sys::fs::current_path(CurrDir); + EXPECT_TRUE(!EC); + EXPECT_TRUE(StringRef(CurrDir) != StringRef(TestDir)); + + llvm::BumpPtrAllocator A; + llvm::StringSaver Saver(A); + bool Result = llvm::cl::readConfigFile(TestCfg, Saver, Argv); + + EXPECT_TRUE(Result); + EXPECT_EQ(Argv.size(), 4U); + EXPECT_STREQ(Argv[0], "-option_1"); + EXPECT_STREQ(Argv[1], "-option_2"); + EXPECT_STREQ(Argv[2], "-option_3=abcd"); + EXPECT_STREQ(Argv[3], "-option_4=cdef"); + + llvm::sys::fs::remove(TestCfg2); + llvm::sys::fs::remove(TestCfg); + llvm::sys::fs::remove(TestDir); +} + } // anonymous namespace