Reviewer notes (free text ahead of the first "diff --git" header; patch tools
skip leading text like this).

What the patch does:
  * Adds tok::isStringLiteral() and reuses it in tok::isLiteral(),
    Parser::isTokenStringLiteral() and MacroArgs::StringifyArgument().
  * Lets _Pragma accept every string-literal token kind, strips the L / u / U /
    u8 encoding prefixes, and unwraps raw string literals.
  * Extends the lexer test with prefixed and raw-string _Pragma operands.

Changed relative to the submitted patch:
  * The escape-stripping loop added to Preprocessor::Handle_Pragma erased one
    character per escape (quadratic in the string length) and bounded itself
    with the unsigned expression e-2. It is now a single in-place compaction
    pass with the same results; the Pragma.cpp hunk headers were recomputed
    (+219,53 and +274,6).

Assumptions to confirm:
  * The input had line breaks and indentation collapsed. Line structure was
    rebuilt from the hunk line counts; two-space indentation and the alignment
    of trailing comments are reconstructed, not recovered. If context does not
    match byte-for-byte, apply with `git apply --ignore-whitespace`.
  * The post-image blob id on the Pragma.cpp "index" line is the submitted one
    and no longer describes the modified result.

diff --git a/clang/include/clang/Basic/TokenKinds.h b/clang/include/clang/Basic/TokenKinds.h
index e850971e34fa..dcbe1da1115d 100644
--- a/clang/include/clang/Basic/TokenKinds.h
+++ b/clang/include/clang/Basic/TokenKinds.h
@@ -68,15 +68,21 @@ inline bool isAnyIdentifier(TokenKind K) {
   return (K == tok::identifier) || (K == tok::raw_identifier);
 }
 
+/// \brief Return true if this is a C or C++ string-literal (or
+/// C++11 user-defined-string-literal) token.
+inline bool isStringLiteral(TokenKind K) {
+  return K == tok::string_literal || K == tok::wide_string_literal ||
+         K == tok::utf8_string_literal || K == tok::utf16_string_literal ||
+         K == tok::utf32_string_literal;
+}
+
 /// \brief Return true if this is a "literal" kind, like a numeric
 /// constant, string, etc.
 inline bool isLiteral(TokenKind K) {
-  return (K == tok::numeric_constant) || (K == tok::char_constant) ||
-         (K == tok::wide_char_constant) || (K == tok::utf16_char_constant) ||
-         (K == tok::utf32_char_constant) || (K == tok::string_literal) ||
-         (K == tok::wide_string_literal) || (K == tok::utf8_string_literal) ||
-         (K == tok::utf16_string_literal) || (K == tok::utf32_string_literal) ||
-         (K == tok::angle_string_literal);
+  return K == tok::numeric_constant || K == tok::char_constant ||
+         K == tok::wide_char_constant || K == tok::utf16_char_constant ||
+         K == tok::utf32_char_constant || isStringLiteral(K) ||
+         K == tok::angle_string_literal;
 }
 
 /// \brief Return true if this is any of tok::annot_* kinds.
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index 8e69b64c1a27..d8220b3ba559 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -279,11 +279,7 @@ private:
   /// isTokenStringLiteral - True if this token is a string-literal.
   ///
   bool isTokenStringLiteral() const {
-    return Tok.getKind() == tok::string_literal ||
-           Tok.getKind() == tok::wide_string_literal ||
-           Tok.getKind() == tok::utf8_string_literal ||
-           Tok.getKind() == tok::utf16_string_literal ||
-           Tok.getKind() == tok::utf32_string_literal;
+    return tok::isStringLiteral(Tok.getKind());
   }
 
   /// \brief Returns true if the current token is '=' or is a type of '='.
diff --git a/clang/lib/Lex/MacroArgs.cpp b/clang/lib/Lex/MacroArgs.cpp
index e36596f2af3c..f6e781a936d4 100644
--- a/clang/lib/Lex/MacroArgs.cpp
+++ b/clang/lib/Lex/MacroArgs.cpp
@@ -215,15 +215,11 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,
 
     // If this is a string or character constant, escape the token as specified
     // by 6.10.3.2p2.
-    if (Tok.is(tok::string_literal) ||       // "foo"
-        Tok.is(tok::wide_string_literal) ||  // L"foo"
-        Tok.is(tok::utf8_string_literal) ||  // u8"foo"
-        Tok.is(tok::utf16_string_literal) || // u"foo"
-        Tok.is(tok::utf32_string_literal) || // U"foo"
-        Tok.is(tok::char_constant) ||        // 'x'
-        Tok.is(tok::wide_char_constant) ||   // L'x'.
-        Tok.is(tok::utf16_char_constant) ||  // u'x'.
-        Tok.is(tok::utf32_char_constant)) {  // U'x'.
+    if (tok::isStringLiteral(Tok.getKind()) || // "foo", u8R"x(foo)x"_bar, etc.
+        Tok.is(tok::char_constant) ||          // 'x'
+        Tok.is(tok::wide_char_constant) ||     // L'x'.
+        Tok.is(tok::utf16_char_constant) ||    // u'x'.
+        Tok.is(tok::utf32_char_constant)) {    // U'x'.
       bool Invalid = false;
       std::string TokStr = PP.getSpelling(Tok, &Invalid);
       if (!Invalid) {
diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp
index 23d088a9fb23..2094dd1e1c6b 100644
--- a/clang/lib/Lex/Pragma.cpp
+++ b/clang/lib/Lex/Pragma.cpp
@@ -184,7 +184,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
 
   // Read the '"..."'.
   Lex(Tok);
-  if (Tok.isNot(tok::string_literal) && Tok.isNot(tok::wide_string_literal)) {
+  if (!tok::isStringLiteral(Tok.getKind())) {
     Diag(PragmaLoc, diag::err__Pragma_malformed);
     // Skip this token, and the ')', if present.
     if (Tok.isNot(tok::r_paren))
@@ -219,15 +219,53 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
   SourceLocation RParenLoc = Tok.getLocation();
   std::string StrVal = getSpelling(StrTok);
 
-  // The _Pragma is lexically sound. Destringize according to C99 6.10.9.1:
-  // "The string literal is destringized by deleting the L prefix, if present,
+  // The _Pragma is lexically sound. Destringize according to C11 6.10.9.1:
+  // "The string literal is destringized by deleting any encoding prefix,
   // deleting the leading and trailing double-quotes, replacing each escape
   // sequence \" by a double-quote, and replacing each escape sequence \\ by a
   // single backslash."
-  if (StrVal[0] == 'L') // Remove L prefix.
+  if (StrVal[0] == 'L' || StrVal[0] == 'U' ||
+      (StrVal[0] == 'u' && StrVal[1] != '8'))
     StrVal.erase(StrVal.begin());
-  assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' &&
-         "Invalid string token!");
+  else if (StrVal[0] == 'u')
+    StrVal.erase(StrVal.begin(), StrVal.begin() + 2);
+
+  if (StrVal[0] == 'R') {
+    // FIXME: C++11 does not specify how to handle raw-string-literals here.
+    // We strip off the 'R', the quotes, the d-char-sequences, and the parens.
+    assert(StrVal[1] == '"' && StrVal[StrVal.size() - 1] == '"' &&
+           "Invalid raw string token!");
+
+    // Measure the length of the d-char-sequence.
+    unsigned NumDChars = 0;
+    while (StrVal[2 + NumDChars] != '(') {
+      assert(NumDChars < (StrVal.size() - 5) / 2 &&
+             "Invalid raw string token!");
+      ++NumDChars;
+    }
+    assert(StrVal[StrVal.size() - 2 - NumDChars] == ')');
+
+    // Remove 'R " d-char-sequence' and 'd-char-sequence "'. We'll replace the
+    // parens below.
+    StrVal.erase(0, 2 + NumDChars);
+    StrVal.erase(StrVal.size() - 1 - NumDChars);
+  } else {
+    assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' &&
+           "Invalid string token!");
+
+    // Remove escaped quotes and escapes in one left-to-right pass, copying
+    // kept characters down in place (\\ -> '\' and \" -> '"').
+    unsigned ResultPos = 1;
+    for (unsigned i = 1, e = StrVal.size() - 1; i < e; ++i) {
+      // Only pair a backslash with a character before the closing quote.
+      if (StrVal[i] == '\\' && i + 1 < e &&
+          (StrVal[i + 1] == '\\' || StrVal[i + 1] == '"'))
+        ++i; // Drop the backslash, keep the character it escapes.
+      StrVal[ResultPos++] = StrVal[i];
+    }
+    // Trim the leftover tail but keep the closing quote.
+    StrVal.erase(StrVal.begin() + ResultPos, StrVal.end() - 1);
+  }
 
   // Remove the front quote, replacing it with a space, so that the pragma
   // contents appear to have a space before them.
@@ -236,16 +274,6 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
   // Replace the terminating quote with a \n.
   StrVal[StrVal.size()-1] = '\n';
 
-  // Remove escaped quotes and escapes.
-  for (unsigned i = 0, e = StrVal.size(); i != e-1; ++i) {
-    if (StrVal[i] == '\\' &&
-        (StrVal[i+1] == '\\' || StrVal[i+1] == '"')) {
-      // \\ -> '\' and \" -> '"'.
-      StrVal.erase(StrVal.begin()+i);
-      --e;
-    }
-  }
-
   // Plop the string (including the newline and trailing null) into a buffer
   // where we can lex it.
   Token TmpTok;
diff --git a/clang/test/Lexer/pragma-operators.cpp b/clang/test/Lexer/pragma-operators.cpp
index a76e0b2f97e2..6a5a498a151f 100644
--- a/clang/test/Lexer/pragma-operators.cpp
+++ b/clang/test/Lexer/pragma-operators.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fms-extensions -E %s | FileCheck %s
+// RUN: %clang_cc1 -fms-extensions -std=c++11 -E %s | FileCheck %s
 
 // Test that we properly expand the C99 _Pragma and Microsoft __pragma
 // into #pragma directives, with newlines where needed.
@@ -17,3 +17,21 @@
 #pragma warning(push)
  B(foo)
 #pragma warning(pop)
+
+#define pragma_L _Pragma(L"GCC diagnostic push")
+#define pragma_u8 _Pragma(u8"system_header")
+#define pragma_u _Pragma(u"GCC diagnostic pop")
+#define pragma_U _Pragma(U"comment(lib, \"libfoo\")")
+#define pragma_R _Pragma(R"(clang diagnostic ignored "-Wunused")")
+#define pragma_UR _Pragma(UR"(clang diagnostic error "-Wunused")")
+#define pragma_hello _Pragma(u8R"x(message R"y("Hello", world!)y")x")
+// CHECK: int n =
+// CHECK: #pragma GCC diagnostic push
+// CHECK: #pragma system_header
+// CHECK: #pragma GCC diagnostic pop
+// CHECK: #pragma comment(lib, "libfoo")
+// CHECK: #pragma clang diagnostic ignored "-Wunused"
+// CHECK: #pragma clang diagnostic error "-Wunused"
+// CHECK: #pragma message("\042Hello\042, world!")
+// CHECK: 0;
+int n = pragma_L pragma_u8 pragma_u pragma_U pragma_R pragma_UR pragma_hello 0;