Implement delimited escape sequences.

\x{XXXX}, \u{XXXX}, and \o{OOOO} are accepted in all language modes in character and string literals. This is a feature proposed for both C++ (P2290R1) and C (N2785). The papers have been seen by both committees but are not yet adopted into either standard. However, they do have support from both committees.
2021-09-15 09:52:25 -04:00 · 2021-09-15 09:52:25 -04:00 · 274adcb866
parent bbca392a7f
commit 274adcb866
7 changed files with 348 additions and 42 deletions
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@ -127,6 +127,15 @@ def warn_utf8_symbol_zero_width : Warning<
  "identifier contains Unicode character <U+%0> that is invisible in "
  "some environments">, InGroup<DiagGroup<"unicode-zero-width">>;

+def ext_delimited_escape_sequence : Extension<
+  "delimited escape sequences are a Clang extension">,
+  InGroup<DiagGroup<"delimited-escape-sequence-extension">>;
+def err_delimited_escape_empty : Error<
+  "delimited escape sequence cannot be empty">;
+def err_delimited_escape_missing_brace: Error<
+  "expected '{' after '\\%0' escape sequence">;
+def err_delimited_escape_invalid : Error<
+  "invalid digit '%0' in escape sequence">;
 def err_hex_escape_no_digits : Error<
  "\\%0 used with no following hex digits">;
 def warn_ucn_escape_no_digits : Warning<
@ -134,6 +143,12 @@ def warn_ucn_escape_no_digits : Warning<
  "treating as '\\' followed by identifier">, InGroup<Unicode>;
 def err_ucn_escape_incomplete : Error<
  "incomplete universal character name">;
+def warn_delimited_ucn_incomplete : Warning<
+  "incomplete delimited universal character name; "
+  "treating as '\\' 'u' '{' identifier">, InGroup<Unicode>;
+def warn_delimited_ucn_empty : Warning<
+  "empty delimited universal character name; "
+  "treating as '\\' 'u' '{' '}'">, InGroup<Unicode>;
 def warn_ucn_escape_incomplete : Warning<
  "incomplete universal character name; "
  "treating as '\\' followed by identifier">, InGroup<Unicode>;
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@ -3112,6 +3112,10 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
                           Token *Result) {
  unsigned CharSize;
  char Kind = getCharAndSize(StartPtr, CharSize);
+  bool Delimited = false;
+  bool FoundEndDelimiter = false;
+  unsigned Count = 0;
+  bool Diagnose = Result && !isLexingRawMode();

  unsigned NumHexDigits;
  if (Kind == 'u')
@ -3122,7 +3126,7 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
    return 0;

  if (!LangOpts.CPlusPlus && !LangOpts.C99) {
-    if (Result && !isLexingRawMode())
+    if (Diagnose)
      Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
    return 0;
  }
@ -3131,39 +3135,70 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
  const char *KindLoc = &CurPtr[-1];

  uint32_t CodePoint = 0;
-  for (unsigned i = 0; i < NumHexDigits; ++i) {
+  while (Count != NumHexDigits || Delimited) {
    char C = getCharAndSize(CurPtr, CharSize);
+    if (!Delimited && C == '{') {
+      Delimited = true;
+      CurPtr += CharSize;
+      continue;
+    }
+
+    if (Delimited && C == '}') {
+      CurPtr += CharSize;
+      FoundEndDelimiter = true;
+      break;
+    }

    unsigned Value = llvm::hexDigitValue(C);
    if (Value == -1U) {
-      if (Result && !isLexingRawMode()) {
-        if (i == 0) {
-          Diag(BufferPtr, diag::warn_ucn_escape_no_digits)
-            << StringRef(KindLoc, 1);
-        } else {
-          Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
-
-          // If the user wrote \U1234, suggest a fixit to \u.
-          if (i == 4 && NumHexDigits == 8) {
-            CharSourceRange URange = makeCharRange(*this, KindLoc, KindLoc + 1);
-            Diag(KindLoc, diag::note_ucn_four_not_eight)
-              << FixItHint::CreateReplacement(URange, "u");
-          }
-        }
-      }
+      if (!Delimited)
+        break;
+      if (Diagnose)
+        Diag(BufferPtr, diag::warn_delimited_ucn_incomplete)
+            << StringRef(&C, 1);
+      return 0;
+    }

+    if (CodePoint & 0xF000'0000) {
+      if (Diagnose)
+        Diag(KindLoc, diag::err_escape_too_large) << 0;
      return 0;
    }

    CodePoint <<= 4;
-    CodePoint += Value;
-
+    CodePoint |= Value;
    CurPtr += CharSize;
+    Count++;
+  }
+
+  if (Count == 0) {
+    if (Diagnose)
+      Diag(StartPtr, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
+                                       : diag::warn_ucn_escape_no_digits)
+          << StringRef(KindLoc, 1);
+    return 0;
+  }
+
+  if (!Delimited && Count != NumHexDigits) {
+    if (Diagnose) {
+      Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
+      // If the user wrote \U1234, suggest a fixit to \u.
+      if (Count == 4 && NumHexDigits == 8) {
+        CharSourceRange URange = makeCharRange(*this, KindLoc, KindLoc + 1);
+        Diag(KindLoc, diag::note_ucn_four_not_eight)
+            << FixItHint::CreateReplacement(URange, "u");
+      }
+    }
+    return 0;
+  }
+
+  if (Delimited && PP) {
+    Diag(BufferPtr, diag::ext_delimited_escape_sequence);
  }

  if (Result) {
    Result->setFlag(Token::HasUCN);
-    if (CurPtr - StartPtr == (ptrdiff_t)NumHexDigits + 2)
+    if (CurPtr - StartPtr == (ptrdiff_t)(Count + 2 + (Delimited ? 2 : 0)))
      StartPtr = CurPtr;
    else
      while (StartPtr != CurPtr)
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@ -95,6 +95,8 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
                                  DiagnosticsEngine *Diags,
                                  const LangOptions &Features) {
  const char *EscapeBegin = ThisTokBuf;
+  bool Delimited = false;
+  bool EndDelimiterFound = false;

  // Skip the '\' char.
  ++ThisTokBuf;
@ -143,26 +145,47 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
    break;
  case 'x': { // Hex escape.
    ResultChar = 0;
-    if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
+    if (ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
+      Delimited = true;
+      ThisTokBuf++;
+      if (*ThisTokBuf == '}') {
+        Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+             diag::err_delimited_escape_empty);
+        return ResultChar;
+      }
+    } else if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
      if (Diags)
        Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
             diag::err_hex_escape_no_digits) << "x";
-      HadError = true;
-      break;
+      return ResultChar;
    }

    // Hex escapes are a maximal series of hex digits.
    bool Overflow = false;
    for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
-      int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
-      if (CharVal == -1) break;
+      if (Delimited && *ThisTokBuf == '}') {
+        ThisTokBuf++;
+        EndDelimiterFound = true;
+        break;
+      }
+      int CharVal = llvm::hexDigitValue(*ThisTokBuf);
+      if (CharVal == -1) {
+        // Non delimited hex escape sequences stop at the first non-hex digit.
+        if (!Delimited)
+          break;
+        HadError = true;
+        if (Diags)
+          Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+               diag::err_delimited_escape_invalid)
+              << StringRef(ThisTokBuf, 1);
+        continue;
+      }
      // About to shift out a digit?
      if (ResultChar & 0xF0000000)
        Overflow = true;
      ResultChar <<= 4;
      ResultChar |= CharVal;
    }
-
    // See if any bits will be truncated when evaluated as a character.
    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
      Overflow = true;
@ -170,9 +193,13 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
    }

    // Check for overflow.
-    if (Overflow && Diags)   // Too many digits to fit in
-      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
-           diag::err_escape_too_large) << 0;
+    if (!HadError && Overflow) { // Too many digits to fit in
+      HadError = true;
+      if (Diags)
+        Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+             diag::err_escape_too_large)
+            << 0;
+    }
    break;
  }
  case '0': case '1': case '2': case '3':
@ -200,7 +227,58 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
    }
    break;
  }
+  case 'o': {
+    bool Overflow = false;
+    if (ThisTokBuf == ThisTokEnd || *ThisTokBuf != '{') {
+      HadError = true;
+      if (Diags)
+        Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+             diag::err_delimited_escape_missing_brace);

+      break;
+    }
+    ResultChar = 0;
+    Delimited = true;
+    ++ThisTokBuf;
+    if (*ThisTokBuf == '}') {
+      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+           diag::err_delimited_escape_empty);
+      return ResultChar;
+    }
+
+    while (ThisTokBuf != ThisTokEnd) {
+      if (*ThisTokBuf == '}') {
+        EndDelimiterFound = true;
+        ThisTokBuf++;
+        break;
+      }
+      if (*ThisTokBuf < '0' || *ThisTokBuf > '7') {
+        HadError = true;
+        if (Diags)
+          Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+               diag::err_delimited_escape_invalid)
+              << StringRef(ThisTokBuf, 1);
+        ThisTokBuf++;
+        continue;
+      }
+      if (ResultChar & 0x020000000)
+        Overflow = true;
+
+      ResultChar <<= 3;
+      ResultChar |= *ThisTokBuf++ - '0';
+    }
+    // Check for overflow.  Reject '\777', but not L'\777'.
+    if (!HadError &&
+        (Overflow || (CharWidth != 32 && (ResultChar >> CharWidth) != 0))) {
+      HadError = true;
+      if (Diags)
+        Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+             diag::err_escape_too_large)
+            << 1;
+      ResultChar &= ~0U >> (32 - CharWidth);
+    }
+    break;
+  }
    // Otherwise, these are not valid escapes.
  case '(': case '{': case '[': case '%':
    // GCC accepts these as extensions.  We warn about them as such though.
@ -224,6 +302,17 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
    break;
  }

+  if (Delimited && Diags) {
+    if (!EndDelimiterFound)
+      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+           diag::err_expected)
+          << tok::r_brace;
+    else if (!HadError) {
+      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+           diag::ext_delimited_escape_sequence);
+    }
+  }
+
  return ResultChar;
 }

@ -245,18 +334,32 @@ void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
    }

    ++I;
-    assert(*I == 'u' || *I == 'U');
+    char Kind = *I;
+    ++I;
+
+    assert(Kind == 'u' || Kind == 'U');
+    uint32_t CodePoint = 0;
+
+    if (Kind == 'u' && *I == '{') {
+      for (++I; *I != '}'; ++I) {
+        unsigned Value = llvm::hexDigitValue(*I);
+        assert(Value != -1U);
+        CodePoint <<= 4;
+        CodePoint += Value;
+      }
+      appendCodePoint(CodePoint, Buf);
+      continue;
+    }

    unsigned NumHexDigits;
-    if (*I == 'u')
+    if (Kind == 'u')
      NumHexDigits = 4;
    else
      NumHexDigits = 8;

    assert(I + NumHexDigits <= E);

-    uint32_t CodePoint = 0;
-    for (++I; NumHexDigits != 0; ++I, --NumHexDigits) {
+    for (; NumHexDigits != 0; ++I, --NumHexDigits) {
      unsigned Value = llvm::hexDigitValue(*I);
      assert(Value != -1U);

@ -282,28 +385,82 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
  // Skip the '\u' char's.
  ThisTokBuf += 2;

-  if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
+  bool Delimited = false;
+  bool EndDelimiterFound = false;
+  bool HasError = false;
+
+  if (UcnBegin[1] == 'u' && in_char_string_literal &&
+      ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
+    Delimited = true;
+    ThisTokBuf++;
+  } else if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
    if (Diags)
      Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
           diag::err_hex_escape_no_digits) << StringRef(&ThisTokBuf[-1], 1);
    return false;
  }
  UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
-  unsigned short UcnLenSave = UcnLen;
-  for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) {
-    int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
-    if (CharVal == -1) break;
+
+  bool Overflow = false;
+  unsigned short Count = 0;
+  for (; ThisTokBuf != ThisTokEnd && (Delimited || Count != UcnLen);
+       ++ThisTokBuf) {
+    if (Delimited && *ThisTokBuf == '}') {
+      ++ThisTokBuf;
+      EndDelimiterFound = true;
+      break;
+    }
+    int CharVal = llvm::hexDigitValue(*ThisTokBuf);
+    if (CharVal == -1) {
+      HasError = true;
+      if (!Delimited)
+        break;
+      if (Diags) {
+        Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+             diag::err_delimited_escape_invalid)
+            << StringRef(ThisTokBuf, 1);
+      }
+      Count++;
+      continue;
+    }
+    if (UcnVal & 0xF0000000) {
+      Overflow = true;
+      continue;
+    }
    UcnVal <<= 4;
    UcnVal |= CharVal;
+    Count++;
  }
-  // If we didn't consume the proper number of digits, there is a problem.
-  if (UcnLenSave) {
+
+  if (Overflow) {
    if (Diags)
      Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
-           diag::err_ucn_escape_incomplete);
+           diag::err_escape_too_large)
+          << 0;
    return false;
  }

+  if (Delimited && !EndDelimiterFound) {
+    if (Diags) {
+      Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+           diag::err_expected)
+          << tok::r_brace;
+    }
+    return false;
+  }
+
+  // If we didn't consume the proper number of digits, there is a problem.
+  if (Count == 0 || (!Delimited && Count != UcnLen)) {
+    if (Diags)
+      Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+           Delimited ? diag::err_delimited_escape_empty
+                     : diag::err_ucn_escape_incomplete);
+    return false;
+  }
+
+  if (HasError)
+    return false;
+
  // Check UCN constraints (C99 6.4.3p2) [C++11 lex.charset p2]
  if ((0xD800 <= UcnVal && UcnVal <= 0xDFFF) || // surrogate codepoints
      UcnVal > 0x10FFFF) {                      // maximum legal UTF32 value
@ -338,6 +495,10 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
    Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
         diag::warn_ucn_not_valid_in_c89_literal);

+  if (Delimited && Diags)
+    Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+         diag::ext_delimited_escape_sequence);
+
  return true;
 }

--- a/clang/test/Lexer/char-escapes-delimited.c
+++ b/clang/test/Lexer/char-escapes-delimited.c
@ -0,0 +1,81 @@
+// RUN: %clang_cc1 -fsyntax-only -pedantic -verify %s
+// RUN: %clang_cc1 -x c -fsyntax-only -pedantic -verify %s
+// RUN: %clang_cc1 -fwchar-type=short -fno-signed-wchar -fsyntax-only -pedantic -verify %s
+// RUN: %clang_cc1 -x c -fwchar-type=short -fno-signed-wchar -fsyntax-only -pedantic -verify %s
+
+const char *errors =
+    "\u{}"  //expected-error {{delimited escape sequence cannot be empty}}
+    "\u{"   //expected-error {{expected '}'}}
+    "\u{h}" //expected-error {{invalid digit 'h' in escape sequence}}
+    "\x{}"  //expected-error {{delimited escape sequence cannot be empty}}
+    "\x{"   //expected-error {{expected '}'}}
+    "\x{h}" //expected-error {{invalid digit 'h' in escape sequence}}
+    "\o{}"  //expected-error {{delimited escape sequence cannot be empty}}
+    "\o{"   //expected-error {{expected '}'}}
+    "\o{8}" //expected-error {{invalid digit '8' in escape sequence}}
+    ;
+
+void ucn() {
+  char a = '\u{1234}'; // expected-error {{character too large for enclosing character literal type}}
+                       // expected-warning@-1 {{delimited escape sequences are a Clang extension}}
+
+  unsigned b = U'\u{1234}'; // expected-warning {{extension}}
+
+#ifdef __cplusplus
+  unsigned b2 = U'\u{1}'; // expected-warning {{extension}}
+#else
+  unsigned b2 = U'\u{1}';     //expected-error {{universal character name refers to a control character}}
+#endif
+
+  unsigned c = U'\u{000000000001234}'; // expected-warning {{extension}}
+  unsigned d = U'\u{111111111}';       //expected-error {{hex escape sequence out of range}}
+}
+
+void hex() {
+  char a = '\x{1}';             // expected-warning {{extension}}
+  char b = '\x{abcdegggggabc}'; // expected-error 5{{invalid digit 'g' in escape sequence}}
+  char c = '\x{ff1}';           // expected-error {{hex escape sequence out of range}}
+
+#if __WCHAR_MAX__ > 0xFFFF
+  unsigned d = L'\x{FFFFFFFF}';  // expected-warning {{extension}}
+  unsigned e = L'\x{100000000}'; // expected-error {{hex escape sequence out of range}}
+#else
+  unsigned f = L'\x{FFFF}';   // expected-warning {{extension}}
+  unsigned g = L'\x{10000}';  // expected-error {{hex escape sequence out of range}}
+#endif
+  unsigned h = U'\x{FFFFFFFF}';  // expected-warning {{extension}}
+  unsigned i = U'\x{100000000}'; // expected-error {{hex escape sequence out of range}}
+}
+
+void octal() {
+  char a = '\o{1}';              // expected-warning {{extension}}
+  char b = '\o{12345678881238}'; // expected-error 4{{invalid digit '8' in escape sequence}}
+  char c = '\o{777}';            // //expected-error {{octal escape sequence out of range}}
+#if __WCHAR_MAX__ > 0xFFFF
+  unsigned d = L'\o{37777777777}'; // expected-warning {{extension}}
+  unsigned e = L'\o{40000000000}'; // expected-error {{octal escape sequence out of range}}
+#else
+  unsigned d = L'\o{177777}'; // expected-warning {{extension}}
+  unsigned e = L'\o{200000}'; // expected-error {{octal escape sequence out of range}}
+#endif
+}
+
+void concat() {
+  (void)"\x{" "12}"; // expected-error {{expected '}'}}
+  (void)"\u{" "12}"; // expected-error {{expected '}'}}
+  (void)"\o{" "12}"; // expected-error {{expected '}'}}
+
+  (void)"\x{12" "}"; // expected-error {{expected '}'}}
+  (void)"\u{12" "}"; // expected-error {{expected '}'}}
+  (void)"\o{12" "}"; // expected-error {{expected '}'}}
+}
+
+void separators() {
+  (void)"\x{12'3}"; // expected-error {{invalid digit ''' in escape sequence}}
+  (void)"\u{12'3}"; // expected-error {{invalid digit ''' in escape sequence}}
+  (void)"\o{12'3}"; // expected-error {{invalid digit ''' in escape sequence}}
+
+  '\x{12'3'}';   // expected-error {{expected '}'}}
+                 // expected-error@-1 2{{expected ';'}}
+                 // expected-warning@-2 3{{expression result unused}}
+}
--- a/clang/test/Parser/cxx11-user-defined-literals.cpp
+++ b/clang/test/Parser/cxx11-user-defined-literals.cpp
@ -129,6 +129,9 @@ int operator""_\U00010000(char) {} // expected-error {{redefinition of 'operator
 int operator""_℮""_\u212e""_\U0000212e""(const char*, size_t);
 int operator""_\u212e""_\U0000212e""_℮""(const char*, size_t);
 int operator""_\U0000212e""_℮""_\u212e""(const char*, size_t);
+
+int operator""_\u{212f}(char);
+
 int mix_ucn_utf8 = ""_℮""_\u212e""_\U0000212e"";

 void operator""_℮""_ℯ(unsigned long long) {} // expected-error {{differing user-defined suffixes ('_℮' and '_ℯ') in string literal concatenation}}
--- a/clang/test/Preprocessor/ucn-pp-identifier.c
+++ b/clang/test/Preprocessor/ucn-pp-identifier.c
@ -16,6 +16,10 @@
 #error "This should never happen"
 #endif

+#if a\u{FD}() //expected-warning {{Clang extension}}
+#error "This should never happen"
+#endif
+
 #if \uarecool // expected-warning{{incomplete universal character name; treating as '\' followed by identifier}} expected-error {{invalid token at start of a preprocessor expression}}
 #endif
 #if \uwerecool // expected-warning{{\u used with no following hex digits; treating as '\' followed by identifier}} expected-error {{invalid token at start of a preprocessor expression}}
@ -27,6 +31,7 @@
 #define \ufffe // expected-error {{macro name must be an identifier}}
 #define \U10000000  // expected-error {{macro name must be an identifier}}
 #define \u0061  // expected-error {{character 'a' cannot be specified by a universal character name}} expected-error {{macro name must be an identifier}}
+#define \u{fffe} // expected-error {{macro name must be an identifier}} expected-warning {{Clang extension}}

 #define a\u0024

@ -103,3 +108,8 @@ C 1
 // CHECK-NEXT:   #define capital_u_\U00FC
 // CHECK-NEXT: {{^                   \^}}
 // CHECK-NEXT: {{^                   u}}
+
+#define \u{}           // expected-warning {{empty delimited universal character name; treating as '\' 'u' '{' '}'}} expected-error {{macro name must be an identifier}}
+#define \u{123456789}  // expected-error {{hex escape sequence out of range}} expected-error {{macro name must be an identifier}}
+#define \u{            // expected-warning {{incomplete delimited universal character name; treating as '\' 'u' '{' identifier}} expected-error {{macro name must be an identifier}}
+#define \u{fgh}        // expected-warning {{incomplete delimited universal character name; treating as '\' 'u' '{' identifier}} expected-error {{macro name must be an identifier}}
--- a/clang/test/Sema/ucn-identifiers.c
+++ b/clang/test/Sema/ucn-identifiers.c
@ -17,6 +17,7 @@ void goodCalls() {
  \u00fcber(1);
  über(2);
  \U000000FCber(3);
+  \u{FC}ber(4); // expected-warning {{Clang extension}}
 }

 void badCalls() {
@ -24,7 +25,7 @@ void badCalls() {
  \u00fcber = 0; // expected-error{{non-object type 'void (int)' is not assignable}}

  über(1, 2);
-  \U000000FCber(); 
+  \U000000FCber();
 #ifdef __cplusplus
  // expected-error@-3 {{no matching function}}
  // expected-error@-3 {{no matching function}}