forked from OSchip/llvm-project
Add support for 4-byte UCNs like \U12345678. Warn about UCNs in c90 mode.
llvm-svn: 115743
This commit is contained in:
parent
28f034c21a
commit
9762e0a234
|
@ -98,6 +98,10 @@ def warn_hex_escape_too_large : ExtWarn<"hex escape sequence out of range">;
|
|||
def ext_string_too_long : Extension<"string literal of length %0 exceeds "
|
||||
"maximum length %1 that %select{C90|ISO C99|C++}2 compilers are required to "
|
||||
"support">, InGroup<OverlengthStrings>;
|
||||
// Reported by ProcessUCNEscape for a wide literal under short wchar_t when
// the universal character name does not fit in one 16-bit unit and is
// therefore written out as a UTF-16 surrogate pair.
def warn_ucn_escape_too_large : ExtWarn<
|
||||
"character unicode escape sequence too long for its type">;
|
||||
// Reported by ProcessUCNEscape when a universal character name is used while
// neither the C99 nor the C++ language option is enabled.
def warn_ucn_not_valid_in_c89 : ExtWarn<
|
||||
"unicode escape sequences are only valid in C99 or C++">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PTH Diagnostics
|
||||
|
|
|
@ -172,8 +172,8 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
|
|||
SourceLocation Loc, Preprocessor &PP,
|
||||
bool wide,
|
||||
bool Complain) {
|
||||
// FIXME: Add a warning - UCN's are only valid in C++ & C99.
|
||||
// FIXME: Handle wide strings.
|
||||
if (!PP.getLangOptions().CPlusPlus && !PP.getLangOptions().C99)
|
||||
PP.Diag(Loc, diag::warn_ucn_not_valid_in_c89);
|
||||
|
||||
// Save the beginning of the string (for error diagnostics).
|
||||
const char *ThisTokBegin = ThisTokBuf;
|
||||
|
@ -218,13 +218,34 @@ static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
|
|||
}
|
||||
if (wide) {
|
||||
(void)UcnLenSave;
|
||||
assert(UcnLenSave == 4 &&
|
||||
"ProcessUCNEscape - only ucn length of 4 supported");
|
||||
// little endian assumed.
|
||||
*ResultBuf++ = (UcnVal & 0x000000FF);
|
||||
*ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
|
||||
*ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
|
||||
*ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
|
||||
assert((UcnLenSave == 4 || UcnLenSave == 8) &&
|
||||
"ProcessUCNEscape - only ucn length of 4 or 8 supported");
|
||||
|
||||
if (!PP.getLangOptions().ShortWChar) {
|
||||
// Note: our internal rep of wide char tokens is always little-endian.
|
||||
*ResultBuf++ = (UcnVal & 0x000000FF);
|
||||
*ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
|
||||
*ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
|
||||
*ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
|
||||
return;
|
||||
}
|
||||
|
||||
// Convert to UTF16.
|
||||
if (UcnVal < (UTF32)0xFFFF) {
|
||||
*ResultBuf++ = (UcnVal & 0x000000FF);
|
||||
*ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
|
||||
return;
|
||||
}
|
||||
PP.Diag(Loc, diag::warn_ucn_escape_too_large);
|
||||
|
||||
typedef uint16_t UTF16;
|
||||
UcnVal -= 0x10000;
|
||||
UTF16 surrogate1 = 0xD800 + (UcnVal >> 10);
|
||||
UTF16 surrogate2 = 0xDC00 + (UcnVal & 0x3FF);
|
||||
*ResultBuf++ = (surrogate1 & 0x000000FF);
|
||||
*ResultBuf++ = (surrogate1 & 0x0000FF00) >> 8;
|
||||
*ResultBuf++ = (surrogate2 & 0x000000FF);
|
||||
*ResultBuf++ = (surrogate2 & 0x0000FF00) >> 8;
|
||||
return;
|
||||
}
|
||||
// Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
// RUN: %clang_cc1 -emit-llvm -fshort-wchar %s -o - | FileCheck %s
|
||||
|
||||
// Code-generation test for universal character names when the RUN line
// passes -fshort-wchar. Narrow literals are matched as UTF-8 bytes; wide
// literals are matched as 2-byte little-endian units, as the FileCheck
// expectations below spell out.
int main() {
|
||||
// This should convert to utf8.
// U+1120 -> E1 84 A0, U+0220 -> C8 A0, U+102030 -> F4 82 80 B0, then NUL.
|
||||
// CHECK: internal constant [10 x i8] c"\E1\84\A0\C8\A0\F4\82\80\B0\00", align 1
|
||||
char b[10] = "\u1120\u0220\U00102030";
|
||||
|
||||
// Plain wide literal: 'A', 'B' and the terminator each occupy two bytes.
// CHECK: private constant [6 x i8] c"A\00B\00\00\00"
|
||||
void *foo = L"AB";
|
||||
|
||||
// This should convert to utf16.
// U+1120 and U+0220 take one unit each; U+102030 becomes the surrogate
// pair 0xDBC8 0xDC30. Every unit is stored low byte first (0x20 prints as
// a space and 0x30 as '0' in the expectation).
|
||||
// CHECK: private constant [10 x i8] c" \11 \02\C8\DB0\DC\00\00"
|
||||
void *bar = L"\u1120\u0220\U00102030";
|
||||
}
|
|
@ -1,7 +1,16 @@
|
|||
// RUN: %clang_cc1 -emit-llvm %s -o -
|
||||
// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s
|
||||
|
||||
// Code-generation test for string literals without -fshort-wchar on the RUN
// line. The wide-literal expectations below use four bytes per character,
// low byte first (NOTE(review): this presumes the default target has a
// 4-byte wchar_t -- confirm).
int main() {
|
||||
// Plain narrow literal: "abc" followed by zero padding up to 10 bytes.
// CHECK: internal constant [10 x i8] c"abc\00\00\00\00\00\00\00", align 1
|
||||
char a[10] = "abc";
|
||||
|
||||
// This should convert to utf8.
// U+1120 -> E1 84 A0, U+0220 -> C8 A0, U+102030 -> F4 82 80 B0, then NUL.
|
||||
// CHECK: internal constant [10 x i8] c"\E1\84\A0\C8\A0\F4\82\80\B0\00", align 1
|
||||
char b[10] = "\u1120\u0220\U00102030";
|
||||
|
||||
// Wide literal: 'A', 'B' and the terminator each occupy four bytes.
// CHECK: private constant [12 x i8] c"A\00\00\00B\00\00\00\00\00\00\00"
|
||||
void *foo = L"AB";
|
||||
|
||||
// U+1234 -> 34 12 00 00 ('4' is byte 0x34), U+10F00B -> 0B F0 10 00, then a
// four-byte terminator.
// CHECK: private constant [12 x i8] c"4\12\00\00\0B\F0\10\00\00\00\00\00"
|
||||
void *bar = L"\u1234\U0010F00B";
|
||||
}
|
||||
|
|
|
@ -27,3 +27,7 @@ void test2() {
|
|||
"sdjflksdjf lksdjf skldfjsdkljflksdjf kldsjflkdsj fldks jflsdkjfds"
|
||||
"sdjflksdjf lksdjf skldfjsdkljflksdjf kldsjflkdsj fldks jflsdkjfds";
|
||||
}
|
||||
|
||||
// A universal character name in a wide string literal must be diagnosed in
// this test; the directive on the literal's line names the exact message.
// NOTE(review): the RUN line of this file is not visible here -- confirm it
// selects C90 and promotes the diagnostic to an error.
void test3() {
|
||||
(void)L"\u1234"; // expected-error {{unicode escape sequences are only valid in C99 or C++}}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
// RUN: %clang_cc1 -fsyntax-only -fshort-wchar -verify %s
|
||||
|
||||
// With -fshort-wchar on the RUN line, U+10000 cannot be stored in a single
// wide-character unit, so the literal below must produce the "too long for
// its type" warning named in the directive on its line.
void f() {
|
||||
(void)L"\U00010000"; // expected-warning {{character unicode escape sequence too long for its type}}
|
||||
}
|
||||
|
Loading…
Reference in New Issue