Implement literal suffixes for _BitInt

WG14 adopted N2775 (http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2775.pdf)
at our Feb 2022 meeting. This paper adds a literal suffix for
bit-precise types that automatically sizes the bit-precise type to be
the smallest possible legal _BitInt type that can represent the literal
value. The suffix chosen is wb (for a signed bit-precise type) which
can be combined with the u suffix (for an unsigned bit-precise type).

The preprocessor continues to operate as-if all integer types were
intmax_t/uintmax_t, including bit-precise integer types. It is a
constraint violation if the bit-precise literal is too large to fit
within that type in the context of the preprocessor (when still using
a pp-number preprocessing token), but it is not a constraint violation
in other circumstances. This allows you to make bit-precise integer
literals that are wider than what the preprocessor currently supports
in order to initialize variables, etc.
This commit is contained in:
Aaron Ballman 2022-03-14 09:22:37 -04:00
parent dc152659b4
commit 8cba72177d
12 changed files with 359 additions and 33 deletions

View File

@ -111,6 +111,8 @@ C2x Feature Support
- Implemented `WG14 N2764 The noreturn attribute <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2764.pdf>`_.
- Implemented `WG14 N2935 Make false and true first-class language features <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2935.pdf>`_.
- Implemented `WG14 N2763 Adding a fundamental type for N-bit integers <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2763.pdf>`_.
- Implemented `WG14 N2775 Literal suffixes for bit-precise integers <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2775.pdf>`_.

C++ Language Changes in Clang
-----------------------------

View File

@ -207,6 +207,12 @@ def err_cxx2b_size_t_suffix: Error<
def err_size_t_literal_too_large: Error<
"%select{signed |}0'size_t' literal is out of range of possible "
"%select{signed |}0'size_t' values">;
// Diagnostics for the 'wb'/'uwb' bit-precise integer literal suffix. The
// preprocessor and Sema pick between the two based on the C2x language
// option: the compatibility warning when C2x is enabled, the extension
// warning otherwise.

// Extension warning: the suffix is used in a C mode older than C2x.
def ext_c2x_bitint_suffix : ExtWarn<
"'_BitInt' suffix for literals is a C2x extension">,
InGroup<C2x>;
// Compatibility warning: the suffix is used in C2x mode. It is ignored by
// default (DefaultIgnore) and belongs to the CPre2xCompat group.
def warn_c2x_compat_bitint_suffix : Warning<
"'_BitInt' suffix for literals is incompatible with C standards before C2x">,
InGroup<CPre2xCompat>, DefaultIgnore;
def err_integer_literal_too_large : Error<
"integer literal is too large to be represented in any %select{signed |}0"
"integer type">;

View File

@ -69,10 +69,11 @@ public:
bool isImaginary : 1; // 1.0i
bool isFloat16 : 1; // 1.0f16
bool isFloat128 : 1; // 1.0q
uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr
bool isAccum : 1; // 1.0hk/k/lk/uhk/uk/ulk
bool isBitInt : 1; // 1wb, 1uwb (C2x)
uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
bool isFixedPointLiteral() const {
return (saw_period || saw_exponent) && saw_fixed_point_suffix;
@ -120,6 +121,13 @@ public:
/// calculating the digit sequence of the exponent.
bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale);
/// Return the spelling of the digit sequence of this literal, i.e. the
/// characters from DigitsBegin up to (but not including) SuffixBegin. Any
/// radix prefix and any suffix are left out. Must not be called on a literal
/// that failed to parse.
StringRef getLiteralDigits() const {
  assert(!hadError && "cannot reliably get the literal digits with an error");
  const size_t NumDigitChars = SuffixBegin - DigitsBegin;
  return StringRef(DigitsBegin, NumDigitChars);
}
private:
void ParseNumberStartingWithZero(SourceLocation TokLoc);

View File

@ -1153,6 +1153,11 @@ void StmtPrinter::VisitIntegerLiteral(IntegerLiteral *Node) {
bool isSigned = Node->getType()->isSignedIntegerType();
OS << toString(Node->getValue(), 10, isSigned);
if (isa<BitIntType>(Node->getType())) {
OS << (isSigned ? "wb" : "uwb");
return;
}
// Emit suffixes. Integer literals are always a builtin integer type.
switch (Node->getType()->castAs<BuiltinType>()->getKind()) {
default: llvm_unreachable("Unexpected type for integer literal!");

View File

@ -711,6 +711,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
isFract = false;
isAccum = false;
hadError = false;
isBitInt = false;
// This routine assumes that the range begin/end matches the regex for integer
// and FP constants (specifically, the 'pp-number' regex), and assumes that
@ -895,6 +896,24 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
if (isImaginary) break; // Cannot be repeated.
isImaginary = true;
continue; // Success.
case 'w':
case 'W':
  // Bit-precise integer suffix (C2x): "wb" or "WB", optionally combined with
  // an unsigned suffix handled by a separate case.
  if (isFPConstant)
    break; // Invalid for floats.
  if (HasSize)
    break; // Invalid if we already have a size for the literal.

  // wb and WB are allowed, but a mixture of cases like Wb or wB is not. We
  // explicitly do not support the suffix in C++ as an extension because a
  // library-based UDL that resolves to a library type may be more
  // appropriate there.
  //
  // The language check must guard BOTH spellings. Since && binds tighter
  // than ||, the two spelling tests are grouped explicitly; otherwise the
  // upper-case "WB" form would be accepted in C++ as well.
  if (!LangOpts.CPlusPlus && ((s[0] == 'w' && s[1] == 'b') ||
                              (s[0] == 'W' && s[1] == 'B'))) {
    isBitInt = true;
    HasSize = true;
    ++s; // Skip both characters (2nd char skipped on continue).
    continue; // Success.
  }
}
// If we reached here, there was an error or a ud-suffix.
break;
@ -916,6 +935,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
isFloat16 = false;
isHalf = false;
isImaginary = false;
isBitInt = false;
MicrosoftInteger = 0;
saw_fixed_point_suffix = false;
isFract = false;
@ -1145,8 +1165,14 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
// floating point constant, the radix will change to 10. Octal floating
// point constants are not permitted (only decimal and hexadecimal).
radix = 8;
DigitsBegin = s;
const char *PossibleNewDigitStart = s;
s = SkipOctalDigits(s);
// When the value is 0 followed by a suffix (like 0wb), we want to leave 0
// as the start of the digits. So if skipping octal digits does not skip
// anything, we leave the digit start where it was.
if (s != PossibleNewDigitStart)
DigitsBegin = PossibleNewDigitStart;
if (s == ThisTokEnd)
return; // Done, simple octal number like 01234

View File

@ -331,6 +331,14 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
: diag::ext_cxx2b_size_t_suffix
: diag::err_cxx2b_size_t_suffix);
// 'wb/uwb' literals are a C2x feature. We explicitly do not support the
// suffix in C++ as an extension because a library-based UDL that resolves
// to a library type may be more appropriate there.
if (Literal.isBitInt)
PP.Diag(PeekTok, PP.getLangOpts().C2x
? diag::warn_c2x_compat_bitint_suffix
: diag::ext_c2x_bitint_suffix);
// Parse the integer literal into Result.
if (Literal.GetIntegerValue(Result.Val)) {
// Overflow parsing integer literal.

View File

@ -3922,9 +3922,27 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
: diag::ext_cxx2b_size_t_suffix
: diag::err_cxx2b_size_t_suffix);
// Get the value in the widest-possible width.
unsigned MaxWidth = Context.getTargetInfo().getIntMaxTWidth();
llvm::APInt ResultVal(MaxWidth, 0);
// 'wb/uwb' literals are a C2x feature. We support _BitInt as a type in C++,
// but we do not currently support the suffix in C++ mode because it's not
// entirely clear whether WG21 will prefer this suffix to return a library
// type such as std::bit_int instead of returning a _BitInt.
if (Literal.isBitInt && !getLangOpts().CPlusPlus)
PP.Diag(Tok.getLocation(), getLangOpts().C2x
? diag::warn_c2x_compat_bitint_suffix
: diag::ext_c2x_bitint_suffix);
// Get the value in the widest-possible width. What is "widest" depends on
// whether the literal is a bit-precise integer or not. For a bit-precise
// integer type, try to scan the source to determine how many bits are
// needed to represent the value. This may seem a bit expensive, but trying
// to get the integer value from an overly-wide APInt is *extremely*
// expensive, so the naive approach of assuming
// llvm::IntegerType::MAX_INT_BITS is a big performance hit.
unsigned BitsNeeded =
Literal.isBitInt ? llvm::APInt::getSufficientBitsNeeded(
Literal.getLiteralDigits(), Literal.getRadix())
: Context.getTargetInfo().getIntMaxTWidth();
llvm::APInt ResultVal(BitsNeeded, 0);
if (Literal.GetIntegerValue(ResultVal)) {
// If this value didn't fit into uintmax_t, error and force to ull.
@ -3956,6 +3974,32 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
}
}
// Bit-precise integer literals are automagically-sized based on the
// width required by the literal.
if (Literal.isBitInt) {
  // Size the type from the magnitude of the literal: the number of active
  // (significant) bits, with a floor of one because there are no zero-width
  // bit-precise integers, even if the literal value is 0. The signed
  // version has one more bit for the sign value.
  //
  // Deliberately do not use getMinSignedBits() here. ResultVal is allocated
  // with a width derived from the digits of the literal, which for binary,
  // octal and hexadecimal literals is exactly the number of digit bits, so
  // its top bit can be set (e.g. 0xFwb is parsed into a 4-bit APInt). A
  // signed query would interpret such a value as negative, report too few
  // bits, and the truncation below would then corrupt the value.
  Width = std::max(ResultVal.getActiveBits(), 1u) +
          (Literal.isUnsigned ? 0u : 1u);

  // Diagnose if the width of the constant is larger than BITINT_MAXWIDTH,
  // and reset the type to the largest supported width.
  unsigned int MaxBitIntWidth =
      Context.getTargetInfo().getMaxBitIntWidth();
  if (Width > MaxBitIntWidth) {
    Diag(Tok.getLocation(), diag::err_integer_literal_too_large)
        << Literal.isUnsigned;
    Width = MaxBitIntWidth;
  }

  // Reset the result value to the smaller APInt and select the correct
  // type to be used. Note, we zext even for signed values because the
  // literal itself is always an unsigned value (a preceding - is a
  // unary operator, not part of the literal).
  ResultVal = ResultVal.zextOrTrunc(Width);
  Ty = Context.getBitIntType(Literal.isUnsigned, Width);
}
// Check C++2b size_t literals.
if (Literal.isSizeT) {
assert(!Literal.MicrosoftInteger &&

View File

@ -0,0 +1,47 @@
// RUN: %clang_cc1 -std=c2x -ast-dump -Wno-unused %s | FileCheck --strict-whitespace %s
// CHECK: FunctionDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE+1]]:1, line:{{[0-9]*}}:1> line:[[@LINE+1]]:6 func 'void (void)'
void func(void) {
// Ensure that we calculate the correct type from the literal suffix.
// Note: 0wb should create an _BitInt(2) because a signed bit-precise
// integer requires one bit for the sign and one bit for the value,
// at a minimum.
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:27> col:27 zero_wb 'typeof (0wb)':'_BitInt(2)'
typedef __typeof__(0wb) zero_wb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 neg_zero_wb 'typeof (-0wb)':'_BitInt(2)'
typedef __typeof__(-0wb) neg_zero_wb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:27> col:27 one_wb 'typeof (1wb)':'_BitInt(2)'
typedef __typeof__(1wb) one_wb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 neg_one_wb 'typeof (-1wb)':'_BitInt(2)'
typedef __typeof__(-1wb) neg_one_wb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 zero_uwb 'typeof (0uwb)':'unsigned _BitInt(1)'
typedef __typeof__(0uwb) zero_uwb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 neg_zero_uwb 'typeof (-0uwb)':'unsigned _BitInt(1)'
typedef __typeof__(-0uwb) neg_zero_uwb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 one_uwb 'typeof (1uwb)':'unsigned _BitInt(1)'
typedef __typeof__(1uwb) one_uwb;
// Try a value that is too large to fit in [u]intmax_t.
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:47> col:47 huge_uwb 'typeof (18446744073709551616uwb)':'unsigned _BitInt(65)'
typedef __typeof__(18446744073709551616uwb) huge_uwb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:46> col:46 huge_wb 'typeof (18446744073709551616wb)':'_BitInt(66)'
typedef __typeof__(18446744073709551616wb) huge_wb;
}
// Test the examples from the paper.
// CHECK: FunctionDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE+1]]:1, line:{{[0-9]*}}:1> line:[[@LINE+1]]:6 from_paper 'void (void)'
void from_paper(void) {
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 neg_three_wb 'typeof (-3wb)':'_BitInt(3)'
typedef __typeof__(-3wb) neg_three_wb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_three_hex_wb 'typeof (-3wb)':'_BitInt(3)'
typedef __typeof__(-0x3wb) neg_three_hex_wb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:27> col:27 three_wb 'typeof (3wb)':'_BitInt(3)'
typedef __typeof__(3wb) three_wb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 three_uwb 'typeof (3uwb)':'unsigned _BitInt(2)'
typedef __typeof__(3uwb) three_uwb;
// CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 neg_three_uwb 'typeof (-3uwb)':'unsigned _BitInt(2)'
typedef __typeof__(-3uwb) neg_three_uwb;
}

View File

@ -0,0 +1,14 @@
// RUN: %clang_cc1 -std=c17 -fsyntax-only -verify=ext -Wno-unused %s
// RUN: %clang_cc1 -std=c2x -fsyntax-only -verify=compat -Wpre-c2x-compat -Wno-unused %s
// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wno-unused -x c++ %s
#if 18446744073709551615uwb // ext-warning {{'_BitInt' suffix for literals is a C2x extension}} \
compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C2x}} \
cpp-error {{invalid suffix 'uwb' on integer constant}}
#endif
void func(void) {
18446744073709551615wb; // ext-warning {{'_BitInt' suffix for literals is a C2x extension}} \
compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C2x}} \
cpp-error {{invalid suffix 'wb' on integer constant}}
}

View File

@ -0,0 +1,144 @@
// RUN: %clang_cc1 -std=c2x -fsyntax-only -verify -Wno-unused %s
// Test that the preprocessor behavior makes sense.
#if 1wb != 1
#error "wb suffix must be recognized by preprocessor"
#endif
#if 1uwb != 1
#error "uwb suffix must be recognized by preprocessor"
#endif
#if !(-1wb < 0)
#error "wb suffix must be interpreted as signed"
#endif
#if !(-1uwb > 0)
#error "uwb suffix must be interpreted as unsigned"
#endif
#if 18446744073709551615uwb != 18446744073709551615ULL
#error "expected the max value for uintmax_t to compare equal"
#endif
// Test that the preprocessor gives appropriate diagnostics when the
// literal value is larger than what can be stored in a [u]intmax_t.
#if 18446744073709551616wb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
#error "never expected to get here due to error"
#endif
#if 18446744073709551616uwb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
#error "never expected to get here due to error"
#endif
// Despite using a bit-precise integer, this is expected to overflow
// because all preprocessor arithmetic is done in [u]intmax_t, so this
// should result in the value 0.
#if 18446744073709551615uwb + 1 != 0ULL
#error "expected modulo arithmetic with uintmax_t width"
#endif
// Because this bit-precise integer is signed, it will also overflow,
// but Clang handles that by converting to uintmax_t instead of
// intmax_t.
#if 18446744073709551615wb + 1 != 0LL // expected-warning {{integer literal is too large to be represented in a signed integer type, interpreting as unsigned}}
#error "expected modulo arithmetic with uintmax_t width"
#endif
// Test that just because the preprocessor can't figure out the bit
// width doesn't mean we can't form the constant, it just means we
// can't use the value in a preprocessor conditional.
unsigned _BitInt(65) Val = 18446744073709551616uwb;
void ValidSuffix(void) {
// Decimal literals.
1wb;
1WB;
-1wb;
_Static_assert((int)1wb == 1, "not 1?");
_Static_assert((int)-1wb == -1, "not -1?");
1uwb;
1uWB;
1Uwb;
1UWB;
_Static_assert((unsigned int)1uwb == 1u, "not 1?");
1'2wb;
1'2uwb;
_Static_assert((int)1'2wb == 12, "not 12?");
_Static_assert((unsigned int)1'2uwb == 12u, "not 12?");
// Hexadecimal literals.
0x1wb;
0x1uwb;
0x0'1'2'3wb;
0xA'B'c'duwb;
_Static_assert((int)0x0'1'2'3wb == 0x0123, "not 0x0123");
_Static_assert((unsigned int)0xA'B'c'duwb == 0xABCDu, "not 0xABCD");
// Binary literals.
0b1wb;
0b1uwb;
0b1'0'1'0'0'1wb;
0b0'1'0'1'1'0uwb;
_Static_assert((int)0b1wb == 1, "not 1?");
_Static_assert((unsigned int)0b1uwb == 1u, "not 1?");
// Octal literals.
01wb;
01uwb;
0'6'0wb;
0'0'1uwb;
0wbu;
0WBu;
0wbU;
0WBU;
0wb;
_Static_assert((int)0wb == 0, "not 0?");
_Static_assert((unsigned int)0wbu == 0u, "not 0?");
// Imaginary or Complex. These are allowed because _Complex can work with any
// integer type, and that includes _BitInt.
1iwb;
1wbj;
}
void InvalidSuffix(void) {
// Can't mix the case of wb or WB, and can't rearrange the letters.
0wB; // expected-error {{invalid suffix 'wB' on integer constant}}
0Wb; // expected-error {{invalid suffix 'Wb' on integer constant}}
0bw; // expected-error {{invalid digit 'b' in octal constant}}
0BW; // expected-error {{invalid digit 'B' in octal constant}}
// Trailing digit separators should still diagnose.
1'2'wb; // expected-error {{digit separator cannot appear at end of digit sequence}}
1'2'uwb; // expected-error {{digit separator cannot appear at end of digit sequence}}
// Long.
1lwb; // expected-error {{invalid suffix}}
1wbl; // expected-error {{invalid suffix}}
1luwb; // expected-error {{invalid suffix}}
1ulwb; // expected-error {{invalid suffix}}
// Long long.
1llwb; // expected-error {{invalid suffix}}
1uwbll; // expected-error {{invalid suffix}}
// Floating point.
0.1wb; // expected-error {{invalid suffix}}
0.1fwb; // expected-error {{invalid suffix}}
// Repetitive suffix.
1wbwb; // expected-error {{invalid suffix}}
1uwbuwb; // expected-error {{invalid suffix}}
1wbuwb; // expected-error {{invalid suffix}}
1uwbwb; // expected-error {{invalid suffix}}
}
void ValidSuffixInvalidValue(void) {
// This is a valid suffix, but the value is larger than one that fits within
// the width of BITINT_MAXWIDTH. When this value changes in the future, the
// test cases should pick a new value that can't be represented by a _BitInt,
// but also add a test case that a 129-bit literal still behaves as-expected.
_Static_assert(__BITINT_MAXWIDTH__ <= 128,
"Need to pick a bigger constant for the test case below.");
0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1wb; // expected-error {{integer literal is too large to be represented in any signed integer type}}
0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1uwb; // expected-error {{integer literal is too large to be represented in any integer type}}
}

View File

@ -1506,6 +1506,11 @@ public:
/// equivalent of the string given by \p str.
static unsigned getBitsNeeded(StringRef str, uint8_t radix);
/// Get a number of bits that is sufficient to represent the value spelled
/// by \p Str in base \p Radix. The result may overestimate the number of
/// bits required, but it is computed from the length of the string and the
/// radix alone, so the value in the string does not have to be parsed.
/// \p Str must not be empty; it may start with a '+' or '-' sign, and a
/// leading '-' adds one bit to the result.
static unsigned getSufficientBitsNeeded(StringRef Str, uint8_t Radix);
/// The APInt version of the countLeadingZeros functions in
/// MathExtras.h.
///

View File

@ -502,12 +502,51 @@ uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
return retBits;
}
unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
assert(!str.empty() && "Invalid string length");
assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
radix == 36) &&
"Radix should be 2, 8, 10, 16, or 36!");
/// Compute a bit width large enough to hold the value spelled by \p Str in
/// base \p Radix without parsing the digits.
///
/// \param Str   Non-empty digit string, optionally preceded by '+' or '-'.
/// \param Radix One of 2, 8, 10, 16 or 36.
/// \returns A width derived only from the number of characters after the
///          sign. For radix 2, 8 and 16 it is exactly the digit count times
///          the bits per digit; for radix 10 and 36 it is an upper estimate.
///          One extra bit is added when the string starts with '-'.
unsigned APInt::getSufficientBitsNeeded(StringRef Str, uint8_t Radix) {
  assert(!Str.empty() && "Invalid string length");
  size_t StrLen = Str.size();

  // Each computation below needs to know if it's negative. A leading sign is
  // not a digit, so it is removed from the digit count.
  unsigned IsNegative = false;
  if (Str[0] == '-' || Str[0] == '+') {
    IsNegative = Str[0] == '-';
    StrLen--;
    assert(StrLen && "String is only a sign, needs a value.");
  }

  // For radixes of power-of-two values, the bits required is accurately and
  // easily computed.
  if (Radix == 2)
    return StrLen + IsNegative;
  if (Radix == 8)
    return StrLen * 3 + IsNegative;
  if (Radix == 16)
    return StrLen * 4 + IsNegative;

  // Compute a sufficient number of bits that is always large enough but might
  // be too large. This avoids the assertion in the constructor. This
  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
  // bits in that case.
  if (Radix == 10)
    return (StrLen == 1 ? 4 : StrLen * 64 / 18) + IsNegative;

  assert(Radix == 36);
  // Budget 16/3 (~5.33) bits per base-36 digit, which exceeds the
  // log2(36) ~= 5.17 bits a digit can carry. The division must round up:
  // truncating yields 10 bits for a two-digit string, but "zz" is 1295 and
  // needs 11 bits.
  return (StrLen == 1 ? 7 : (StrLen * 16 + 2) / 3) + IsNegative;
}
unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
// Compute a sufficient number of bits that is always large enough but might
// be too large.
unsigned sufficient = getSufficientBitsNeeded(str, radix);
// For bases 2, 8, and 16, the sufficient number of bits is exact and we can
// return the value directly. For bases 10 and 36, we need to do extra work.
if (radix == 2 || radix == 8 || radix == 16)
return sufficient;
// This is grossly inefficient but accurate. We could probably do something
// with a computation of roughly slen*64/20 and then adjust by the value of
// the first few digits. But, I'm not sure how accurate that could be.
size_t slen = str.size();
// Each computation below needs to know if it's negative.
@ -519,28 +558,6 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
assert(slen && "String is only a sign, needs a value.");
}
// For radixes of power-of-two values, the bits required is accurately and
// easily computed
if (radix == 2)
return slen + isNegative;
if (radix == 8)
return slen * 3 + isNegative;
if (radix == 16)
return slen * 4 + isNegative;
// FIXME: base 36
// This is grossly inefficient but accurate. We could probably do something
// with a computation of roughly slen*64/20 and then adjust by the value of
// the first few digits. But, I'm not sure how accurate that could be.
// Compute a sufficient number of bits that is always large enough but might
// be too large. This avoids the assertion in the constructor. This
// calculation doesn't work appropriately for the numbers 0-9, so just use 4
// bits in that case.
unsigned sufficient
= radix == 10? (slen == 1 ? 4 : slen * 64/18)
: (slen == 1 ? 7 : slen * 16/3);
// Convert to the actual binary value.
APInt tmp(sufficient, StringRef(p, slen), radix);