Implement literal suffixes for _BitInt

WG14 adopted N2775 (http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2775.pdf) at our Feb 2022 meeting. This paper adds a literal suffix for bit-precise types that automatically sizes the bit-precise type to be the smallest possible legal _BitInt type that can represent the literal value. The suffix chosen is wb (for a signed bit-precise type) which can be combined with the u suffix (for an unsigned bit-precise type). The preprocessor continues to operate as-if all integer types were intmax_t/uintmax_t, including bit-precise integer types. It is a constraint violation if the bit-precise literal is too large to fit within that type in the context of the preprocessor (when still using a pp-number preprocessing token), but it is not a constraint violation in other circumstances. This allows you to make bit-precise integer literals that are wider than what the preprocessor currently supports in order to initialize variables, etc.
2022-03-14 09:22:37 -04:00 · 2022-03-14 09:22:37 -04:00 · 8cba72177d
parent dc152659b4
commit 8cba72177d
12 changed files with 359 additions and 33 deletions
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@ -111,6 +111,8 @@ C2x Feature Support

 - Implemented `WG14 N2764 The noreturn attribute <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2764.pdf>`_.
 - Implemented `WG14 N2935 Make false and true first-class language features <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2935.pdf>`_.
+- Implemented `WG14 N2763 Adding a fundamental type for N-bit integers <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2763.pdf>`_.
+- Implemented `WG14 N2775 Literal suffixes for bit-precise integers <http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2775.pdf>`_.

 C++ Language Changes in Clang
 -----------------------------
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@ -207,6 +207,12 @@ def err_cxx2b_size_t_suffix: Error<
 def err_size_t_literal_too_large: Error<
  "%select{signed |}0'size_t' literal is out of range of possible "
  "%select{signed |}0'size_t' values">;
+def ext_c2x_bitint_suffix : ExtWarn<
+  "'_BitInt' suffix for literals is a C2x extension">,
+  InGroup<C2x>;
+def warn_c2x_compat_bitint_suffix : Warning<
+  "'_BitInt' suffix for literals is incompatible with C standards before C2x">,
+  InGroup<CPre2xCompat>, DefaultIgnore;
 def err_integer_literal_too_large : Error<
  "integer literal is too large to be represented in any %select{signed |}0"
  "integer type">;
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@ -69,10 +69,11 @@ public:
  bool isImaginary : 1;     // 1.0i
  bool isFloat16 : 1;       // 1.0f16
  bool isFloat128 : 1;      // 1.0q
-  uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
-
  bool isFract : 1;         // 1.0hr/r/lr/uhr/ur/ulr
  bool isAccum : 1;         // 1.0hk/k/lk/uhk/uk/ulk
+  bool isBitInt : 1;        // 1wb, 1uwb (C2x)
+  uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
+

  bool isFixedPointLiteral() const {
    return (saw_period || saw_exponent) && saw_fixed_point_suffix;
@ -120,6 +121,13 @@ public:
  /// calculating the digit sequence of the exponent.
  bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale);

+  /// Returns the characters that make up the digit sequence of the literal,
+  /// i.e. the text between any prefix and any suffix. Must only be called
+  /// when parsing the literal did not report an error.
+  StringRef getLiteralDigits() const {
+    assert(!hadError && "cannot reliably get the literal digits with an error");
+    const size_t NumDigitChars = SuffixBegin - DigitsBegin;
+    return StringRef(DigitsBegin, NumDigitChars);
+  }
+
 private:

  void ParseNumberStartingWithZero(SourceLocation TokLoc);
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@ -1153,6 +1153,11 @@ void StmtPrinter::VisitIntegerLiteral(IntegerLiteral *Node) {
  bool isSigned = Node->getType()->isSignedIntegerType();
  OS << toString(Node->getValue(), 10, isSigned);

+  if (isa<BitIntType>(Node->getType())) {
+    OS << (isSigned ? "wb" : "uwb");
+    return;
+  }
+
  // Emit suffixes.  Integer literals are always a builtin integer type.
  switch (Node->getType()->castAs<BuiltinType>()->getKind()) {
  default: llvm_unreachable("Unexpected type for integer literal!");
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@ -711,6 +711,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
  isFract = false;
  isAccum = false;
  hadError = false;
+  isBitInt = false;

  // This routine assumes that the range begin/end matches the regex for integer
  // and FP constants (specifically, the 'pp-number' regex), and assumes that
@ -895,6 +896,24 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
      if (isImaginary) break;   // Cannot be repeated.
      isImaginary = true;
      continue;  // Success.
+    case 'w':
+    case 'W':
+      if (isFPConstant)
+        break; // Invalid for floats.
+      if (HasSize)
+        break; // Invalid if we already have a size for the literal.
+
+      // wb and WB are allowed, but a mixture of cases like Wb or wB is not. We
+      // explicitly do not support the suffix in C++ as an extension because a
+      // library-based UDL that resolves to a library type may be more
+      // appropriate there.
+      //
+      // The language check must guard *both* spellings, so the two spelling
+      // tests are grouped in their own parentheses; without them '&&' binds
+      // tighter than '||' and "WB" would be accepted in C++ mode.
+      if (!LangOpts.CPlusPlus && ((s[0] == 'w' && s[1] == 'b') ||
+                                  (s[0] == 'W' && s[1] == 'B'))) {
+        isBitInt = true;
+        HasSize = true;
+        ++s; // Skip both characters (2nd char skipped on continue).
+        continue; // Success.
+      }
    }
    // If we reached here, there was an error or a ud-suffix.
    break;
@ -916,6 +935,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
        isFloat16 = false;
        isHalf = false;
        isImaginary = false;
+        isBitInt = false;
        MicrosoftInteger = 0;
        saw_fixed_point_suffix = false;
        isFract = false;
@ -1145,8 +1165,14 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
  // floating point constant, the radix will change to 10. Octal floating
  // point constants are not permitted (only decimal and hexadecimal).
  radix = 8;
-  DigitsBegin = s;
+  const char *PossibleNewDigitStart = s;
  s = SkipOctalDigits(s);
+  // When the value is 0 followed by a suffix (like 0wb), we want to leave 0
+  // as the start of the digits. So if skipping octal digits does not skip
+  // anything, we leave the digit start where it was.
+  if (s != PossibleNewDigitStart)
+    DigitsBegin = PossibleNewDigitStart;
+
  if (s == ThisTokEnd)
    return; // Done, simple octal number like 01234

--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@ -331,6 +331,14 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
                                 : diag::ext_cxx2b_size_t_suffix
                           : diag::err_cxx2b_size_t_suffix);

+    // 'wb/uwb' literals are a C2x feature. We explicitly do not support the
+    // suffix in C++ as an extension because a library-based UDL that resolves
+    // to a library type may be more appropriate there.
+    if (Literal.isBitInt)
+      PP.Diag(PeekTok, PP.getLangOpts().C2x
+                           ? diag::warn_c2x_compat_bitint_suffix
+                           : diag::ext_c2x_bitint_suffix);
+
    // Parse the integer literal into Result.
    if (Literal.GetIntegerValue(Result.Val)) {
      // Overflow parsing integer literal.
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@ -3922,9 +3922,27 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
                                        : diag::ext_cxx2b_size_t_suffix
                                  : diag::err_cxx2b_size_t_suffix);

-    // Get the value in the widest-possible width.
-    unsigned MaxWidth = Context.getTargetInfo().getIntMaxTWidth();
-    llvm::APInt ResultVal(MaxWidth, 0);
+    // 'wb/uwb' literals are a C2x feature. We support _BitInt as a type in C++,
+    // but we do not currently support the suffix in C++ mode because it's not
+    // entirely clear whether WG21 will prefer this suffix to return a library
+    // type such as std::bit_int instead of returning a _BitInt.
+    if (Literal.isBitInt && !getLangOpts().CPlusPlus)
+      PP.Diag(Tok.getLocation(), getLangOpts().C2x
+                                     ? diag::warn_c2x_compat_bitint_suffix
+                                     : diag::ext_c2x_bitint_suffix);
+
+    // Get the value in the widest-possible width. What is "widest" depends on
+    // whether the literal is a bit-precise integer or not. For a bit-precise
+    // integer type, try to scan the source to determine how many bits are
+    // needed to represent the value. This may seem a bit expensive, but trying
+    // to get the integer value from an overly-wide APInt is *extremely*
+    // expensive, so the naive approach of assuming
+    // llvm::IntegerType::MAX_INT_BITS is a big performance hit.
+    unsigned BitsNeeded =
+        Literal.isBitInt ? llvm::APInt::getSufficientBitsNeeded(
+                               Literal.getLiteralDigits(), Literal.getRadix())
+                         : Context.getTargetInfo().getIntMaxTWidth();
+    llvm::APInt ResultVal(BitsNeeded, 0);

    if (Literal.GetIntegerValue(ResultVal)) {
      // If this value didn't fit into uintmax_t, error and force to ull.
@ -3956,6 +3974,32 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
        }
      }

+      // Bit-precise integer literals are automagically-sized based on the
+      // width required by the literal.
+      if (Literal.isBitInt) {
+        // The literal itself is always a non-negative value, so the number of
+        // value bits is the number of active bits. There are no zero-width
+        // bit-precise integers, even if the literal value is 0, so at least
+        // one value bit is always used. The signed version has one more bit
+        // for the sign value.
+        //
+        // Note: do not use getMinSignedBits() here. ResultVal is only as wide
+        // as the digits require, so its top bit may be set (e.g. 0xFwb or
+        // 0b11wb); interpreting it as a signed value would make it look
+        // negative and produce a width that is too small to hold the value.
+        Width = std::max(ResultVal.getActiveBits(), 1u) +
+                (Literal.isUnsigned ? 0u : 1u);
+
+        // Diagnose if the width of the constant is larger than BITINT_MAXWIDTH,
+        // and reset the type to the largest supported width.
+        unsigned int MaxBitIntWidth =
+            Context.getTargetInfo().getMaxBitIntWidth();
+        if (Width > MaxBitIntWidth) {
+          Diag(Tok.getLocation(), diag::err_integer_literal_too_large)
+              << Literal.isUnsigned;
+          Width = MaxBitIntWidth;
+        }
+
+        // Reset the result value to the correctly-sized APInt and select the
+        // type to be used. Note, we zext even for signed values because the
+        // literal itself is always an unsigned value (a preceding - is a
+        // unary operator, not part of the literal).
+        ResultVal = ResultVal.zextOrTrunc(Width);
+        Ty = Context.getBitIntType(Literal.isUnsigned, Width);
+      }
+
      // Check C++2b size_t literals.
      if (Literal.isSizeT) {
        assert(!Literal.MicrosoftInteger &&
--- a/clang/test/AST/bitint-suffix.c
+++ b/clang/test/AST/bitint-suffix.c
@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -std=c2x -ast-dump -Wno-unused %s | FileCheck --strict-whitespace %s
+
+// CHECK: FunctionDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE+1]]:1, line:{{[0-9]*}}:1> line:[[@LINE+1]]:6 func 'void (void)'
+void func(void) {
+  // Ensure that we calculate the correct type from the literal suffix.
+
+  // Note: 0wb should create an _BitInt(2) because a signed bit-precise
+  // integer requires one bit for the sign and one bit for the value,
+  // at a minimum.
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:27> col:27 zero_wb 'typeof (0wb)':'_BitInt(2)'
+  typedef __typeof__(0wb) zero_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 neg_zero_wb 'typeof (-0wb)':'_BitInt(2)'
+  typedef __typeof__(-0wb) neg_zero_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:27> col:27 one_wb 'typeof (1wb)':'_BitInt(2)'
+  typedef __typeof__(1wb) one_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 neg_one_wb 'typeof (-1wb)':'_BitInt(2)'
+  typedef __typeof__(-1wb) neg_one_wb;
+
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 zero_uwb 'typeof (0uwb)':'unsigned _BitInt(1)'
+  typedef __typeof__(0uwb) zero_uwb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 neg_zero_uwb 'typeof (-0uwb)':'unsigned _BitInt(1)'
+  typedef __typeof__(-0uwb) neg_zero_uwb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 one_uwb 'typeof (1uwb)':'unsigned _BitInt(1)'
+  typedef __typeof__(1uwb) one_uwb;
+
+  // Try a value that is too large to fit in [u]intmax_t.
+
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:47> col:47 huge_uwb 'typeof (18446744073709551616uwb)':'unsigned _BitInt(65)'
+  typedef __typeof__(18446744073709551616uwb) huge_uwb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:46> col:46 huge_wb 'typeof (18446744073709551616wb)':'_BitInt(66)'
+  typedef __typeof__(18446744073709551616wb) huge_wb;
+}
+
+// Test the examples from the paper.
+// CHECK: FunctionDecl 0x{{[^ ]*}} <{{.*}}:[[@LINE+1]]:1, line:{{[0-9]*}}:1> line:[[@LINE+1]]:6 from_paper 'void (void)'
+void from_paper(void) {
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 neg_three_wb 'typeof (-3wb)':'_BitInt(3)'
+  typedef __typeof__(-3wb) neg_three_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:30> col:30 neg_three_hex_wb 'typeof (-3wb)':'_BitInt(3)'
+  typedef __typeof__(-0x3wb) neg_three_hex_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:27> col:27 three_wb 'typeof (3wb)':'_BitInt(3)'
+  typedef __typeof__(3wb) three_wb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:28> col:28 three_uwb 'typeof (3uwb)':'unsigned _BitInt(2)'
+  typedef __typeof__(3uwb) three_uwb;
+  // CHECK: TypedefDecl 0x{{[^ ]*}} <col:3, col:29> col:29 neg_three_uwb 'typeof (-3uwb)':'unsigned _BitInt(2)'
+  typedef __typeof__(-3uwb) neg_three_uwb;
+}
--- a/clang/test/Lexer/bitint-constants-compat.c
+++ b/clang/test/Lexer/bitint-constants-compat.c
@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -std=c17 -fsyntax-only -verify=ext -Wno-unused %s
+// RUN: %clang_cc1 -std=c2x -fsyntax-only -verify=compat -Wpre-c2x-compat -Wno-unused %s
+// RUN: %clang_cc1 -fsyntax-only -verify=cpp -Wno-unused -x c++ %s
+
+#if 18446744073709551615uwb // ext-warning {{'_BitInt' suffix for literals is a C2x extension}} \
+                               compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C2x}} \
+                               cpp-error {{invalid suffix 'uwb' on integer constant}}
+#endif
+
+void func(void) {
+  18446744073709551615wb; // ext-warning {{'_BitInt' suffix for literals is a C2x extension}} \
+                             compat-warning {{'_BitInt' suffix for literals is incompatible with C standards before C2x}} \
+                             cpp-error {{invalid suffix 'wb' on integer constant}}
+}
--- a/clang/test/Lexer/bitint-constants.c
+++ b/clang/test/Lexer/bitint-constants.c
@ -0,0 +1,144 @@
+// RUN: %clang_cc1 -std=c2x -fsyntax-only -verify -Wno-unused %s
+
+// Test that the preprocessor behavior makes sense.
+#if 1wb != 1
+#error "wb suffix must be recognized by preprocessor"
+#endif
+#if 1uwb != 1
+#error "uwb suffix must be recognized by preprocessor"
+#endif
+#if !(-1wb < 0)
+#error "wb suffix must be interpreted as signed"
+#endif
+#if !(-1uwb > 0)
+#error "uwb suffix must be interpreted as unsigned"
+#endif
+
+#if 18446744073709551615uwb != 18446744073709551615ULL
+#error "expected the max value for uintmax_t to compare equal"
+#endif
+
+// Test that the preprocessor gives appropriate diagnostics when the
+// literal value is larger than what can be stored in a [u]intmax_t.
+#if 18446744073709551616wb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
+#error "never expected to get here due to error"
+#endif
+#if 18446744073709551616uwb != 0ULL // expected-error {{integer literal is too large to be represented in any integer type}}
+#error "never expected to get here due to error"
+#endif
+
+// Despite using a bit-precise integer, this is expected to overflow
+// because all preprocessor arithmetic is done in [u]intmax_t, so this
+// should result in the value 0.
+#if 18446744073709551615uwb + 1 != 0ULL
+#error "expected modulo arithmetic with uintmax_t width"
+#endif
+
+// Because this bit-precise integer is signed, it will also overflow,
+// but Clang handles that by converting to uintmax_t instead of
+// intmax_t.
+#if 18446744073709551615wb + 1 != 0LL // expected-warning {{integer literal is too large to be represented in a signed integer type, interpreting as unsigned}}
+#error "expected modulo arithmetic with uintmax_t width"
+#endif
+
+// Test that just because the preprocessor can't figure out the bit
+// width doesn't mean we can't form the constant, it just means we
+// can't use the value in a preprocessor conditional.
+unsigned _BitInt(65) Val = 18446744073709551616uwb;
+
+void ValidSuffix(void) {
+  // Decimal literals.
+  1wb;
+  1WB;
+  -1wb;
+  _Static_assert((int)1wb == 1, "not 1?");
+  _Static_assert((int)-1wb == -1, "not -1?");
+
+  1uwb;
+  1uWB;
+  1Uwb;
+  1UWB;
+  _Static_assert((unsigned int)1uwb == 1u, "not 1?");
+
+  1'2wb;
+  1'2uwb;
+  _Static_assert((int)1'2wb == 12, "not 12?");
+  _Static_assert((unsigned int)1'2uwb == 12u, "not 12?");
+
+  // Hexadecimal literals.
+  0x1wb;
+  0x1uwb;
+  0x0'1'2'3wb;
+  0xA'B'c'duwb;
+  _Static_assert((int)0x0'1'2'3wb == 0x0123, "not 0x0123");
+  _Static_assert((unsigned int)0xA'B'c'duwb == 0xABCDu, "not 0xABCD");
+
+  // Binary literals.
+  0b1wb;
+  0b1uwb;
+  0b1'0'1'0'0'1wb;
+  0b0'1'0'1'1'0uwb;
+  _Static_assert((int)0b1wb == 1, "not 1?");
+  _Static_assert((unsigned int)0b1uwb == 1u, "not 1?");
+
+  // Octal literals.
+  01wb;
+  01uwb;
+  0'6'0wb;
+  0'0'1uwb;
+  0wbu;
+  0WBu;
+  0wbU;
+  0WBU;
+  0wb;
+  _Static_assert((int)0wb == 0, "not 0?");
+  _Static_assert((unsigned int)0wbu == 0u, "not 0?");
+
+  // Imaginary or Complex. These are allowed because _Complex can work with any
+  // integer type, and that includes _BitInt.
+  1iwb;
+  1wbj;
+}
+
+void InvalidSuffix(void) {
+  // Can't mix the case of wb or WB, and can't rearrange the letters.
+  0wB; // expected-error {{invalid suffix 'wB' on integer constant}}
+  0Wb; // expected-error {{invalid suffix 'Wb' on integer constant}}
+  0bw; // expected-error {{invalid digit 'b' in octal constant}}
+  0BW; // expected-error {{invalid digit 'B' in octal constant}}
+
+  // Trailing digit separators should still diagnose.
+  1'2'wb; // expected-error {{digit separator cannot appear at end of digit sequence}}
+  1'2'uwb; // expected-error {{digit separator cannot appear at end of digit sequence}}
+
+  // Long.
+  1lwb; // expected-error {{invalid suffix}}
+  1wbl; // expected-error {{invalid suffix}}
+  1luwb; // expected-error {{invalid suffix}}
+  1ulwb;  // expected-error {{invalid suffix}}
+
+  // Long long.
+  1llwb; // expected-error {{invalid suffix}}
+  1uwbll; // expected-error {{invalid suffix}}
+
+  // Floating point.
+  0.1wb;   // expected-error {{invalid suffix}}
+  0.1fwb;   // expected-error {{invalid suffix}}
+
+  // Repetitive suffix.
+  1wbwb; // expected-error {{invalid suffix}}
+  1uwbuwb; // expected-error {{invalid suffix}}
+  1wbuwb; // expected-error {{invalid suffix}}
+  1uwbwb; // expected-error {{invalid suffix}}
+}
+
+void ValidSuffixInvalidValue(void) {
+  // This is a valid suffix, but the value is larger than one that fits within
+  // the width of BITINT_MAXWIDTH. When this value changes in the future, the
+  // test cases should pick a new value that can't be represented by a _BitInt,
+  // but also add a test case that a 129-bit literal still behaves as-expected.
+  _Static_assert(__BITINT_MAXWIDTH__ <= 128,
+	             "Need to pick a bigger constant for the test case below.");
+  0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1wb; // expected-error {{integer literal is too large to be represented in any signed integer type}}
+  0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'1uwb; // expected-error {{integer literal is too large to be represented in any integer type}}
+}
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@ -1506,6 +1506,11 @@ public:
  /// equivalent of the string given by \p str.
  static unsigned getBitsNeeded(StringRef str, uint8_t radix);

+  /// Get the bits that are sufficient to represent the string value. This may
+  /// over estimate the amount of bits required, but it does not require
+  /// parsing the value in the string.
+  static unsigned getSufficientBitsNeeded(StringRef Str, uint8_t Radix);
+
  /// The APInt version of the countLeadingZeros functions in
  ///   MathExtras.h.
  ///
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@ -502,12 +502,51 @@ uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
  return retBits;
 }

-unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
-  assert(!str.empty() && "Invalid string length");
-  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
-          radix == 36) &&
-         "Radix should be 2, 8, 10, 16, or 36!");
+/// Compute a bit width that is guaranteed to be large enough to hold the
+/// value spelled by \p Str in the given \p Radix, without parsing the value.
+/// The result is exact for radixes 2, 8 and 16 and may be an over-estimate
+/// for radixes 10 and 36. \p Str may start with a '+' or '-' sign; a '-'
+/// adds one bit to the result.
+unsigned APInt::getSufficientBitsNeeded(StringRef Str, uint8_t Radix) {
+  assert(!Str.empty() && "Invalid string length");
+  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
+          Radix == 36) &&
+         "Radix should be 2, 8, 10, 16, or 36!");
+  size_t StrLen = Str.size();

+  // Each computation below needs to know if it's negative.
+  bool IsNegative = false;
+  if (Str[0] == '-' || Str[0] == '+') {
+    IsNegative = Str[0] == '-';
+    StrLen--;
+    assert(StrLen && "String is only a sign, needs a value.");
+  }
+
+  // For radixes of power-of-two values, the bits required is accurately and
+  // easily computed.
+  if (Radix == 2)
+    return StrLen + IsNegative;
+  if (Radix == 8)
+    return StrLen * 3 + IsNegative;
+  if (Radix == 16)
+    return StrLen * 4 + IsNegative;
+
+  // Compute a sufficient number of bits that is always large enough but might
+  // be too large. This avoids the assertion in the constructor. This
+  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
+  // bits in that case.
+  if (Radix == 10)
+    return (StrLen == 1 ? 4 : StrLen * 64 / 18) + IsNegative;
+
+  // Radix 36 uses 16/3 bits per digit (log2(36) is about 5.17). The division
+  // is rounded up: truncating would yield 10 bits for a two-digit string, but
+  // "ZZ" (1295) needs 11 bits.
+  return (StrLen == 1 ? 7 : (StrLen * 16 + 2) / 3) + IsNegative;
+}
+
+unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
+  // Compute a sufficient number of bits that is always large enough but might
+  // be too large.
+  unsigned sufficient = getSufficientBitsNeeded(str, radix);
+
+  // For bases 2, 8, and 16, the sufficient number of bits is exact and we can
+  // return the value directly. For bases 10 and 36, we need to do extra work.
+  if (radix == 2 || radix == 8 || radix == 16)
+    return sufficient;
+
+  // This is grossly inefficient but accurate. We could probably do something
+  // with a computation of roughly slen*64/20 and then adjust by the value of
+  // the first few digits. But, I'm not sure how accurate that could be.
  size_t slen = str.size();

  // Each computation below needs to know if it's negative.
@ -519,28 +558,6 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
    assert(slen && "String is only a sign, needs a value.");
  }

-  // For radixes of power-of-two values, the bits required is accurately and
-  // easily computed
-  if (radix == 2)
-    return slen + isNegative;
-  if (radix == 8)
-    return slen * 3 + isNegative;
-  if (radix == 16)
-    return slen * 4 + isNegative;
-
-  // FIXME: base 36
-
-  // This is grossly inefficient but accurate. We could probably do something
-  // with a computation of roughly slen*64/20 and then adjust by the value of
-  // the first few digits. But, I'm not sure how accurate that could be.
-
-  // Compute a sufficient number of bits that is always large enough but might
-  // be too large. This avoids the assertion in the constructor. This
-  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
-  // bits in that case.
-  unsigned sufficient
-    = radix == 10? (slen == 1 ? 4 : slen * 64/18)
-                 : (slen == 1 ? 7 : slen * 16/3);

  // Convert to the actual binary value.
  APInt tmp(sufficient, StringRef(p, slen), radix);