[libc] move to combined integer converter

The functions that convert integers to decimal, hexadecimal, and octal are all very similar. This patch merges them into a single combined converter (convert_int) to save code size. Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D131302
2022-08-05 14:51:10 -07:00 · 2022-08-05 14:51:10 -07:00 · 3510082a1c
parent 42e2946f51
commit 3510082a1c
8 changed files with 72 additions and 283 deletions
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@ -63,9 +63,7 @@ add_object_library(
    string_converter.h
    char_converter.h
    int_converter.h
-    hex_converter.h
    ptr_converter.h
-    oct_converter.h
    write_int_converter.h
    float_inf_nan_converter.h
    float_hex_converter.h
@ -74,6 +72,7 @@ add_object_library(
    .core_structs
    libc.src.__support.integer_to_string
    libc.src.__support.CPP.limits
+    libc.src.__support.CPP.string_view
    libc.src.__support.FPUtil.fputil
 )

--- a/libc/src/stdio/printf_core/converter.cpp
+++ b/libc/src/stdio/printf_core/converter.cpp
@ -38,12 +38,10 @@ int convert(Writer *writer, const FormatSection &to_conv) {
  case 'd':
  case 'i':
  case 'u':
-    return convert_int(writer, to_conv);
  case 'o':
-    return convert_oct(writer, to_conv);
  case 'x':
  case 'X':
-    return convert_hex(writer, to_conv);
+    return convert_int(writer, to_conv);
 #ifndef LLVM_LIBC_PRINTF_DISABLE_FLOAT
  // case 'f':
  // case 'F':
--- a/libc/src/stdio/printf_core/converter_atlas.h
+++ b/libc/src/stdio/printf_core/converter_atlas.h
@ -22,11 +22,6 @@
 // defines convert_int
 #include "src/stdio/printf_core/int_converter.h"

-// defines convert_oct
-#include "src/stdio/printf_core/oct_converter.h"
-// defines convert_hex
-#include "src/stdio/printf_core/hex_converter.h"
-
 #ifndef LLVM_LIBC_PRINTF_DISABLE_FLOAT
 // defines convert_float_decimal
 // defines convert_float_dec_exp
--- a/libc/src/stdio/printf_core/core_structs.h
+++ b/libc/src/stdio/printf_core/core_structs.h
@ -86,6 +86,7 @@ constexpr int WRITE_OK = 0;
 constexpr int FILE_WRITE_ERROR = -1;
 constexpr int FILE_STATUS_ERROR = -2;
 constexpr int NULLPTR_WRITE_ERROR = -3;
+constexpr int INT_CONVERSION_ERROR = -4;

 } // namespace printf_core
 } // namespace __llvm_libc
--- a/libc/src/stdio/printf_core/hex_converter.h
+++ b/libc/src/stdio/printf_core/hex_converter.h
@ -1,133 +0,0 @@
-//===-- Hexadecimal Converter for printf ------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_HEX_CONVERTER_H
-#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_HEX_CONVERTER_H
-
-#include "src/stdio/printf_core/converter_utils.h"
-#include "src/stdio/printf_core/core_structs.h"
-#include "src/stdio/printf_core/writer.h"
-
-#include <inttypes.h>
-#include <stddef.h>
-
-namespace __llvm_libc {
-namespace printf_core {
-
-int convert_hex(Writer *writer, const FormatSection &to_conv) {
-  // This approximates the number of digits it takes to represent a hexadecimal
-  // value of a certain number of bits. Each hex digit represents 4 bits, so the
-  // exact value is the number of bytes multiplied by 2.
-  static constexpr size_t BUFF_LEN = sizeof(uintmax_t) * 2;
-  uintmax_t num = to_conv.conv_val_raw;
-  char buffer[BUFF_LEN];
-
-  // All of the characters will be defined relative to variable a, which will be
-  // the appropriate case based on the name of the conversion.
-  char a;
-  if (to_conv.conv_name == 'x')
-    a = 'a';
-  else
-    a = 'A';
-
-  num = apply_length_modifier(num, to_conv.length_modifier);
-
-  // buff_cur can never reach 0, since the buffer is sized to always be able to
-  // contain the whole integer. This means that bounds checking it should be
-  // unnecessary.
-  size_t buff_cur = BUFF_LEN;
-  for (; num > 0 /* && buff_cur > 0 */; --buff_cur, num /= 16)
-    buffer[buff_cur - 1] =
-        ((num % 16) > 9) ? ((num % 16) - 10 + a) : ((num % 16) + '0');
-
-  size_t digits_written = BUFF_LEN - buff_cur;
-
-  // these are signed to prevent underflow due to negative values. The eventual
-  // values will always be non-negative.
-  int zeroes;
-  int spaces;
-
-  // prefix is "0x"
-  int prefix_len;
-  char prefix[2];
-  if ((to_conv.flags & FormatFlags::ALTERNATE_FORM) ==
-      FormatFlags::ALTERNATE_FORM) {
-    prefix_len = 2;
-    prefix[0] = '0';
-    prefix[1] = a + ('x' - 'a');
-  } else {
-    prefix_len = 0;
-    prefix[0] = 0;
-  }
-
-  // negative precision indicates that it was not specified.
-  if (to_conv.precision < 0) {
-    if ((to_conv.flags &
-         (FormatFlags::LEADING_ZEROES | FormatFlags::LEFT_JUSTIFIED)) ==
-        FormatFlags::LEADING_ZEROES) {
-      // if this conv has flag 0 but not - and no specified precision, it's
-      // padded with 0's instead of spaces identically to if precision =
-      // min_width - (2 if prefix). For example: ("%#04x", 15) -> "0x0f"
-      zeroes = to_conv.min_width - digits_written - prefix_len;
-      if (zeroes < 0)
-        zeroes = 0;
-      spaces = 0;
-    } else if (digits_written < 1) {
-      // if no precision is specified, precision defaults to 1. This means that
-      // if the integer passed to the conversion is 0, a 0 will be printed.
-      // Example: ("%3x", 0) -> "  0"
-      zeroes = 1;
-      spaces = to_conv.min_width - zeroes - prefix_len;
-    } else {
-      // If there are enough digits to pass over the precision, just write the
-      // number, padded by spaces.
-      zeroes = 0;
-      spaces = to_conv.min_width - digits_written - prefix_len;
-    }
-  } else {
-    // if precision was specified, possibly write zeroes, and possibly write
-    // spaces. Example: ("%5.4x", 0x10000) -> "10000"
-    // If the check for if zeroes is negative was not there, spaces would be
-    // incorrectly evaluated as 1.
-    zeroes = to_conv.precision - digits_written; // a negative value means 0
-    if (zeroes < 0)
-      zeroes = 0;
-    spaces = to_conv.min_width - zeroes - digits_written - prefix_len;
-  }
-  if (spaces < 0)
-    spaces = 0;
-
-  if ((to_conv.flags & FormatFlags::LEFT_JUSTIFIED) ==
-      FormatFlags::LEFT_JUSTIFIED) {
-    // if left justified it goes prefix zeroes digits spaces
-    if (prefix[0] != 0)
-      RET_IF_RESULT_NEGATIVE(writer->write(prefix, 2));
-    if (zeroes > 0)
-      RET_IF_RESULT_NEGATIVE(writer->write_chars('0', zeroes));
-    if (digits_written > 0)
-      RET_IF_RESULT_NEGATIVE(writer->write(buffer + buff_cur, digits_written));
-    if (spaces > 0)
-      RET_IF_RESULT_NEGATIVE(writer->write_chars(' ', spaces));
-  } else {
-    // else it goes spaces prefix zeroes digits
-    if (spaces > 0)
-      RET_IF_RESULT_NEGATIVE(writer->write_chars(' ', spaces));
-    if (prefix[0] != 0)
-      RET_IF_RESULT_NEGATIVE(writer->write(prefix, 2));
-    if (zeroes > 0)
-      RET_IF_RESULT_NEGATIVE(writer->write_chars('0', zeroes));
-    if (digits_written > 0)
-      RET_IF_RESULT_NEGATIVE(writer->write(buffer + buff_cur, digits_written));
-  }
-  return WRITE_OK;
-}
-
-} // namespace printf_core
-} // namespace __llvm_libc
-
-#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_HEX_CONVERTER_H
--- a/libc/src/stdio/printf_core/int_converter.h
+++ b/libc/src/stdio/printf_core/int_converter.h
@ -9,6 +9,8 @@
 #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H

+#include "src/__support/CPP/ArrayRef.h"
+#include "src/__support/CPP/StringView.h"
 #include "src/__support/integer_to_string.h"
 #include "src/stdio/printf_core/converter_utils.h"
 #include "src/stdio/printf_core/core_structs.h"
@ -20,6 +22,23 @@
 namespace __llvm_libc {
 namespace printf_core {

+// ASCII case helpers keyed on bit 5 (value 32), which separates a-z from A-Z.
+// Results are only meaningful when the argument is an ASCII letter.
+constexpr char inline to_lower(char a) { return a | 32; } // force bit 5 on
+constexpr bool inline is_lower(char a) { return (a & 32) > 0; } // bit 5 set?
+
+cpp::optional<cpp::StringView> inline num_to_strview( // base from conv_name
+    uintmax_t num, cpp::MutableArrayRef<char> bufref, char conv_name) {
+  if (to_lower(conv_name) == 'x') { // x or X: hexadecimal
+    return IntegerToString<uintmax_t, 16>::convert(num, bufref,
+                                                   is_lower(conv_name));
+  } else if (conv_name == 'o') { // octal
+    return IntegerToString<uintmax_t, 8>::convert(num, bufref, true);
+  } else { // every other conversion name: decimal
+    return IntegerToString<uintmax_t, 10>::convert(num, bufref, true);
+  }
+}
+
 int inline convert_int(Writer *writer, const FormatSection &to_conv) {
  static constexpr size_t BITS_IN_BYTE = 8;
  static constexpr size_t BITS_IN_NUM = sizeof(uintmax_t) * BITS_IN_BYTE;
@ -28,24 +47,33 @@ int inline convert_int(Writer *writer, const FormatSection &to_conv) {
  bool is_negative = false;
  FormatFlags flags = to_conv.flags;

-  if (to_conv.conv_name == 'u') {
-    // These flags are only for signed conversions, so this removes them if the
-    // conversion is unsigned.
-    flags = FormatFlags(flags &
-                        ~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
-  } else {
+  const char a = is_lower(to_conv.conv_name) ? 'a' : 'A';
+
+  // If the conversion is signed, then handle negative values.
+  if (to_conv.conv_name == 'd' || to_conv.conv_name == 'i') {
    // Check if the number is negative by checking the high bit. This works even
    // for smaller numbers because they're sign extended by default.
    if ((num & (uintmax_t(1) << (BITS_IN_NUM - 1))) > 0) {
      is_negative = true;
      num = -num;
    }
+  } else {
+    // These flags are only for signed conversions, so this removes them if the
+    // conversion is unsigned.
+    flags = FormatFlags(flags &
+                        ~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
  }

  num = apply_length_modifier(num, to_conv.length_modifier);

-  auto const int_to_str = integer_to_string(num);
-  size_t digits_written = int_to_str.str().size();
+  static constexpr size_t BUFSIZE = IntegerToString<uintmax_t, 8>::BUFSIZE;
+  char buff[BUFSIZE];
+  cpp::MutableArrayRef<char> bufref(buff, BUFSIZE);
+  auto str = num_to_strview(num, bufref, to_conv.conv_name);
+  if (!str)
+    return INT_CONVERSION_ERROR;
+
+  size_t digits_written = str->size();

  char sign_char = 0;

@ -56,13 +84,25 @@ int inline convert_int(Writer *writer, const FormatSection &to_conv) {
  else if ((flags & FormatFlags::SPACE_PREFIX) == FormatFlags::SPACE_PREFIX)
    sign_char = ' ';

-  int sign_char_len = (sign_char == 0 ? 0 : 1);
-
  // These are signed to prevent underflow due to negative values. The eventual
  // values will always be non-negative.
  int zeroes;
  int spaces;

+  // prefix is "0x" for hexadecimal, or the sign character for signed
+  // conversions. Since hexadecimal is unsigned these will never conflict.
+  int prefix_len;
+  char prefix[2];
+  if ((to_lower(to_conv.conv_name) == 'x') &&
+      ((flags & FormatFlags::ALTERNATE_FORM) != 0)) {
+    prefix_len = 2;
+    prefix[0] = '0';
+    prefix[1] = a + ('x' - 'a');
+  } else {
+    prefix_len = (sign_char == 0 ? 0 : 1);
+    prefix[0] = sign_char;
+  }
+
  // Negative precision indicates that it was not specified.
  if (to_conv.precision < 0) {
    if ((flags & (FormatFlags::LEADING_ZEROES | FormatFlags::LEFT_JUSTIFIED)) ==
@ -70,15 +110,13 @@ int inline convert_int(Writer *writer, const FormatSection &to_conv) {
      // If this conv has flag 0 but not - and no specified precision, it's
      // padded with 0's instead of spaces identically to if precision =
      // min_width - (1 if sign_char). For example: ("%+04d", 1) -> "+001"
-      zeroes = to_conv.min_width - digits_written - sign_char_len;
-      if (zeroes < 0)
-        zeroes = 0;
+      zeroes = to_conv.min_width - digits_written - prefix_len;
      spaces = 0;
    } else {
      // If there are enough digits to pass over the precision, just write the
      // number, padded by spaces.
      zeroes = 0;
-      spaces = to_conv.min_width - digits_written - sign_char_len;
+      spaces = to_conv.min_width - digits_written - prefix_len;
    }
  } else {
    // If precision was specified, possibly write zeroes, and possibly write
@ -94,33 +132,35 @@ int inline convert_int(Writer *writer, const FormatSection &to_conv) {
    zeroes = to_conv.precision - digits_written; // a negative value means 0
    if (zeroes < 0)
      zeroes = 0;
-    spaces = to_conv.min_width - zeroes - digits_written - sign_char_len;
+    spaces = to_conv.min_width - zeroes - digits_written - prefix_len;
+  }
+
+  if ((to_conv.conv_name == 'o') &&
+      ((to_conv.flags & FormatFlags::ALTERNATE_FORM) != 0) && zeroes < 1) {
+    zeroes = 1;
+    --spaces;
  }
-  if (spaces < 0)
-    spaces = 0;

  if ((flags & FormatFlags::LEFT_JUSTIFIED) == FormatFlags::LEFT_JUSTIFIED) {
-    // If left justified it goes sign zeroes digits spaces
-    if (sign_char != 0)
-      RET_IF_RESULT_NEGATIVE(writer->write(&sign_char, 1));
+    // If left justified it goes prefix zeroes digits spaces
+    if (prefix_len != 0)
+      RET_IF_RESULT_NEGATIVE(writer->write(prefix, prefix_len));
    if (zeroes > 0)
      RET_IF_RESULT_NEGATIVE(writer->write_chars('0', zeroes));
    if (digits_written > 0)
-      RET_IF_RESULT_NEGATIVE(
-          writer->write(int_to_str.str().data(), digits_written));
+      RET_IF_RESULT_NEGATIVE(writer->write(str->data(), digits_written));
    if (spaces > 0)
      RET_IF_RESULT_NEGATIVE(writer->write_chars(' ', spaces));
  } else {
-    // Else it goes spaces sign zeroes digits
+    // Else it goes spaces prefix zeroes digits
    if (spaces > 0)
      RET_IF_RESULT_NEGATIVE(writer->write_chars(' ', spaces));
-    if (sign_char != 0)
-      RET_IF_RESULT_NEGATIVE(writer->write(&sign_char, 1));
+    if (prefix_len != 0)
+      RET_IF_RESULT_NEGATIVE(writer->write(prefix, prefix_len));
    if (zeroes > 0)
      RET_IF_RESULT_NEGATIVE(writer->write_chars('0', zeroes));
    if (digits_written > 0)
-      RET_IF_RESULT_NEGATIVE(
-          writer->write(int_to_str.str().data(), digits_written));
+      RET_IF_RESULT_NEGATIVE(writer->write(str->data(), digits_written));
  }
  return WRITE_OK;
 }
--- a/libc/src/stdio/printf_core/oct_converter.h
+++ b/libc/src/stdio/printf_core/oct_converter.h
@ -1,111 +0,0 @@
-//===-- Octal Converter for printf ------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_OCT_CONVERTER_H
-#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_OCT_CONVERTER_H
-
-#include "src/stdio/printf_core/converter_utils.h"
-#include "src/stdio/printf_core/core_structs.h"
-#include "src/stdio/printf_core/writer.h"
-
-#include <inttypes.h>
-#include <stddef.h>
-
-namespace __llvm_libc {
-namespace printf_core {
-
-int inline convert_oct(Writer *writer, const FormatSection &to_conv) {
-  // This is the number of digits it takes to represent a octal value of a
-  // certain number of bits. Each oct digit represents 3 bits, so the value is
-  // ceil(number of bits / 3).
-  constexpr size_t BUFF_LEN = ((sizeof(uintmax_t) * 8) + 2) / 3;
-  uintmax_t num = to_conv.conv_val_raw;
-  char buffer[BUFF_LEN];
-
-  num = apply_length_modifier(num, to_conv.length_modifier);
-
-  // Since the buffer is size to sized to be able fit the entire number, buf_cur
-  // can never reach 0. So, we do not need bounds checking on buf_cur.
-  size_t buff_cur = BUFF_LEN;
-  for (; num > 0 /* && buff_cur > 0 */; --buff_cur, num /= 8)
-    buffer[buff_cur - 1] = (num % 8) + '0';
-
-  size_t num_digits = BUFF_LEN - buff_cur;
-
-  // These are signed to prevent underflow due to negative values. Negative
-  // values are treated the same as 0.
-  int zeroes;
-  int spaces;
-
-  // Negative precision indicates that it was not specified.
-  if (to_conv.precision < 0) {
-    if ((to_conv.flags &
-         (FormatFlags::LEADING_ZEROES | FormatFlags::LEFT_JUSTIFIED)) ==
-        FormatFlags::LEADING_ZEROES) {
-      // If this conv has flag 0 but not - and no specified precision, it's
-      // padded with 0's instead of spaces identically to if precision =
-      // min_width. For example: ("%04o", 15) -> "0017"
-      zeroes = to_conv.min_width - num_digits;
-      spaces = 0;
-    } else if (num_digits < 1) {
-      // If no precision is specified, precision defaults to 1. This means that
-      // if the integer passed to the conversion is 0, a 0 will be printed.
-      // Example: ("%3o", 0) -> "  0"
-      zeroes = 1;
-      spaces = to_conv.min_width - zeroes;
-    } else {
-      // If there are enough digits to pass over the precision, just write the
-      // number, padded by spaces.
-      zeroes = 0;
-      spaces = to_conv.min_width - num_digits;
-    }
-  } else {
-    // If precision was specified, possibly write zeroes, and possibly write
-    // spaces. Example: ("%5.4o", 010000) -> "10000"
-    // If the check for if zeroes is negative was not there, spaces would be
-    // incorrectly evaluated as 1.
-    zeroes = to_conv.precision - num_digits; // a negative value means 0
-    if (zeroes < 0)
-      zeroes = 0;
-    spaces = to_conv.min_width - zeroes - num_digits;
-  }
-
-  // The alternate form prefix is "0", so it's handled by increasing the number
-  // of zeroes if necessary.
-  if (((to_conv.flags & FormatFlags::ALTERNATE_FORM) ==
-       FormatFlags::ALTERNATE_FORM) &&
-      zeroes < 1) {
-    zeroes = 1;
-    --spaces;
-  }
-
-  if ((to_conv.flags & FormatFlags::LEFT_JUSTIFIED) ==
-      FormatFlags::LEFT_JUSTIFIED) {
-    // If left justified the pattern is zeroes digits spaces
-    if (zeroes > 0)
-      RET_IF_RESULT_NEGATIVE(writer->write_chars('0', zeroes));
-    if (num_digits > 0)
-      RET_IF_RESULT_NEGATIVE(writer->write(buffer + buff_cur, num_digits));
-    if (spaces > 0)
-      RET_IF_RESULT_NEGATIVE(writer->write_chars(' ', spaces));
-  } else {
-    // Else the pattern is spaces zeroes digits
-    if (spaces > 0)
-      RET_IF_RESULT_NEGATIVE(writer->write_chars(' ', spaces));
-    if (zeroes > 0)
-      RET_IF_RESULT_NEGATIVE(writer->write_chars('0', zeroes));
-    if (num_digits > 0)
-      RET_IF_RESULT_NEGATIVE(writer->write(buffer + buff_cur, num_digits));
-  }
-  return WRITE_OK;
-}
-
-} // namespace printf_core
-} // namespace __llvm_libc
-
-#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_OCT_CONVERTER_H
--- a/libc/src/stdio/printf_core/ptr_converter.h
+++ b/libc/src/stdio/printf_core/ptr_converter.h
@ -11,7 +11,7 @@

 #include "src/stdio/printf_core/converter_utils.h"
 #include "src/stdio/printf_core/core_structs.h"
-#include "src/stdio/printf_core/hex_converter.h"
+#include "src/stdio/printf_core/int_converter.h"
 #include "src/stdio/printf_core/writer.h"

 namespace __llvm_libc {
@ -28,7 +28,7 @@ int inline convert_pointer(Writer *writer, const FormatSection &to_conv) {
    hex_conv.conv_name = 'x';
    hex_conv.flags = FormatFlags::ALTERNATE_FORM;
    hex_conv.conv_val_raw = reinterpret_cast<uintptr_t>(to_conv.conv_val_ptr);
-    return convert_hex(writer, hex_conv);
+    return convert_int(writer, hex_conv);
  }
  return WRITE_OK;
 }