[libc] move to combined integer converter

The functions converting integers into decimal, hexadecimal, and octal
are all very similar. This patch moves them to a single combined converter
to save code size.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D131302
This commit is contained in:
Michael Jones 2022-08-05 14:51:10 -07:00
parent 42e2946f51
commit 3510082a1c
8 changed files with 72 additions and 283 deletions

View File

@ -63,9 +63,7 @@ add_object_library(
string_converter.h
char_converter.h
int_converter.h
hex_converter.h
ptr_converter.h
oct_converter.h
write_int_converter.h
float_inf_nan_converter.h
float_hex_converter.h
@ -74,6 +72,7 @@ add_object_library(
.core_structs
libc.src.__support.integer_to_string
libc.src.__support.CPP.limits
libc.src.__support.CPP.string_view
libc.src.__support.FPUtil.fputil
)

View File

@ -38,12 +38,10 @@ int convert(Writer *writer, const FormatSection &to_conv) {
case 'd':
case 'i':
case 'u':
return convert_int(writer, to_conv);
case 'o':
return convert_oct(writer, to_conv);
case 'x':
case 'X':
return convert_hex(writer, to_conv);
return convert_int(writer, to_conv);
#ifndef LLVM_LIBC_PRINTF_DISABLE_FLOAT
// case 'f':
// case 'F':

View File

@ -22,11 +22,6 @@
// defines convert_int
#include "src/stdio/printf_core/int_converter.h"
// defines convert_oct
#include "src/stdio/printf_core/oct_converter.h"
// defines convert_hex
#include "src/stdio/printf_core/hex_converter.h"
#ifndef LLVM_LIBC_PRINTF_DISABLE_FLOAT
// defines convert_float_decimal
// defines convert_float_dec_exp

View File

@ -86,6 +86,7 @@ constexpr int WRITE_OK = 0;
constexpr int FILE_WRITE_ERROR = -1;
constexpr int FILE_STATUS_ERROR = -2;
constexpr int NULLPTR_WRITE_ERROR = -3;
constexpr int INT_CONVERSION_ERROR = -4;
} // namespace printf_core
} // namespace __llvm_libc

View File

@ -1,133 +0,0 @@
//===-- Hexadecimal Converter for printf ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_HEX_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_HEX_CONVERTER_H
#include "src/stdio/printf_core/converter_utils.h"
#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/writer.h"
#include <inttypes.h>
#include <stddef.h>
namespace __llvm_libc {
namespace printf_core {
// Writes the unsigned value stored in to_conv.conv_val_raw to writer as a
// hexadecimal number ('x' selects lowercase digits, anything else uppercase),
// honoring the length modifier, min_width, precision, and the '#', '0' and '-'
// flags found in to_conv.
//
// Returns WRITE_OK on success; if any write to writer reports a negative
// result, RET_IF_RESULT_NEGATIVE returns from this function early.
//
// This is declared inline because it is defined in a header.
int inline convert_hex(Writer *writer, const FormatSection &to_conv) {
  // Each hex digit represents 4 bits, so two digits per byte is exactly enough
  // room for the widest possible value.
  static constexpr size_t BUFF_LEN = sizeof(uintmax_t) * 2;
  uintmax_t num = to_conv.conv_val_raw;
  char buffer[BUFF_LEN];

  // All of the characters will be defined relative to variable a, which will be
  // the appropriate case based on the name of the conversion.
  char a;
  if (to_conv.conv_name == 'x')
    a = 'a';
  else
    a = 'A';

  num = apply_length_modifier(num, to_conv.length_modifier);

  // buff_cur can never reach 0, since the buffer is sized to always be able to
  // contain the whole integer. This means that bounds checking it should be
  // unnecessary. The digits are produced least significant first, so the
  // buffer is filled from the back.
  size_t buff_cur = BUFF_LEN;
  for (; num > 0 /* && buff_cur > 0 */; --buff_cur, num /= 16)
    buffer[buff_cur - 1] =
        ((num % 16) > 9) ? ((num % 16) - 10 + a) : ((num % 16) + '0');

  size_t digits_written = BUFF_LEN - buff_cur;
  // A signed copy of the digit count (at most BUFF_LEN) so that the padding
  // arithmetic below is done in int and can go negative without wrapping.
  const int digits = static_cast<int>(digits_written);

  // These are signed to prevent underflow due to negative values. The eventual
  // values will always be non-negative.
  int zeroes;
  int spaces;

  // The alternate form prefix is "0x" (or "0X"). The C standard only adds it
  // to a nonzero result, and a value of zero produces no digits in the loop
  // above, so the prefix is skipped when digits_written is 0.
  int prefix_len = 0;
  char prefix[2] = {0, 0};
  if (((to_conv.flags & FormatFlags::ALTERNATE_FORM) ==
       FormatFlags::ALTERNATE_FORM) &&
      digits_written > 0) {
    prefix_len = 2;
    prefix[0] = '0';
    prefix[1] = a + ('x' - 'a');
  }

  // Negative precision indicates that it was not specified.
  if (to_conv.precision < 0) {
    if ((to_conv.flags &
         (FormatFlags::LEADING_ZEROES | FormatFlags::LEFT_JUSTIFIED)) ==
        FormatFlags::LEADING_ZEROES) {
      // If this conv has flag 0 but not - and no specified precision, it's
      // padded with 0's instead of spaces identically to if precision =
      // min_width - (2 if prefix). For example: ("%#04x", 15) -> "0x0f"
      zeroes = to_conv.min_width - digits - prefix_len;
      // The default precision is still 1, so a value of zero must print at
      // least one '0' even when min_width asks for nothing: ("%0x", 0) -> "0"
      if (digits < 1 && zeroes < 1)
        zeroes = 1;
      if (zeroes < 0)
        zeroes = 0;
      spaces = 0;
    } else if (digits < 1) {
      // If no precision is specified, precision defaults to 1. This means that
      // if the integer passed to the conversion is 0, a 0 will be printed.
      // Example: ("%3x", 0) -> "  0"
      zeroes = 1;
      spaces = to_conv.min_width - zeroes - prefix_len;
    } else {
      // If there are enough digits to pass over the precision, just write the
      // number, padded by spaces.
      zeroes = 0;
      spaces = to_conv.min_width - digits - prefix_len;
    }
  } else {
    // If precision was specified, possibly write zeroes, and possibly write
    // spaces. Example: ("%5.4x", 0x10000) -> "10000"
    // If the check for if zeroes is negative was not there, spaces would be
    // incorrectly evaluated as 1.
    zeroes = to_conv.precision - digits; // a negative value means 0
    if (zeroes < 0)
      zeroes = 0;
    spaces = to_conv.min_width - zeroes - digits - prefix_len;
  }
  if (spaces < 0)
    spaces = 0;

  if ((to_conv.flags & FormatFlags::LEFT_JUSTIFIED) ==
      FormatFlags::LEFT_JUSTIFIED) {
    // If left justified it goes prefix zeroes digits spaces
    if (prefix_len != 0)
      RET_IF_RESULT_NEGATIVE(writer->write(prefix, prefix_len));
    if (zeroes > 0)
      RET_IF_RESULT_NEGATIVE(writer->write_chars('0', zeroes));
    if (digits_written > 0)
      RET_IF_RESULT_NEGATIVE(writer->write(buffer + buff_cur, digits_written));
    if (spaces > 0)
      RET_IF_RESULT_NEGATIVE(writer->write_chars(' ', spaces));
  } else {
    // Else it goes spaces prefix zeroes digits
    if (spaces > 0)
      RET_IF_RESULT_NEGATIVE(writer->write_chars(' ', spaces));
    if (prefix_len != 0)
      RET_IF_RESULT_NEGATIVE(writer->write(prefix, prefix_len));
    if (zeroes > 0)
      RET_IF_RESULT_NEGATIVE(writer->write_chars('0', zeroes));
    if (digits_written > 0)
      RET_IF_RESULT_NEGATIVE(writer->write(buffer + buff_cur, digits_written));
  }
  return WRITE_OK;
}
} // namespace printf_core
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_HEX_CONVERTER_H

View File

@ -9,6 +9,8 @@
#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
#include "src/__support/CPP/ArrayRef.h"
#include "src/__support/CPP/StringView.h"
#include "src/__support/integer_to_string.h"
#include "src/stdio/printf_core/converter_utils.h"
#include "src/stdio/printf_core/core_structs.h"
@ -20,6 +22,23 @@
namespace __llvm_libc {
namespace printf_core {
// These functions only work on characters that are already known to be in the
// alphabet. Their behavior is undefined otherwise.
// Sets the 0x20 bit of a, which turns an ASCII uppercase letter into its
// lowercase counterpart and leaves a lowercase letter unchanged.
constexpr inline char to_lower(char a) {
  return static_cast<char>(a | 0x20);
}
// Reports whether the 0x20 bit of a is set; for an ASCII letter that bit is
// what distinguishes lowercase from uppercase.
constexpr inline bool is_lower(char a) {
  return (a & 0x20) != 0;
}
// Converts num into its textual digits inside bufref, picking the base from
// the conversion name: 'x'/'X' select base 16 (lowercase digits for 'x',
// uppercase for 'X'), 'o' selects base 8, and every other name selects
// base 10. The result of the matching IntegerToString::convert call is
// returned unchanged, so an empty optional is passed through to the caller.
cpp::optional<cpp::StringView> inline num_to_strview(
    uintmax_t num, cpp::MutableArrayRef<char> bufref, char conv_name) {
  switch (conv_name) {
  case 'x':
  case 'X':
    // The case of the conversion name decides the case of the hex digits.
    return IntegerToString<uintmax_t, 16>::convert(num, bufref,
                                                   is_lower(conv_name));
  case 'o':
    return IntegerToString<uintmax_t, 8>::convert(num, bufref, true);
  default:
    return IntegerToString<uintmax_t, 10>::convert(num, bufref, true);
  }
}
int inline convert_int(Writer *writer, const FormatSection &to_conv) {
static constexpr size_t BITS_IN_BYTE = 8;
static constexpr size_t BITS_IN_NUM = sizeof(uintmax_t) * BITS_IN_BYTE;
@ -28,24 +47,33 @@ int inline convert_int(Writer *writer, const FormatSection &to_conv) {
bool is_negative = false;
FormatFlags flags = to_conv.flags;
if (to_conv.conv_name == 'u') {
// These flags are only for signed conversions, so this removes them if the
// conversion is unsigned.
flags = FormatFlags(flags &
~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
} else {
const char a = is_lower(to_conv.conv_name) ? 'a' : 'A';
// If the conversion is signed, then handle negative values.
if (to_conv.conv_name == 'd' || to_conv.conv_name == 'i') {
// Check if the number is negative by checking the high bit. This works even
// for smaller numbers because they're sign extended by default.
if ((num & (uintmax_t(1) << (BITS_IN_NUM - 1))) > 0) {
is_negative = true;
num = -num;
}
} else {
// These flags are only for signed conversions, so this removes them if the
// conversion is unsigned.
flags = FormatFlags(flags &
~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
}
num = apply_length_modifier(num, to_conv.length_modifier);
auto const int_to_str = integer_to_string(num);
size_t digits_written = int_to_str.str().size();
static constexpr size_t BUFSIZE = IntegerToString<uintmax_t, 8>::BUFSIZE;
char buff[BUFSIZE];
cpp::MutableArrayRef<char> bufref(buff, BUFSIZE);
auto str = num_to_strview(num, bufref, to_conv.conv_name);
if (!str)
return INT_CONVERSION_ERROR;
size_t digits_written = str->size();
char sign_char = 0;
@ -56,13 +84,25 @@ int inline convert_int(Writer *writer, const FormatSection &to_conv) {
else if ((flags & FormatFlags::SPACE_PREFIX) == FormatFlags::SPACE_PREFIX)
sign_char = ' ';
int sign_char_len = (sign_char == 0 ? 0 : 1);
// These are signed to prevent underflow due to negative values. The eventual
// values will always be non-negative.
int zeroes;
int spaces;
// prefix is "0x" for hexadecimal, or the sign character for signed
// conversions. Since hexadecimal is unsigned these will never conflict.
int prefix_len;
char prefix[2];
if ((to_lower(to_conv.conv_name) == 'x') &&
((flags & FormatFlags::ALTERNATE_FORM) != 0)) {
prefix_len = 2;
prefix[0] = '0';
prefix[1] = a + ('x' - 'a');
} else {
prefix_len = (sign_char == 0 ? 0 : 1);
prefix[0] = sign_char;
}
// Negative precision indicates that it was not specified.
if (to_conv.precision < 0) {
if ((flags & (FormatFlags::LEADING_ZEROES | FormatFlags::LEFT_JUSTIFIED)) ==
@ -70,15 +110,13 @@ int inline convert_int(Writer *writer, const FormatSection &to_conv) {
// If this conv has flag 0 but not - and no specified precision, it's
// padded with 0's instead of spaces identically to if precision =
// min_width - (1 if sign_char). For example: ("%+04d", 1) -> "+001"
zeroes = to_conv.min_width - digits_written - sign_char_len;
if (zeroes < 0)
zeroes = 0;
zeroes = to_conv.min_width - digits_written - prefix_len;
spaces = 0;
} else {
// If there are enough digits to pass over the precision, just write the
// number, padded by spaces.
zeroes = 0;
spaces = to_conv.min_width - digits_written - sign_char_len;
spaces = to_conv.min_width - digits_written - prefix_len;
}
} else {
// If precision was specified, possibly write zeroes, and possibly write
@ -94,33 +132,35 @@ int inline convert_int(Writer *writer, const FormatSection &to_conv) {
zeroes = to_conv.precision - digits_written; // a negative value means 0
if (zeroes < 0)
zeroes = 0;
spaces = to_conv.min_width - zeroes - digits_written - sign_char_len;
spaces = to_conv.min_width - zeroes - digits_written - prefix_len;
}
if ((to_conv.conv_name == 'o') &&
((to_conv.flags & FormatFlags::ALTERNATE_FORM) != 0) && zeroes < 1) {
zeroes = 1;
--spaces;
}
if (spaces < 0)
spaces = 0;
if ((flags & FormatFlags::LEFT_JUSTIFIED) == FormatFlags::LEFT_JUSTIFIED) {
// If left justified it goes sign zeroes digits spaces
if (sign_char != 0)
RET_IF_RESULT_NEGATIVE(writer->write(&sign_char, 1));
// If left justified it goes prefix zeroes digits spaces
if (prefix_len != 0)
RET_IF_RESULT_NEGATIVE(writer->write(prefix, prefix_len));
if (zeroes > 0)
RET_IF_RESULT_NEGATIVE(writer->write_chars('0', zeroes));
if (digits_written > 0)
RET_IF_RESULT_NEGATIVE(
writer->write(int_to_str.str().data(), digits_written));
RET_IF_RESULT_NEGATIVE(writer->write(str->data(), digits_written));
if (spaces > 0)
RET_IF_RESULT_NEGATIVE(writer->write_chars(' ', spaces));
} else {
// Else it goes spaces sign zeroes digits
// Else it goes spaces prefix zeroes digits
if (spaces > 0)
RET_IF_RESULT_NEGATIVE(writer->write_chars(' ', spaces));
if (sign_char != 0)
RET_IF_RESULT_NEGATIVE(writer->write(&sign_char, 1));
if (prefix_len != 0)
RET_IF_RESULT_NEGATIVE(writer->write(prefix, prefix_len));
if (zeroes > 0)
RET_IF_RESULT_NEGATIVE(writer->write_chars('0', zeroes));
if (digits_written > 0)
RET_IF_RESULT_NEGATIVE(
writer->write(int_to_str.str().data(), digits_written));
RET_IF_RESULT_NEGATIVE(writer->write(str->data(), digits_written));
}
return WRITE_OK;
}

View File

@ -1,111 +0,0 @@
//===-- Octal Converter for printf ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_OCT_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_OCT_CONVERTER_H
#include "src/stdio/printf_core/converter_utils.h"
#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/writer.h"
#include <inttypes.h>
#include <stddef.h>
namespace __llvm_libc {
namespace printf_core {
// Writes the unsigned value stored in to_conv.conv_val_raw to writer as an
// octal number, honoring the length modifier, min_width, precision, and the
// '#', '0' and '-' flags found in to_conv.
//
// Returns WRITE_OK on success; if any write to writer reports a negative
// result, RET_IF_RESULT_NEGATIVE returns from this function early.
int inline convert_oct(Writer *writer, const FormatSection &to_conv) {
  // This is the number of digits it takes to represent a octal value of a
  // certain number of bits. Each oct digit represents 3 bits, so the value is
  // ceil(number of bits / 3).
  constexpr size_t BUFF_LEN = ((sizeof(uintmax_t) * 8) + 2) / 3;
  uintmax_t num = to_conv.conv_val_raw;
  char buffer[BUFF_LEN];

  num = apply_length_modifier(num, to_conv.length_modifier);

  // Since the buffer is sized to fit the entire number, buff_cur can never
  // reach 0, so no bounds checking is needed on it. The digits are produced
  // least significant first, so the buffer is filled from the back.
  size_t buff_cur = BUFF_LEN;
  for (; num > 0 /* && buff_cur > 0 */; --buff_cur, num /= 8)
    buffer[buff_cur - 1] = (num % 8) + '0';

  size_t num_digits = BUFF_LEN - buff_cur;
  // A signed copy of the digit count (at most BUFF_LEN) so that the padding
  // arithmetic below is done in int and can go negative without wrapping.
  const int digits = static_cast<int>(num_digits);

  // These are signed to prevent underflow due to negative values. Negative
  // values are treated the same as 0.
  int zeroes;
  int spaces;

  // Negative precision indicates that it was not specified.
  if (to_conv.precision < 0) {
    if ((to_conv.flags &
         (FormatFlags::LEADING_ZEROES | FormatFlags::LEFT_JUSTIFIED)) ==
        FormatFlags::LEADING_ZEROES) {
      // If this conv has flag 0 but not - and no specified precision, it's
      // padded with 0's instead of spaces identically to if precision =
      // min_width. For example: ("%04o", 15) -> "0017"
      zeroes = to_conv.min_width - digits;
      // The default precision is still 1, so a value of zero must print at
      // least one '0' even when min_width asks for nothing: ("%0o", 0) -> "0"
      if (digits < 1 && zeroes < 1)
        zeroes = 1;
      spaces = 0;
    } else if (digits < 1) {
      // If no precision is specified, precision defaults to 1. This means that
      // if the integer passed to the conversion is 0, a 0 will be printed.
      // Example: ("%3o", 0) -> "  0"
      zeroes = 1;
      spaces = to_conv.min_width - zeroes;
    } else {
      // If there are enough digits to pass over the precision, just write the
      // number, padded by spaces.
      zeroes = 0;
      spaces = to_conv.min_width - digits;
    }
  } else {
    // If precision was specified, possibly write zeroes, and possibly write
    // spaces. Example: ("%5.4o", 010000) -> "10000"
    // If the check for if zeroes is negative was not there, spaces would be
    // incorrectly evaluated as 1.
    zeroes = to_conv.precision - digits; // a negative value means 0
    if (zeroes < 0)
      zeroes = 0;
    spaces = to_conv.min_width - zeroes - digits;
  }

  // The alternate form prefix is "0", so it's handled by increasing the number
  // of zeroes if necessary.
  if (((to_conv.flags & FormatFlags::ALTERNATE_FORM) ==
       FormatFlags::ALTERNATE_FORM) &&
      zeroes < 1) {
    zeroes = 1;
    --spaces;
  }

  if ((to_conv.flags & FormatFlags::LEFT_JUSTIFIED) ==
      FormatFlags::LEFT_JUSTIFIED) {
    // If left justified the pattern is zeroes digits spaces
    if (zeroes > 0)
      RET_IF_RESULT_NEGATIVE(writer->write_chars('0', zeroes));
    if (num_digits > 0)
      RET_IF_RESULT_NEGATIVE(writer->write(buffer + buff_cur, num_digits));
    if (spaces > 0)
      RET_IF_RESULT_NEGATIVE(writer->write_chars(' ', spaces));
  } else {
    // Else the pattern is spaces zeroes digits
    if (spaces > 0)
      RET_IF_RESULT_NEGATIVE(writer->write_chars(' ', spaces));
    if (zeroes > 0)
      RET_IF_RESULT_NEGATIVE(writer->write_chars('0', zeroes));
    if (num_digits > 0)
      RET_IF_RESULT_NEGATIVE(writer->write(buffer + buff_cur, num_digits));
  }
  return WRITE_OK;
}
} // namespace printf_core
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_OCT_CONVERTER_H

View File

@ -11,7 +11,7 @@
#include "src/stdio/printf_core/converter_utils.h"
#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/hex_converter.h"
#include "src/stdio/printf_core/int_converter.h"
#include "src/stdio/printf_core/writer.h"
namespace __llvm_libc {
@ -28,7 +28,7 @@ int inline convert_pointer(Writer *writer, const FormatSection &to_conv) {
hex_conv.conv_name = 'x';
hex_conv.flags = FormatFlags::ALTERNATE_FORM;
hex_conv.conv_val_raw = reinterpret_cast<uintptr_t>(to_conv.conv_val_ptr);
return convert_hex(writer, hex_conv);
return convert_int(writer, hex_conv);
}
return WRITE_OK;
}