llvm-project/libcxx/include/__format/formatter_integral.h

// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCPP___FORMAT_FORMATTER_INTEGRAL_H
#define _LIBCPP___FORMAT_FORMATTER_INTEGRAL_H

#include <__algorithm/copy.h>
#include <__algorithm/copy_n.h>
#include <__algorithm/fill_n.h>
#include <__algorithm/transform.h>
#include <__assert>
#include <__config>
#include <__format/format_error.h>
#include <__format/format_fwd.h>
#include <__format/formatter.h>
#include <__format/parser_std_format_spec.h>
#include <__utility/unreachable.h>
#include <array>
#include <charconv>
#include <concepts>
#include <limits>
#include <string>

#ifndef _LIBCPP_HAS_NO_LOCALIZATION
#include <locale>
#endif

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#  pragma GCC system_header
#endif

_LIBCPP_PUSH_MACROS
#include <__undef_macros>

_LIBCPP_BEGIN_NAMESPACE_STD

#if _LIBCPP_STD_VER > 17

/**
 * Integral formatting classes.
 *
 * There are two types used here:
 * * C++-type, the type as used in C++.
 * * format-type, the output type specified in the std-format-spec.
 *
 * Design of the integral formatters consists of several layers.
 * * @ref __parser_integral The basic std-format-spec parser for all integral
 *   classes. This parser does the basic sanity checks. It also contains some
 *   helper functions that are nice to have available for all parsers.
 * * A C++-type specific parser. These parsers must derive from
 *   @ref __parser_integral. Their task is to validate whether the parsed
 *   std-format-spec is valid for the C++-type and selected format-type. After
 *   validation they need to make sure all members are properly set. For
 *   example, when the alignment hasn't changed it needs to set the proper
 *   default alignment for the format-type. The following parsers are available:
 *   - @ref __parser_integer
 *   - @ref __parser_char
 *   - @ref __parser_bool
 * * A general formatter for all integral types @ref __formatter_integral. This
 *   formatter can handle all formatting of integers and characters. The class
 *   derives from the proper formatter.
 *   Note the boolean string format-type isn't supported in this class.
 * * A typedef C++-type group combining the @ref __formatter_integral with a
 *   parser:
 *   * @ref __formatter_integer
 *   * @ref __formatter_char
 *   * @ref __formatter_bool
 * * Then every C++-type has its own formatter specializations. They inherit
 *   from the C++-type group typedef. Most specializations need nothing else.
 *   Others need some additional specializations in this class.
 */
namespace __format_spec {

/** Wrapper around @ref to_chars, returning the output pointer. */
template <integral _Tp>
_LIBCPP_HIDE_FROM_ABI char* __to_buffer(char* __first, char* __last,
                                        _Tp __value, int __base) {
  // TODO FMT Evaluate code overhead due to not calling the internal function
  // directly. (Should be zero overhead.)
  to_chars_result __r = _VSTD::to_chars(__first, __last, __value, __base);
  _LIBCPP_ASSERT(__r.ec == errc(0), "Internal buffer too small");
  return __r.ptr;
}

/**
 * Helper to determine the buffer size to output a integer in Base @em x.
 *
 * There are several overloads for the supported bases. The function uses the
 * base as template argument so it can be used in a constant expression.
 */
template <unsigned_integral _Tp, size_t _Base>
_LIBCPP_HIDE_FROM_ABI constexpr size_t __buffer_size() noexcept
    requires(_Base == 2) {
  return numeric_limits<_Tp>::digits // The number of binary digits.
         + 2                         // Reserve space for the '0[Bb]' prefix.
         + 1;                        // Reserve space for the sign.
}

template <unsigned_integral _Tp, size_t _Base>
_LIBCPP_HIDE_FROM_ABI constexpr size_t __buffer_size() noexcept
    requires(_Base == 8) {
  return numeric_limits<_Tp>::digits // The number of binary digits.
             / 3                     // Adjust to octal.
         + 1                         // Turn floor to ceil.
         + 1                         // Reserve space for the '0' prefix.
         + 1;                        // Reserve space for the sign.
}

template <unsigned_integral _Tp, size_t _Base>
_LIBCPP_HIDE_FROM_ABI constexpr size_t __buffer_size() noexcept
    requires(_Base == 10) {
  return numeric_limits<_Tp>::digits10 // The floored value.
         + 1                           // Turn floor to ceil.
         + 1;                          // Reserve space for the sign.
}

template <unsigned_integral _Tp, size_t _Base>
_LIBCPP_HIDE_FROM_ABI constexpr size_t __buffer_size() noexcept
    requires(_Base == 16) {
  return numeric_limits<_Tp>::digits // The number of binary digits.
             / 4                     // Adjust to hexadecimal.
         + 2                         // Reserve space for the '0[Xx]' prefix.
         + 1;                        // Reserve space for the sign.
}

/**
 * Determines the required grouping based on the size of the input.
 *
 * The grouping's last element will be repeated. For simplicity this repeating
 * is unwrapped based on the length of the input. (When the input is short some
 * groups are not processed.)
 *
 * @returns The size of the groups to write. This means the number of
 * separator characters written is size() - 1.
 *
 * @note Since zero-sized groups cause issues they are silently ignored.
 *
 * @note The grouping field of the locale is always a @c std::string,
 * regardless whether the @c std::numpunct's type is @c char or @c wchar_t.
 */
_LIBCPP_HIDE_FROM_ABI inline string
__determine_grouping(ptrdiff_t __size, const string& __grouping) {
  _LIBCPP_ASSERT(!__grouping.empty() && __size > __grouping[0],
                 "The slow grouping formatting is used while there will be no "
                 "separators written");
  string __r;
  auto __end = __grouping.end() - 1;
  auto __ptr = __grouping.begin();

  while (true) {
    __size -= *__ptr;
    if (__size > 0)
      __r.push_back(*__ptr);
    else {
      // __size <= 0 so the value pushed will be <= *__ptr.
      __r.push_back(*__ptr + __size);
      return __r;
    }

    // Proceed to the next group.
    if (__ptr != __end) {
      do {
        ++__ptr;
        // Skip grouping with a width of 0.
      } while (*__ptr == 0 && __ptr != __end);
    }
  }

  __libcpp_unreachable();
}

template <class _Parser>
requires __formatter::__char_type<typename _Parser::char_type>
class _LIBCPP_TEMPLATE_VIS __formatter_integral : public _Parser {
public:
  using _CharT = typename _Parser::char_type;

  template <integral _Tp>
  _LIBCPP_HIDE_FROM_ABI auto format(_Tp __value, auto& __ctx)
      -> decltype(__ctx.out()) {
    if (this->__width_needs_substitution())
      this->__substitute_width_arg_id(__ctx.arg(this->__width));

    if (this->__type == _Flags::_Type::__char)
      return __format_as_char(__value, __ctx);

    if constexpr (unsigned_integral<_Tp>)
      return __format_unsigned_integral(__value, false, __ctx);
    else {
      // Depending on the std-format-spec string the sign and the value
      // might not be outputted together:
      // - alternate form may insert a prefix string.
      // - zero-padding may insert additional '0' characters.
      // Therefore the value is processed as a positive unsigned value.
      // The function @ref __insert_sign will a '-' when the value was negative.
      auto __r = __to_unsigned_like(__value);
      bool __negative = __value < 0;
      if (__negative)
        __r = __complement(__r);

      return __format_unsigned_integral(__r, __negative, __ctx);
    }
  }

private:
  /** Generic formatting for format-type c. */
  _LIBCPP_HIDE_FROM_ABI auto __format_as_char(integral auto __value,
                                              auto& __ctx)
      -> decltype(__ctx.out()) {
    if (this->__alignment == _Flags::_Alignment::__default)
      this->__alignment = _Flags::_Alignment::__right;

    using _Tp = decltype(__value);
    if constexpr (!same_as<_CharT, _Tp>) {
      // cmp_less and cmp_greater can't be used for character types.
      if constexpr (signed_integral<_CharT> == signed_integral<_Tp>) {
        if (__value < numeric_limits<_CharT>::min() ||
            __value > numeric_limits<_CharT>::max())
          __throw_format_error(
              "Integral value outside the range of the char type");
      } else if constexpr (signed_integral<_CharT>) {
        // _CharT is signed _Tp is unsigned
        if (__value >
            static_cast<make_unsigned_t<_CharT>>(numeric_limits<_CharT>::max()))
          __throw_format_error(
              "Integral value outside the range of the char type");
      } else {
        // _CharT is unsigned _Tp is signed
        if (__value < 0 || static_cast<make_unsigned_t<_Tp>>(__value) >
                               numeric_limits<_CharT>::max())
          __throw_format_error(
              "Integral value outside the range of the char type");
      }
    }

    const auto __c = static_cast<_CharT>(__value);
    return __write(_VSTD::addressof(__c), _VSTD::addressof(__c) + 1,
                   __ctx.out());
  }

  /**
   * Generic formatting for format-type bBdoxX.
   *
   * This small wrapper allocates a buffer with the required size. Then calls
   * the real formatter with the buffer and the prefix for the base.
   */
  _LIBCPP_HIDE_FROM_ABI auto
  __format_unsigned_integral(unsigned_integral auto __value, bool __negative,
                             auto& __ctx) -> decltype(__ctx.out()) {
    switch (this->__type) {
    case _Flags::_Type::__binary_lower_case: {
      array<char, __buffer_size<decltype(__value), 2>()> __array;
      return __format_unsigned_integral(__array.begin(), __array.end(), __value,
                                        __negative, 2, __ctx, "0b");
    }
    case _Flags::_Type::__binary_upper_case: {
      array<char, __buffer_size<decltype(__value), 2>()> __array;
      return __format_unsigned_integral(__array.begin(), __array.end(), __value,
                                        __negative, 2, __ctx, "0B");
    }
    case _Flags::_Type::__octal: {
      // Octal is special; if __value == 0 there's no prefix.
      array<char, __buffer_size<decltype(__value), 8>()> __array;
      return __format_unsigned_integral(__array.begin(), __array.end(), __value,
                                        __negative, 8, __ctx,
                                        __value != 0 ? "0" : nullptr);
    }
    case _Flags::_Type::__decimal: {
      array<char, __buffer_size<decltype(__value), 10>()> __array;
      return __format_unsigned_integral(__array.begin(), __array.end(), __value,
                                        __negative, 10, __ctx, nullptr);
    }
    case _Flags::_Type::__hexadecimal_lower_case: {
      array<char, __buffer_size<decltype(__value), 16>()> __array;
      return __format_unsigned_integral(__array.begin(), __array.end(), __value,
                                        __negative, 16, __ctx, "0x");
    }
    case _Flags::_Type::__hexadecimal_upper_case: {
      array<char, __buffer_size<decltype(__value), 16>()> __array;
      return __format_unsigned_integral(__array.begin(), __array.end(), __value,
                                        __negative, 16, __ctx, "0X");
    }
    default:
      _LIBCPP_ASSERT(false, "The parser should have validated the type");
      __libcpp_unreachable();
    }
  }

  template <class _Tp>
  requires(same_as<char, _Tp> || same_as<wchar_t, _Tp>) _LIBCPP_HIDE_FROM_ABI
      auto __write(const _Tp* __first, const _Tp* __last, auto __out_it)
          -> decltype(__out_it) {

    unsigned __size = __last - __first;
    if (this->__type != _Flags::_Type::__hexadecimal_upper_case) [[likely]] {
      if (__size >= this->__width)
        return _VSTD::copy(__first, __last, _VSTD::move(__out_it));

      return __formatter::__write(_VSTD::move(__out_it), __first, __last,
                                  __size, this->__width, this->__fill,
                                  this->__alignment);
    }

    // this->__type == _Flags::_Type::__hexadecimal_upper_case
    // This means all characters in the range [a-f] need to be changed to their
    // uppercase representation. The transformation is done as transformation
    // in the output routine instead of before. This avoids another pass over
    // the data.
    // TODO FMT See whether it's possible to do this transformation during the
    // conversion. (This probably requires changing std::to_chars' alphabet.)
    if (__size >= this->__width)
      return _VSTD::transform(__first, __last, _VSTD::move(__out_it),
                              __hex_to_upper);

    return __formatter::__write(_VSTD::move(__out_it), __first, __last, __size,
                                __hex_to_upper, this->__width, this->__fill,
                                this->__alignment);
  }

  _LIBCPP_HIDE_FROM_ABI auto
  __format_unsigned_integral(char* __begin, char* __end,
                             unsigned_integral auto __value, bool __negative,
                             int __base, auto& __ctx, const char* __prefix)
      -> decltype(__ctx.out()) {
    char* __first = __insert_sign(__begin, __negative, this->__sign);
    if (this->__alternate_form && __prefix)
      while (*__prefix)
        *__first++ = *__prefix++;

    char* __last = __to_buffer(__first, __end, __value, __base);
#ifndef _LIBCPP_HAS_NO_LOCALIZATION
    if (this->__locale_specific_form) {
      const auto& __np = use_facet<numpunct<_CharT>>(__ctx.locale());
      string __grouping = __np.grouping();
      ptrdiff_t __size = __last - __first;
      // Writing the grouped form has more overhead than the normal output
      // routines. If there will be no separators written the locale-specific
      // form is identical to the normal routine. Test whether to grouped form
      // is required.
      if (!__grouping.empty() && __size > __grouping[0])
        return __format_grouping(__ctx.out(), __begin, __first, __last,
                                 __determine_grouping(__size, __grouping),
                                 __np.thousands_sep());
    }
#endif
    auto __out_it = __ctx.out();
    if (this->__alignment != _Flags::_Alignment::__default)
      __first = __begin;
    else {
      // __buf contains [sign][prefix]data
      //                              ^ location of __first
      // The zero padding is done like:
      // - Write [sign][prefix]
      // - Write data right aligned with '0' as fill character.
      __out_it = _VSTD::copy(__begin, __first, _VSTD::move(__out_it));
      this->__alignment = _Flags::_Alignment::__right;
      this->__fill = _CharT('0');
      uint32_t __size = __first - __begin;
      this->__width -= _VSTD::min(__size, this->__width);
    }

    return __write(__first, __last, _VSTD::move(__out_it));
  }

#ifndef _LIBCPP_HAS_NO_LOCALIZATION
  /** Format's the locale-specific form's groupings. */
  template <class _OutIt, class _CharT>
  _LIBCPP_HIDE_FROM_ABI _OutIt
  __format_grouping(_OutIt __out_it, const char* __begin, const char* __first,
                    const char* __last, string&& __grouping, _CharT __sep) {

    // TODO FMT This function duplicates some functionality of the normal
    // output routines. Evaluate whether these parts can be efficiently
    // combined with the existing routines.

    unsigned __size = (__first - __begin) +    // [sign][prefix]
                      (__last - __first) +     // data
                      (__grouping.size() - 1); // number of separator characters

    __formatter::__padding_size_result __padding = {0, 0};
    if (this->__alignment == _Flags::_Alignment::__default) {
      // Write [sign][prefix].
      __out_it = _VSTD::copy(__begin, __first, _VSTD::move(__out_it));

      if (this->__width > __size) {
        // Write zero padding.
        __padding.__before = this->__width - __size;
        __out_it = _VSTD::fill_n(_VSTD::move(__out_it), this->__width - __size,
                                 _CharT('0'));
      }
    } else {
      if (this->__width > __size) {
        // Determine padding and write padding.
        __padding = __formatter::__padding_size(__size, this->__width,
                                                this->__alignment);

        __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before,
                                 this->__fill);
      }
      // Write [sign][prefix].
      __out_it = _VSTD::copy(__begin, __first, _VSTD::move(__out_it));
    }

    auto __r = __grouping.rbegin();
    auto __e = __grouping.rend() - 1;
    _LIBCPP_ASSERT(__r != __e, "The slow grouping formatting is used while "
                               "there will be no separators written.");
    // The output is divided in small groups of numbers to write:
    // - A group before the first separator.
    // - A separator and a group, repeated for the number of separators.
    // - A group after the last separator.
    // This loop achieves that process by testing the termination condition
    // midway in the loop.
    //
    // TODO FMT This loop evaluates the loop invariant `this->__type !=
    // _Flags::_Type::__hexadecimal_upper_case` for every iteration. (This test
    // happens in the __write call.) Benchmark whether making two loops and
    // hoisting the invariant is worth the effort.
    while (true) {
      if (this->__type == _Flags::_Type::__hexadecimal_upper_case) {
        __last = __first + *__r;
        __out_it = _VSTD::transform(__first, __last, _VSTD::move(__out_it),
                                    __hex_to_upper);
        __first = __last;
      } else {
        __out_it = _VSTD::copy_n(__first, *__r, _VSTD::move(__out_it));
        __first += *__r;
      }

      if (__r == __e)
        break;

      ++__r;
      *__out_it++ = __sep;
    }

    return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after,
                         this->__fill);
  }
#endif // _LIBCPP_HAS_NO_LOCALIZATION
};

} // namespace __format_spec

#endif //_LIBCPP_STD_VER > 17

_LIBCPP_END_NAMESPACE_STD

_LIBCPP_POP_MACROS

#endif // _LIBCPP___FORMAT_FORMATTER_INTEGRAL_H