Add formatting for durations e.g. "2h 4min. 2sec."

Uses unumf_* to format the individual components of the duration, and
ulistfmt_* to join them. That's roughly analogous to what the C++
MeasureFormat class does internally.

I also asked for feedback on this approach on the ICU mailing list:
https://sourceforge.net/p/icu/mailman/message/37645785/
This commit is contained in:
KJ Tsanaktsidis 2022-04-26 11:51:13 +10:00 committed by Erick Guan
parent 44b11a658f
commit da26ebcfbc
4 changed files with 461 additions and 0 deletions

View File

@ -33,3 +33,4 @@ require "ffi-icu/normalizer"
require "ffi-icu/break_iterator"
require "ffi-icu/number_formatting"
require "ffi-icu/time_formatting"
require "ffi-icu/duration_formatting"

View File

@ -0,0 +1,281 @@
module ICU
module DurationFormatting
# Every duration unit the formatter accepts, ordered from largest to smallest.
# The order is significant: it fixes the order of the formatted output and
# decides which supplied unit counts as the "smallest" one.
VALID_FIELDS = %i[
  years
  months
  weeks
  days
  hours
  minutes
  seconds
  milliseconds
  microseconds
  nanoseconds
].freeze

# Units that are consumed by the clock-style rendering of the :digital style
# and must therefore not be formatted a second time as ordinary units.
HMS_FIELDS = %i[
  hours
  minutes
  seconds
  milliseconds
  microseconds
  nanoseconds
].freeze

# Units that may keep a fractional part, provided they are the smallest unit
# supplied. Every other value is truncated to an integer.
ROUNDABLE_FIELDS = %i[
  seconds
  milliseconds
  microseconds
  nanoseconds
].freeze

# Display styles accepted by DurationFormatter.
VALID_STYLES = %i[long short narrow digital].freeze

# Display style => width symbol passed to ulistfmt_openForType when joining
# the formatted units.
STYLES_TO_LIST_JOIN_FORMAT = {
  long: :wide,
  short: :short,
  narrow: :narrow,
  digital: :narrow,
}.freeze

# Unit => number-skeleton token selecting that measurement unit.
UNIT_FORMAT_STRINGS = {
  years: 'measure-unit/duration-year',
  months: 'measure-unit/duration-month',
  weeks: 'measure-unit/duration-week',
  days: 'measure-unit/duration-day',
  hours: 'measure-unit/duration-hour',
  minutes: 'measure-unit/duration-minute',
  seconds: 'measure-unit/duration-second',
  milliseconds: 'measure-unit/duration-millisecond',
  microseconds: 'measure-unit/duration-microsecond',
  nanoseconds: 'measure-unit/duration-nanosecond',
}.freeze

# Display style => number-skeleton token selecting the unit-name width.
STYLES_TO_NUMBER_FORMAT_WIDTH = {
  long: 'unit-width-full-name',
  short: 'unit-width-short',
  narrow: 'unit-width-narrow',
  # digital for hours:minutes:seconds has some special casing.
  digital: 'unit-width-narrow',
}.freeze
# One-shot convenience wrapper: builds a DurationFormatter for the given
# locale and style and formats +fields+ with it.
#
# fields - Hash of unit => amount (see VALID_FIELDS for the accepted units).
# locale - locale identifier, passed through to DurationFormatter.
# style  - display style, passed through to DurationFormatter (default :long).
#
# Returns whatever DurationFormatter#format returns for +fields+.
def self.format(fields, locale:, style: :long)
  formatter = DurationFormatter.new(locale: locale, style: style)
  formatter.format(fields)
end
class DurationFormatter
# Builds a formatter bound to one locale and one display style.
#
# locale - locale identifier; handed to the ICU functions unchanged.
# style  - one of VALID_STYLES (default :long).
#
# Raises RuntimeError when Lib does not expose the unumf_* / ulistfmt_*
# entry points this class relies on (the message asks for ICU >= 67).
# Raises ArgumentError when +style+ is not one of VALID_STYLES.
def initialize(locale:, style: :long)
  required_functions = %i[unumf_openForSkeletonAndLocale ulistfmt_openForType]
  unless required_functions.all? { |name| Lib.respond_to?(name) }
    raise "ICU::DurationFormatting requires ICU >= 67"
  end
  unless VALID_STYLES.include?(style)
    raise ArgumentError, "Unknown style #{style}"
  end

  @locale = locale
  @style = style
  # Number formatters are opened on demand, one per skeleton actually used.
  @number_formatters = {}

  list_width = STYLES_TO_LIST_JOIN_FORMAT.fetch(style)
  raw_list_formatter = Lib.check_error do |error|
    Lib.ulistfmt_openForType(@locale, :units, list_width, error)
  end
  # The AutoPointer is given ulistfmt_close as its release callback.
  @list_formatter = FFI::AutoPointer.new(raw_list_formatter, Lib.method(:ulistfmt_close))
end
# Formats a duration as a localized string.
#
# fields - Hash of unit => numeric amount; every key must be one of
#          VALID_FIELDS. The caller's hash is not modified.
#
# Only the smallest supplied unit may keep a fractional part, and only when
# it is one of ROUNDABLE_FIELDS; all other amounts are truncated with #to_i.
# Units whose amount is zero are left out. In :digital style the
# hours/minutes/seconds group is rendered clock-style when format_hms can
# produce it, and that piece is placed after the remaining units.
#
# An empty hash is accepted: nothing is formatted and an empty list is handed
# to format_list.
#
# Returns the result of format_list for the formatted pieces.
# Raises RuntimeError ("Unknown field ...") for a key outside VALID_FIELDS.
def format(fields)
  fields.each_key do |field|
    raise "Unknown field #{field}" unless VALID_FIELDS.include?(field)
  end
  fields = fields.dup # we might modify this argument.

  # Intl.js spec says that rounding options affect only the smallest unit, and only
  # if that unit is sub-second. All other fields therefore need to be truncated.
  # VALID_FIELDS is ordered largest-first, so scan it backwards; the result is
  # nil (rather than an exception) when no fields were supplied at all.
  smallest_unit = VALID_FIELDS.reverse_each.find { |unit| fields.key?(unit) }
  keep_fraction = ROUNDABLE_FIELDS.include?(smallest_unit)
  fields.each_key do |unit|
    fields[unit] = fields[unit].to_i unless unit == smallest_unit && keep_fraction
  end

  formatted_hms = nil
  if @style == :digital
    # icu::MeasureFormat contains special casing for hours/minutes/seconds formatted
    # at numeric width, to render it as h:mm:s, essentially. This involves using
    # a pattern called durationUnits defined in the ICU data for the locale.
    # If we have data for this combination of hours/mins/seconds in this locale,
    # use that and emulate ICU's special casing.
    formatted_hms = format_hms(fields)
    # Those fields are taken care of now; don't format them again below.
    HMS_FIELDS.each { |unit| fields.delete(unit) } if formatted_hms
  end

  width = STYLES_TO_NUMBER_FORMAT_WIDTH[@style]
  formatted_fields = VALID_FIELDS.map do |unit|
    next unless fields.key?(unit)
    next if fields[unit] == 0

    # Only the smallest unit gets the fraction-digits token in its skeleton.
    fraction = (".#########" if unit == smallest_unit)
    format_number(fields[unit], [UNIT_FORMAT_STRINGS[unit], width, fraction].compact.join(' '))
  end
  formatted_fields << formatted_hms
  format_list(formatted_fields.compact)
end
private
# Looks up the locale's "durationUnits" pattern for the combination of
# hours / minutes / seconds present in +fields+.
#
# fields - Hash of unit => amount.
#
# Returns the pattern String with every "h" replaced by "H", or nil when
# +fields+ has none of :hours, :minutes, :seconds, or when the lookup raises
# MissingResourceError for this combination.
def hms_duration_units_pattern(fields)
  return nil if %i[hours minutes seconds].none? { |unit| fields.key?(unit) }

  # The unit resource bundle is opened once and reused by later calls.
  @unit_res_bundle ||= FFI::AutoPointer.new(
    Lib.check_error { |error| Lib.ures_open(Lib.resource_bundle_name(:unit), @locale, error) },
    Lib.method(:ures_close)
  )

  # Any sub-second unit counts towards the "s" part of the key.
  has_seconds = %i[seconds milliseconds microseconds nanoseconds].any? { |unit| fields.key?(unit) }
  unit_letters = [
    ("h" if fields.key?(:hours)),
    ("m" if fields.key?(:minutes)),
    ("s" if has_seconds),
  ].compact.join
  resource_key = "durationUnits/#{unit_letters}"

  pattern_resource =
    begin
      raw_resource = Lib.check_error do |error|
        Lib.ures_getBykeyWithFallback(@unit_res_bundle, resource_key, nil, error)
      end
      FFI::AutoPointer.new(raw_resource, Lib.method(:ures_close))
    rescue MissingResourceError
      # This combination of h,m,s not present for this locale.
      return nil
    end

  # Read the resource as a UChar (whose memory we _do not own_ - it's static data) and
  # convert it to a Ruby string.
  length_out = FFI::MemoryPointer.new(:int32_t)
  pattern_ptr = Lib.check_error do |error|
    Lib.ures_getString(pattern_resource, length_out, error)
  end
  code_units = pattern_ptr.read_array_of_uint16(length_out.read_int32)
  # NOTE(review): pack("U*") treats each 16-bit unit as a whole code point,
  # so a surrogate pair would not round-trip - confirm patterns are BMP-only.
  pattern_str = code_units.pack("U*")

  # loadNumericDateFormatterPattern in ICU turns h's into H's at this point, so
  # do the same; the pattern data could in theory contain either.
  pattern_str.tr('h', 'H')
end
# Renders the hours / minutes / seconds part of a duration clock-style
# (e.g. "1:02:03") by filling in the locale's durationUnits pattern.
#
# fields - Hash of unit => amount.
#
# Pattern handling follows formatNumeric in ICU's measfmt.cpp:
#   * "H", "m" and "s" are replaced by the formatted hours, minutes and
#     seconds; a doubled letter ("mm") requests two-digit zero fill.
#   * Text between single quotes is copied literally; outside of such a run
#     two consecutive quotes stand for one literal quote.
#   * Any other character is copied through unchanged.
#
# Returns the formatted String, or nil when hms_duration_units_pattern has no
# pattern for these fields.
def format_hms(fields)
  pattern = hms_duration_units_pattern(fields)
  return nil if pattern.nil?

  # According to the Intl.js spec, when formatting in digital, everything < seconds
  # should be coalesced into decimal seconds. The number of fraction digits
  # follows the smallest sub-second unit that was supplied.
  total_seconds = fields.fetch(:seconds, 0)
  fraction_digits = 0
  sub_second_units = { milliseconds: [1e3, 3], microseconds: [1e6, 6], nanoseconds: [1e9, 9] }
  sub_second_units.each do |unit, (divisor, digits)|
    next unless fields.key?(unit)

    total_seconds += fields[unit] / divisor
    fraction_digits = digits
  end

  symbols = pattern.chars
  output = +""
  in_literal = false
  pos = 0
  while pos < symbols.length
    current = symbols[pos]
    following = symbols[pos + 1] # nil at the end of the pattern
    pos += 1

    if in_literal
      # Inside a quoted run: a quote closes it, anything else is literal text.
      if current == "'"
        in_literal = false
      else
        output << current
      end
      next
    end

    case current
    when 'H', 'm', 's'
      amount =
        case current
        when 'H' then fields[:hours]
        when 'm' then fields[:minutes]
        else total_seconds
        end
      fraction =
        if current == 's' && fraction_digits > 0
          "0" * fraction_digits
        else
          "#" * 9
        end
      skeleton = ".#{fraction}"
      if current == following
        # It's doubled - means format it at zero fill
        skeleton += " integer-width/00"
        pos += 1
      end
      output << format_number(amount, skeleton)
    when "'"
      if following == "'"
        # double-apostrophe, means literal '
        output << "'"
        pos += 1
      else
        in_literal = true
      end
    else
      output << current
    end
  end
  output
end
# Returns the ICU number formatter for +skeleton+ in this formatter's locale,
# opening it on first use and reusing it on later calls.
#
# skeleton - number-skeleton String.
#
# The handle is wrapped in an FFI::AutoPointer whose release callback is
# unumf_close.
def number_formatter(skeleton)
  cached = @number_formatters[skeleton]
  return cached if cached

  skeleton_uchar = UCharPointer.from_string(skeleton)
  handle = Lib.check_error do |error|
    Lib.unumf_openForSkeletonAndLocale(skeleton_uchar, skeleton_uchar.length_in_uchars, @locale, error)
  end
  @number_formatters[skeleton] = FFI::AutoPointer.new(handle, Lib.method(:unumf_close))
end
# Formats one numeric value according to a number skeleton.
#
# value    - the amount to format; it is sent to ICU in its #to_s form.
# skeleton - number-skeleton String selecting unit, width and precision.
#
# Returns the value produced by Lib::Util.read_uchar_buffer for the formatted
# result.
def format_number(value, skeleton)
  formatter = number_formatter(skeleton)
  result = FFI::AutoPointer.new(
    Lib.check_error { |error| Lib.unumf_openResult(error) },
    Lib.method(:unumf_closeResult)
  )
  value_str = value.to_s
  Lib.check_error do |error|
    # The value is passed as a C string, so its length must be counted in
    # bytes, not characters.
    Lib.unumf_formatDecimal(formatter, value_str, value_str.bytesize, result, error)
  end
  Lib::Util.read_uchar_buffer(0) do |buf, error|
    Lib.unumf_resultToString(result, buf, buf.length_in_uchars, error)
  end
end
# Joins already-formatted pieces into one string with the list formatter
# opened in #initialize.
#
# values - Array of Strings, in output order.
#
# Returns the value produced by Lib::Util.read_uchar_buffer for the joined
# list.
def format_list(values)
  # Keep the converted strings in a local for the whole call: the pointer
  # array handed to ICU only refers to them.
  item_ptrs = values.map { |text| UCharPointer.from_string(text) }
  item_count = item_ptrs.size
  item_lengths = item_ptrs.map(&:length_in_uchars)

  ptr_array = FFI::MemoryPointer.new(:pointer, item_count)
  ptr_array.put_array_of_pointer(0, item_ptrs)
  length_array = FFI::MemoryPointer.new(:int32_t, item_count)
  length_array.put_array_of_int32(0, item_lengths)

  Lib::Util.read_uchar_buffer(0) do |buf, error|
    Lib.ulistfmt_format(
      @list_formatter, ptr_array, length_array,
      item_count, buf, buf.length_in_uchars, error
    )
  end
end
end
end
end

View File

@ -5,6 +5,9 @@ module ICU
# Raised by the error check when ICU reports U_BUFFER_OVERFLOW_ERROR.
class BufferOverflowError < StandardError; end

# Raised by the error check when ICU reports U_MISSING_RESOURCE_ERROR.
class MissingResourceError < StandardError; end
module Lib
extend FFI::Library
@ -129,6 +132,8 @@ module ICU
name = Lib.u_errorName error_code
if name == "U_BUFFER_OVERFLOW_ERROR"
raise BufferOverflowError
elsif name == "U_MISSING_RESOURCE_ERROR"
raise MissingResourceError
else
raise Error, name
end
@ -431,6 +436,42 @@ module ICU
attach_optional_function :unum_format_decimal, "unum_formatDecimal#{suffix}", [:pointer, :string, :int32_t, :pointer, :int32_t, :pointer, :pointer], :int32_t
attach_function :unum_format_currency, "unum_formatDoubleCurrency#{suffix}", [:pointer, :double, :pointer, :pointer, :int32_t, :pointer, :pointer], :int32_t
attach_function :unum_set_attribute, "unum_setAttribute#{suffix}", [:pointer, :number_format_attribute, :int32_t], :void
# UResourceBundle
# Bindings for reading ICU resource bundles. At the call sites in
# duration_formatting the final :pointer argument of each function (except
# ures_close) receives the error out-parameter supplied by Lib.check_error.
# ures_open(package name, locale, error) returns a bundle handle.
attach_function :ures_open, "ures_open#{suffix}", [:string, :string, :pointer], :pointer
# Releases a handle returned by ures_open / ures_getByKeyWithFallback.
attach_function :ures_close, "ures_close#{suffix}", [:pointer], :void
# This function is marked "internal" but it's fully exported by the library ABI, so we can use it anyway.
# NOTE(review): the Ruby-side name is spelled "Bykey" (lower-case k) while the
# C symbol is "ByKey"; callers must use the Ruby spelling.
attach_function :ures_getBykeyWithFallback, "ures_getByKeyWithFallback#{suffix}", [:pointer, :string, :pointer, :pointer], :pointer
# ures_getString(resource, length out-pointer, error) returns a pointer that
# the caller reads as an array of 16-bit units.
attach_function :ures_getString, "ures_getString#{suffix}", [:pointer, :pointer, :pointer], :pointer
# Builds the package name of one of ICU's bundled resource trees, in the form
# "icudt<NN><letter>-<type>" (for example "icudt67l-unit"), suitable as the
# first argument of ures_open.
#
# type - tree name as a Symbol or String (e.g. :unit).
#
# <NN> is the first element read from +version+ - presumably the ICU major
# version; confirm against the definition of +version+. <letter> is ICU's
# data-type letter: "l" on little-endian hosts and "b" on big-endian hosts.
# The name has to match the data package the library was built with, so the
# letter is derived from the host byte order rather than hard-coded.
# NOTE(review): EBCDIC platforms use the letter "e"; they are not handled.
#
# Returns the package name as a String.
def self.resource_bundle_name(type)
  major_version = version.read_array_of_char(4)[0]
  big_endian = [1].pack("S") == [1].pack("n")
  type_letter = big_endian ? "b" : "l"
  "icudt#{major_version}#{type_letter}-#{type}"
end
# UNumberFormatter
# Skeleton-based number formatting. These are attached with
# attach_optional_function; DurationFormatter checks Lib.respond_to? for
# unumf_openForSkeletonAndLocale before using them, so presumably a missing
# symbol simply leaves the method undefined - confirm in
# attach_optional_function.
# (skeleton UChar pointer, skeleton length, locale, error) -> formatter handle
attach_optional_function :unumf_openForSkeletonAndLocale, "unumf_openForSkeletonAndLocale#{suffix}", [:pointer, :int32_t, :string, :pointer], :pointer
# Releases a formatter handle.
attach_optional_function :unumf_close, "unumf_close#{suffix}", [:pointer], :void
# (error) -> result handle that receives formatted output
attach_optional_function :unumf_openResult, "unumf_openResult#{suffix}", [:pointer], :pointer
# Releases a result handle.
attach_optional_function :unumf_closeResult, "unumf_closeResult#{suffix}", [:pointer], :void
# (formatter, decimal number as a C string, its length, result handle, error)
attach_optional_function :unumf_formatDecimal, "unumf_formatDecimal#{suffix}", [:pointer, :string, :int32_t, :pointer, :pointer], :void
# (result handle, output buffer, buffer capacity, error) -> int32
attach_optional_function :unumf_resultToString, "unumf_resultToString#{suffix}", [:pointer, :pointer, :int32_t, :pointer], :int32_t
# UListFormatter
# Symbols accepted for the type argument of ulistfmt_openForType.
enum :ulistfmt_type, [
:and, 0,
:or, 1,
:units, 2,
]
# Symbols accepted for the width argument of ulistfmt_openForType.
enum :ulistfmt_width, [
:wide, 0,
:short, 1,
:narrow, 2,
]
# (locale, type, width, error) -> list formatter handle
attach_optional_function :ulistfmt_openForType, "ulistfmt_openForType#{suffix}", [:string, :ulistfmt_type, :ulistfmt_width, :pointer], :pointer
# Releases a list formatter handle.
attach_optional_function :ulistfmt_close, "ulistfmt_close#{suffix}", [:pointer], :void
# (formatter, array of UChar string pointers, array of their lengths, item
# count, output buffer, buffer capacity, error) -> int32
attach_optional_function :ulistfmt_format, "ulistfmt_format#{suffix}", [:pointer, :pointer, :pointer, :int32_t, :pointer, :int32_t, :pointer], :int32_t
# date
enum :date_format_style, [
:pattern, -2,

View File

@ -0,0 +1,138 @@
# Specs for ICU::DurationFormatting.format. Most expectations are loose
# regular expressions, so they check that pieces appear in order rather than
# pinning the exact localized output.
module ICU
module DurationFormatting
describe 'DurationFormatting::format' do
before(:each) do
# The formatter itself raises "requires ICU >= 67" on older libraries, so
# skip rather than fail there.
skip("Only works on ICU >= 67") if Lib.version.to_a[0] < 67
end
it 'produces hours, minutes, and seconds in order' do
result = DurationFormatting.format({hours: 1, minutes: 2, seconds: 3}, locale: 'C', style: :long)
expect(result).to match(/1.*hour.*2.*minute.*3.*second/i)
end
# NOTE(review): this pattern also matches an unrounded "5.4 seconds", so the
# example does not actually pin rounding. A later example in this file expects
# fractional seconds to be kept ("8.93"), which suggests no rounding happens.
it 'rounds down fractional seconds < 0.5' do
result = DurationFormatting.format({seconds: 5.4}, locale: 'C', style: :long)
expect(result).to match(/5.*second/i)
end
# NOTE(review): likewise, this pattern matches an unrounded "5.6 seconds"
# because of the "6" after the decimal point.
it 'rounds up fractional seconds > 0.5' do
result = DurationFormatting.format({seconds: 5.6}, locale: 'C', style: :long)
expect(result).to match(/6.*second/i)
end
it 'trims off leading zero values' do
result = DurationFormatting.format({hours: 0, minutes: 1, seconds: 30}, locale: 'C', style: :long)
expect(result).to match(/1.*minute.*30.*second/i)
expect(result).to_not match(/hour/i)
end
it 'trims off leading missing values' do
result = DurationFormatting.format({minutes: 1, seconds: 30}, locale: 'C', style: :long)
expect(result).to match(/1.*minute.*30.*second/i)
expect(result).to_not match(/hour/i)
end
it 'trims off non-leading zero values' do
result = DurationFormatting.format({hours: 1, minutes: 0, seconds: 10}, locale: 'C', style: :long)
expect(result).to match(/1.*hour.*10.*second/i)
expect(result).to_not match(/minute/i)
end
it 'trims off non-leading missing values' do
result = DurationFormatting.format({hours: 1, seconds: 10}, locale: 'C', style: :long)
expect(result).to match(/1.*hour.*10.*second/i)
expect(result).to_not match(/minute/i)
end
# Amounts are never carried over into larger units: 90123 seconds stays seconds.
it 'uses comma-based number formatting as appropriate for locale' do
result = DurationFormatting.format({seconds: 90123}, locale: 'en-AU', style: :long)
expect(result).to match(/90,123.*second/i)
expect(result).to_not match(/hour/i)
expect(result).to_not match(/minute/i)
end
it 'localizes unit names' do
result = DurationFormatting.format({hours: 1, minutes: 2, seconds: 3}, locale: 'el', style: :long)
expect(result).to match(/1.*ώρα.*2.*λεπτά.*3.*δευτερόλεπτα/i)
end
it 'can format long' do
result = DurationFormatting.format({hours: 1, minutes: 2, seconds: 3}, locale: 'en-AU', style: :long)
expect(result).to match(/hour.*minute.*second/i)
end
it 'can format short' do
result = DurationFormatting.format({hours: 1, minutes: 2, seconds: 3}, locale: 'en-AU', style: :short)
expect(result).to match(/hr.*min.*sec/i)
expect(result).to_not match(/hour/i)
expect(result).to_not match(/minute/i)
expect(result).to_not match(/second/i)
end
it 'can format narrow' do
result = DurationFormatting.format({hours: 1, minutes: 2, seconds: 3}, locale: 'en-AU', style: :narrow)
expect(result).to match(/h.*min.*s/i)
expect(result).to_not match(/hr/i)
expect(result).to_not match(/sec/i)
end
# Digital style is the only one checked for an exact string.
it 'can format digital' do
result = DurationFormatting.format({hours: 1, minutes: 2, seconds: 3}, locale: 'en-AU', style: :digital)
expect(result).to eql('1:02:03')
end
it 'can format the full sequence of time units in order' do
duration = {
years: 1,
months: 2,
weeks: 3,
days: 4,
hours: 5,
minutes: 6,
seconds: 7,
milliseconds: 8,
microseconds: 9,
nanoseconds: 10,
}
result = DurationFormatting.format(duration, locale: 'en-AU', style: :short)
expect(result).to match(/1.yr.*2.*mths.*3.*wks.*4.*days.*5.*hrs.*6.*mins.*7.*secs.*8.*ms.*9.*μs.*10.*ns/)
end
# Sub-second units are folded into the seconds value as decimal digits.
it 'joins ms, us, ns values to seconds in digital format' do
duration = {minutes: 10, seconds: 5, milliseconds: 325, microseconds: 53, nanoseconds: 236}
result = DurationFormatting.format(duration, locale: 'en-AU', style: :digital)
expect(result).to eql('10:05.325053236')
end
# With microseconds as the smallest unit, six fraction digits are always shown.
it 'includes trailing zeros as appropriate for the last unit in digital format' do
duration = {minutes: 10, seconds: 5, milliseconds: 325, microseconds: 400}
result = DurationFormatting.format(duration, locale: 'en-AU', style: :digital)
expect(result).to eql('10:05.325400')
end
it 'joins h:mm:ss and other units in digital format' do
duration = {days: 8, hours: 23, minutes: 10, seconds: 9}
result = DurationFormatting.format(duration, locale: 'en-AU', style: :digital)
expect(result).to match(/8.*d.*23:10:09/ )
end
# Only the smallest unit keeps its fraction, and only when it is seconds or smaller.
it 'ignores all decimal parts except the last, if it is seconds' do
duration = {hours: 7.3, minutes: 9.7, seconds: 8.93}
result = DurationFormatting.format(duration, locale: 'en-AU', style: :short)
expect(result).to match(/7[^0-9]*hrs.*9[^0-9]*min.*8\.93[^0-9]*secs/)
end
it 'ignores all decimal parts except the last, if it is milliseconds' do
duration = {hours: 7.3, minutes: 9.7, seconds: 8.93, milliseconds: 632.2}
result = DurationFormatting.format(duration, locale: 'en-AU', style: :short)
expect(result).to match(/7[^0-9]*hrs.*9[^0-9]*min.*8[^0-9]*secs.*632\.2[^0-9]*ms/)
end
it 'ignores all decimal parts including the last, if it is > seconds' do
duration = {hours: 7.3, minutes: 9.7}
result = DurationFormatting.format(duration, locale: 'en-AU', style: :short)
expect(result).to match(/7[^0-9]*hrs.*9[^0-9]*min/)
end
end
end
end