Locale class and library entry points

This commit is contained in:
Chris Bandy 2012-08-06 16:25:49 -05:00
parent 5c161693b6
commit 1821793c2e
4 changed files with 560 additions and 0 deletions

View File

@ -34,6 +34,7 @@ require "ffi-icu/lib"
require "ffi-icu/uchar"
require "ffi-icu/chardet"
require "ffi-icu/collation"
require "ffi-icu/locale"
require "ffi-icu/transliteration"
require "ffi-icu/normalization"
require "ffi-icu/break_iterator"

View File

@ -157,6 +157,50 @@ module ICU
attach_function :u_charsToUChars, "u_charsToUChars#{suffix}", [:string, :pointer, :int32_t], :void
attach_function :u_UCharsToChars, "u_UCharsToChars#{suffix}", [:pointer, :string, :int32_t], :void
#
# Locale
#
# http://icu-project.org/apiref/icu4c/uloc_8h.html
#
# Bindings for the uloc_* functions used by ICU::Locale. Each symbol is
# attached under its plain name; the native symbol name gets `suffix` appended.
#
# Symbols returned by the two orientation queries (uloc_getCharacterOrientation
# and uloc_getLineOrientation) at the end of this section.
# NOTE(review): the order is assumed to mirror ULayoutType in uloc.h -- confirm.
enum :layout_type, [:ltr, :rtl, :ttb, :btt, :unknown]
# Identifier queries and conversions. The buffer-filling variants take
# (..., out_buffer, capacity, status) and return an int32_t that ICU::Locale
# treats as the length of the text written.
attach_function :uloc_addLikelySubtags, "uloc_addLikelySubtags#{suffix}", [:string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_canonicalize, "uloc_canonicalize#{suffix}", [:string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_countAvailable, "uloc_countAvailable#{suffix}", [], :int32_t
# The extra :pointer before the status is passed as nil by Locale.for_language_tag.
attach_function :uloc_forLanguageTag, "uloc_forLanguageTag#{suffix}", [:string, :pointer, :int32_t, :pointer, :pointer], :int32_t
attach_function :uloc_getAvailable, "uloc_getAvailable#{suffix}", [:int32_t], :string
attach_function :uloc_getBaseName, "uloc_getBaseName#{suffix}", [:string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_getCountry, "uloc_getCountry#{suffix}", [:string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_getDefault, "uloc_getDefault#{suffix}", [], :string
attach_function :uloc_getISO3Country, "uloc_getISO3Country#{suffix}", [:string], :string
attach_function :uloc_getISO3Language, "uloc_getISO3Language#{suffix}", [:string], :string
# Both return a raw pointer; ICU::Locale walks it as a NULL-terminated array
# of C strings.
attach_function :uloc_getISOCountries, "uloc_getISOCountries#{suffix}", [], :pointer
attach_function :uloc_getISOLanguages, "uloc_getISOLanguages#{suffix}", [], :pointer
attach_function :uloc_getKeywordValue, "uloc_getKeywordValue#{suffix}", [:string, :string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_getLanguage, "uloc_getLanguage#{suffix}", [:string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_getLocaleForLCID, "uloc_getLocaleForLCID#{suffix}", [:uint32, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_getLCID, "uloc_getLCID#{suffix}", [:string], :uint32
attach_function :uloc_getName, "uloc_getName#{suffix}", [:string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_getParent, "uloc_getParent#{suffix}", [:string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_getScript, "uloc_getScript#{suffix}", [:string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_getVariant, "uloc_getVariant#{suffix}", [:string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_minimizeSubtags, "uloc_minimizeSubtags#{suffix}", [:string, :pointer, :int32_t, :pointer], :int32_t
# Returns an enumeration handle; ICU::Locale#keywords releases it with uenum_close.
attach_function :uloc_openKeywords, "uloc_openKeywords#{suffix}", [:string, :pointer], :pointer
attach_function :uloc_setDefault, "uloc_setDefault#{suffix}", [:string, :pointer], :void
# Arguments are (keyword, value, buffer, capacity, status); ICU::Locale#with_keyword
# writes the locale id into the buffer before calling and reads the result back
# from the same buffer.
attach_function :uloc_setKeywordValue, "uloc_setKeywordValue#{suffix}", [:string, :string, :pointer, :int32_t, :pointer], :int32_t
# The :int8_t argument is the strict flag (ICU::Locale passes 1 or 0).
attach_function :uloc_toLanguageTag, "uloc_toLanguageTag#{suffix}", [:string, :pointer, :int32_t, :int8_t, :pointer], :int32_t
# Orientation and display names. The display functions take
# (locale_id, display_locale, buffer, capacity, status); ICU::Locale hands them
# a UCharPointer and passes nil as display_locale when none is given.
attach_function :uloc_getCharacterOrientation, "uloc_getCharacterOrientation#{suffix}", [:string, :pointer], :layout_type
attach_function :uloc_getDisplayCountry, "uloc_getDisplayCountry#{suffix}", [:string, :string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_getDisplayKeyword, "uloc_getDisplayKeyword#{suffix}", [:string, :string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_getDisplayKeywordValue, "uloc_getDisplayKeywordValue#{suffix}", [:string, :string, :string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_getDisplayLanguage, "uloc_getDisplayLanguage#{suffix}", [:string, :string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_getDisplayName, "uloc_getDisplayName#{suffix}", [:string, :string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_getDisplayScript, "uloc_getDisplayScript#{suffix}", [:string, :string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_getDisplayVariant, "uloc_getDisplayVariant#{suffix}", [:string, :string, :pointer, :int32_t, :pointer], :int32_t
attach_function :uloc_getLineOrientation, "uloc_getLineOrientation#{suffix}", [:string, :pointer], :layout_type
# CharDet
#
# http://icu-project.org/apiref/icu4c/ucsdet_8h.html

274
lib/ffi-icu/locale.rb Normal file
View File

@ -0,0 +1,274 @@
module ICU
  # Wraps an ICU locale identifier string and exposes the uloc_* functions
  # bound in ICU::Lib as Ruby methods.
  #
  # Instances compare by identifier (#==, #eql? and #hash agree), so equal
  # locales behave as the same Hash key and collapse under Array#uniq.
  class Locale
    # Pointer and buffer helpers mixed into both Locale and its singleton class.
    module SharedMethods
      private

      # Collects the strings of a NULL-terminated array of C string pointers.
      #
      # @param pointer [FFI::Pointer] start of the pointer array
      # @return [Array<String>] one string per pointer before the NULL entry
      def read_null_terminated_array_of_strings(pointer)
        offset = 0
        result = []

        while (ptr = pointer.get_pointer(offset)) != FFI::Pointer::NULL
          result << ptr.read_string
          offset += FFI::Pointer.size
        end

        result
      end

      # Runs a buffer-filling call and returns the bytes it produced.
      #
      # A fresh +length+-byte buffer and the status pointer supplied by
      # Lib.check_error are yielded; the block must return the length of the
      # result. On BufferOverflowError the call is repeated once with a buffer
      # of the most recently recorded length.
      # NOTE(review): the retry relies on Lib.check_error raising only after
      # the block has returned, so +length+ already holds the size that was
      # asked for -- confirm against Lib.check_error.
      #
      # @param length [Integer] initial buffer size in bytes
      # @yieldparam buffer [FFI::MemoryPointer]
      # @yieldparam status [FFI::Pointer]
      # @yieldreturn [Integer] length of the text written
      # @return [String]
      # @raise [BufferOverflowError] when the second attempt overflows as well
      def read_string_buffer(length = 64)
        attempts = 0

        begin
          result = FFI::MemoryPointer.new(:char, length)
          Lib.check_error { |status| length = yield result, status }
        rescue BufferOverflowError
          attempts += 1
          retry if attempts < 2
          raise BufferOverflowError, "needed: #{length}"
        end

        result.read_string(length)
      end

      # Same contract as #read_string_buffer, but the buffer is a UCharPointer
      # created with +length+ and the result is read with UCharPointer#string.
      # NOTE(review): callers pass +buffer.size+ as the capacity argument; if
      # UCharPointer#size reports bytes rather than UChar units the capacity
      # handed to ICU is larger than the allocation -- confirm.
      #
      # @param length [Integer] initial size passed to UCharPointer.new
      # @yieldparam buffer [UCharPointer]
      # @yieldparam status [FFI::Pointer]
      # @yieldreturn [Integer] length of the text written
      # @return [String]
      # @raise [BufferOverflowError] when the second attempt overflows as well
      def read_uchar_buffer(length = 64)
        attempts = 0

        begin
          result = UCharPointer.new(length)
          Lib.check_error { |status| length = yield result, status }
        rescue BufferOverflowError
          attempts += 1
          retry if attempts < 2
          raise BufferOverflowError, "needed: #{length}"
        end

        result.string(length)
      end
    end

    class << self
      include SharedMethods

      # @return [Array<Locale>] one Locale per index below uloc_countAvailable
      def available
        (0...Lib.uloc_countAvailable).map do |idx|
          Locale.new(Lib.uloc_getAvailable(idx))
        end
      end

      # @return [Locale] the locale reported by uloc_getDefault
      def default
        Locale.new(Lib.uloc_getDefault)
      end

      # Passes +locale.to_s+ to uloc_setDefault.
      #
      # @param locale [#to_s] a Locale, String or Symbol
      def default=(locale)
        Lib.check_error { |status| Lib.uloc_setDefault(locale.to_s, status) }
      end

      # Converts a language tag with uloc_forLanguageTag.
      #
      # @param tag [String] e.g. 'en-us'
      # @return [Locale]
      def for_language_tag(tag)
        result = read_string_buffer do |buffer, status|
          # nil: the parsed-length out parameter is not requested.
          Lib.uloc_forLanguageTag(tag, buffer, buffer.size, nil, status)
        end

        Locale.new(result)
      end

      # Converts a Win32 locale ID with uloc_getLocaleForLCID.
      #
      # @param id [Integer] e.g. 1033
      # @return [Locale]
      def for_lcid(id)
        result = read_string_buffer do |buffer, status|
          Lib.uloc_getLocaleForLCID(id, buffer, buffer.size, status)
        end

        Locale.new(result)
      end

      # @return [Array<String>] the entries of uloc_getISOCountries
      def iso_countries
        read_null_terminated_array_of_strings(Lib.uloc_getISOCountries)
      end

      # @return [Array<String>] the entries of uloc_getISOLanguages
      def iso_languages
        read_null_terminated_array_of_strings(Lib.uloc_getISOLanguages)
      end
    end

    include SharedMethods

    # @return [String] the identifier exactly as given to #initialize
    attr_reader :id

    # @param id [#to_s] locale identifier; stored as a String without validation
    def initialize(id)
      @id = id.to_s
    end

    # @param other [Object]
    # @return [Boolean] true when +other+ is an instance of the receiver's
    #   class (or a subclass) with the same identifier string
    def ==(other)
      other.is_a?(self.class) && other.id == id
    end

    # Hash-key equality follows #== so that equal locales are interchangeable
    # as Hash keys and are de-duplicated by Array#uniq.
    alias_method :eql?, :==

    # @return [Integer] hash of the identifier, consistent with #eql?
    def hash
      @id.hash
    end

    # @return [String] result of uloc_getBaseName for this identifier
    def base_name
      read_string_buffer do |buffer, status|
        Lib.uloc_getBaseName(@id, buffer, buffer.size, status)
      end
    end

    # @return [String] result of uloc_canonicalize for this identifier
    def canonical
      read_string_buffer do |buffer, status|
        Lib.uloc_canonicalize(@id, buffer, buffer.size, status)
      end
    end

    # @return [Symbol] a :layout_type value from uloc_getCharacterOrientation
    def character_orientation
      Lib.check_error { |status| Lib.uloc_getCharacterOrientation(@id, status) }
    end

    # @return [String] result of uloc_getCountry for this identifier
    def country
      read_string_buffer do |buffer, status|
        Lib.uloc_getCountry(@id, buffer, buffer.size, status)
      end
    end

    # @param locale [#to_s, nil] display locale; nil is passed through as-is
    # @return [String] result of uloc_getDisplayCountry
    def display_country(locale = nil)
      locale = locale.to_s unless locale.nil?

      read_uchar_buffer do |buffer, status|
        Lib.uloc_getDisplayCountry(@id, locale, buffer, buffer.size, status)
      end
    end

    # @param locale [#to_s, nil] display locale; nil is passed through as-is
    # @return [String] result of uloc_getDisplayLanguage
    def display_language(locale = nil)
      locale = locale.to_s unless locale.nil?

      read_uchar_buffer do |buffer, status|
        Lib.uloc_getDisplayLanguage(@id, locale, buffer, buffer.size, status)
      end
    end

    # @param locale [#to_s, nil] display locale; nil is passed through as-is
    # @return [String] result of uloc_getDisplayName
    def display_name(locale = nil)
      locale = locale.to_s unless locale.nil?

      read_uchar_buffer do |buffer, status|
        Lib.uloc_getDisplayName(@id, locale, buffer, buffer.size, status)
      end
    end

    # @param locale [#to_s, nil] display locale; nil is passed through as-is
    # @return [String] result of uloc_getDisplayScript
    def display_script(locale = nil)
      locale = locale.to_s unless locale.nil?

      read_uchar_buffer do |buffer, status|
        Lib.uloc_getDisplayScript(@id, locale, buffer, buffer.size, status)
      end
    end

    # @param locale [#to_s, nil] display locale; nil is passed through as-is
    # @return [String] result of uloc_getDisplayVariant
    def display_variant(locale = nil)
      locale = locale.to_s unless locale.nil?

      read_uchar_buffer do |buffer, status|
        Lib.uloc_getDisplayVariant(@id, locale, buffer, buffer.size, status)
      end
    end

    # @return [String] result of uloc_getISO3Country, e.g. 'USA' for 'en_US'
    def iso_country
      Lib.uloc_getISO3Country(@id)
    end

    # @return [String] result of uloc_getISO3Language, e.g. 'eng' for 'en_US'
    def iso_language
      Lib.uloc_getISO3Language(@id)
    end

    # @param keyword [#to_s] keyword name
    # @return [String] result of uloc_getKeywordValue for that keyword
    def keyword(keyword)
      read_string_buffer do |buffer, status|
        Lib.uloc_getKeywordValue(@id, keyword.to_s, buffer, buffer.size, status)
      end
    end

    # Lists the keywords via uloc_openKeywords / Lib.enum_ptr_to_array.
    #
    # @return [Array] whatever Lib.enum_ptr_to_array yields for the enumeration
    def keywords
      enum_ptr = Lib.check_error { |status| Lib.uloc_openKeywords(@id, status) }

      begin
        Lib.enum_ptr_to_array(enum_ptr)
      ensure
        # Always release the enumeration, even when reading it raised.
        Lib.uenum_close(enum_ptr)
      end
    end

    # @return [String] result of uloc_getLanguage for this identifier
    def language
      read_string_buffer do |buffer, status|
        Lib.uloc_getLanguage(@id, buffer, buffer.size, status)
      end
    end

    # @return [Integer] result of uloc_getLCID for this identifier
    def lcid
      Lib.uloc_getLCID(@id)
    end

    # @return [Symbol] a :layout_type value from uloc_getLineOrientation
    def line_orientation
      Lib.check_error { |status| Lib.uloc_getLineOrientation(@id, status) }
    end

    # @return [String] result of uloc_getName for this identifier
    def name
      read_string_buffer do |buffer, status|
        Lib.uloc_getName(@id, buffer, buffer.size, status)
      end
    end

    # @return [String] result of uloc_getParent for this identifier
    def parent
      read_string_buffer do |buffer, status|
        Lib.uloc_getParent(@id, buffer, buffer.size, status)
      end
    end

    # @return [String] result of uloc_getScript for this identifier
    def script
      read_string_buffer do |buffer, status|
        Lib.uloc_getScript(@id, buffer, buffer.size, status)
      end
    end

    # @param strict [Boolean] forwarded to uloc_toLanguageTag as 1 or 0
    # @return [String] result of uloc_toLanguageTag for this identifier
    def to_language_tag(strict = false)
      read_string_buffer do |buffer, status|
        Lib.uloc_toLanguageTag(@id, buffer, buffer.size, strict ? 1 : 0, status)
      end
    end

    alias_method :to_s, :id

    # @return [String] result of uloc_getVariant for this identifier
    def variant
      read_string_buffer do |buffer, status|
        Lib.uloc_getVariant(@id, buffer, buffer.size, status)
      end
    end

    # Builds a new Locale by running uloc_setKeywordValue on a copy of the
    # identifier.
    #
    # @param keyword [#to_s] keyword name
    # @param value [#to_s, nil] keyword value; nil is passed through as-is
    # @return [Locale]
    def with_keyword(keyword, value)
      keyword = keyword.to_s
      # The buffer is measured in bytes, so size it from bytesize rather than
      # character count; 64 bytes of slack cover separators and terminator.
      length = @id.bytesize + keyword.bytesize + 64

      unless value.nil?
        value = value.to_s
        length += value.bytesize
      end

      result = read_string_buffer(length) do |buffer, status|
        # The buffer is both input and output: seed it with the current id on
        # every attempt, since a retry hands over a brand-new buffer.
        buffer.write_string(@id)
        Lib.uloc_setKeywordValue(keyword, value, buffer, buffer.size, status)
      end

      Locale.new(result)
    end

    # Applies #with_keyword once per hash entry, in iteration order.
    #
    # @param hash [Hash{#to_s => #to_s, nil}]
    # @return [Locale] the receiver itself when +hash+ is empty
    def with_keywords(hash)
      hash.reduce(self) do |locale, (keyword, value)|
        locale.with_keyword(keyword, value)
      end
    end

    # @return [Locale] built from the result of uloc_addLikelySubtags
    def with_likely_subtags
      result = read_string_buffer do |buffer, status|
        Lib.uloc_addLikelySubtags(@id, buffer, buffer.size, status)
      end

      Locale.new(result)
    end

    # @return [Locale] built from the result of uloc_minimizeSubtags
    def with_minimized_subtags
      result = read_string_buffer do |buffer, status|
        Lib.uloc_minimizeSubtags(@id, buffer, buffer.size, status)
      end

      Locale.new(result)
    end
  end
end

241
spec/locale_spec.rb Normal file
View File

@ -0,0 +1,241 @@
# encoding: UTF-8
require 'spec_helper'
module ICU
  # Behavioural examples for ICU::Locale, run against the ICU library that is
  # loaded at test time.
  describe Locale do
    describe 'the available locales' do
      subject { Locale.available }

      it { should be_an Array }
      it { should_not be_empty }
      its(:first) { should be_a Locale }
    end

    # Country codes belong to ISO 3166 (ISO 639 covers languages).
    describe 'the available ISO 3166 country codes' do
      subject { Locale.iso_countries }

      it { should be_an Array }
      it { should_not be_empty }
      its(:first) { should be_a String }
    end

    describe 'the available ISO 639 language codes' do
      subject { Locale.iso_languages }

      it { should be_an Array }
      it { should_not be_empty }
      its(:first) { should be_a String }
    end

    describe 'the default' do
      # The examples below overwrite the default that Locale.default reports;
      # put the original back afterwards so a randomly chosen locale does not
      # leak into the rest of the suite.
      before { @original_default = Locale.default }
      after { Locale.default = @original_default }

      subject { Locale.default }

      # Any available locale other than the current default.
      let(:locale) do
        locales = Locale.available
        locales.delete(Locale.default)
        locales.sample
      end

      it { should be_a Locale }

      it 'can be assigned using Locale' do
        (Locale.default = locale).should == locale
        Locale.default.should == locale
      end

      it 'can be assigned using string' do
        string = locale.to_s

        (Locale.default = string).should == string
        Locale.default.should == Locale.new(string)
      end

      it 'can be assigned using symbol' do
        symbol = locale.to_s.to_sym

        (Locale.default = symbol).should == symbol
        Locale.default.should == Locale.new(symbol)
      end
    end

    describe 'BCP 47 language tags' do
      it 'converts a language tag to a locale' do
        Locale.for_language_tag('en-us').should == Locale.new('en_US')
        Locale.for_language_tag('nan-Hant-tw').should == Locale.new('nan_Hant_TW')
      end

      it 'returns a language tag for a locale' do
        Locale.new('en_US').to_language_tag.should == 'en-US'
        Locale.new('zh_TW').to_language_tag.should == 'zh-TW'
        Locale.new('zh_Hans_CH_PINYIN').to_language_tag.should == 'zh-Hans-CH-u-co-pinyin'
      end
    end

    describe 'Win32 locale IDs' do
      it 'converts an LCID to a locale' do
        Locale.for_lcid(1033).should == Locale.new('en_US')
        Locale.for_lcid(1036).should == Locale.new('fr_FR')
      end

      it 'returns an LCID for a locale' do
        Locale.new('en_US').lcid.should == 1033
        Locale.new('es_US').lcid.should == 21514
      end
    end

    describe 'display' do
      context 'in a specific locale' do
        it 'returns the country' do
          Locale.new('de_DE').display_country('en').should == 'Germany'
          Locale.new('en_US').display_country('fr').should == 'États-Unis'
        end

        it 'returns the language' do
          Locale.new('fr_FR').display_language('de').should == 'Französisch'
          Locale.new('zh_CH').display_language('en').should == 'Chinese'
        end

        it 'returns the name' do
          Locale.new('en_US').display_name('de').should == 'Englisch (Vereinigte Staaten)'
          Locale.new('zh_CH').display_name('fr').should == 'chinois (Suisse)'
        end

        it 'returns the script' do
          Locale.new('zh_Hans_CH').display_script('en').should == 'Simplified Han'
          Locale.new('zh_Hans_CH').display_script('fr').should == 'chinois simplifié'
        end

        it 'returns the variant' do
          Locale.new('zh_Hans_CH_PINYIN').display_variant('en').should == 'Pinyin Romanization'
          Locale.new('zh_Hans_CH_PINYIN').display_variant('es').should == 'Romanización pinyin'
        end
      end

      # Omitting the display locale must give the same text as passing the
      # current default explicitly.
      context 'in the default locale' do
        let(:locale) { Locale.new('de_DE') }

        it 'returns the country' do
          locale.display_country.should == locale.display_country(Locale.default)
        end

        it 'returns the language' do
          locale.display_language.should == locale.display_language(Locale.default)
        end

        it 'returns the name' do
          locale.display_name.should == locale.display_name(Locale.default)
        end

        it 'returns the script' do
          locale.display_script.should == locale.display_script(Locale.default)
        end

        it 'returns the variant' do
          locale.display_variant.should == locale.display_variant(Locale.default)
        end
      end
    end

    describe 'formatting' do
      let(:locale) { Locale.new('de-de.utf8@collation = phonebook') }

      it('is formatted') { locale.name.should == 'de_DE.utf8@collation=phonebook' }
      it('is formatted without keywords') { locale.base_name.should == 'de_DE.utf8' }
      it('is formatted for ICU') { locale.canonical.should == 'de_DE@collation=phonebook' }
    end

    it 'truncates a properly formatted locale, returning the "parent"' do
      Locale.new('es-mx').parent.should == ''
      Locale.new('es_MX').parent.should == 'es'
      Locale.new('zh_Hans_CH_PINYIN').parent.should == 'zh_Hans_CH'
    end

    describe 'ISO codes' do
      it 'returns the ISO 3166 alpha-3 country code' do
        Locale.new('en_US').iso_country.should == 'USA'
        Locale.new('zh_CN').iso_country.should == 'CHN'
      end

      it 'returns the ISO 639 three-letter language code' do
        Locale.new('en_US').iso_language.should == 'eng'
        Locale.new('zh_CN').iso_language.should == 'zho'
      end
    end

    describe 'keywords' do
      context 'when improperly formatted' do
        let(:locale) { Locale.new('de_DE@euro') }

        it 'raises an error' do
          expect { locale.keywords }.to raise_error(ICU::Error)
        end
      end

      context 'when properly formatted' do
        let(:locale) { Locale.new('de_DE@currency=EUR') }

        it 'returns the list of keywords' do
          locale.keywords.should == ['currency']
        end
      end

      it 'can be read' do
        Locale.new('en_US@calendar=chinese').keyword('calendar').should == 'chinese'
        Locale.new('en_US@calendar=chinese').keyword(:calendar).should == 'chinese'
        Locale.new('en_US@some=thing').keyword('missing').should == ''
      end

      it 'can be added' do
        Locale.new('de_DE').with_keyword('currency', 'EUR').should == Locale.new('de_DE@currency=EUR')
        Locale.new('de_DE').with_keyword(:currency, :EUR).should == Locale.new('de_DE@currency=EUR')
      end

      it 'can be added using hash' do
        Locale.new('fr').with_keywords(:a => :b, :c => :d).should == Locale.new('fr@a=b;c=d')
      end

      # Both nil and the empty string drop the keyword.
      it 'can be removed' do
        Locale.new('en_US@some=thing').with_keyword(:some, nil).should == Locale.new('en_US')
        Locale.new('en_US@some=thing').with_keyword(:some, '').should == Locale.new('en_US')
      end
    end

    describe 'orientation' do
      it 'returns the character orientation' do
        Locale.new('ar').character_orientation.should == :rtl
        Locale.new('en').character_orientation.should == :ltr
        Locale.new('fa').character_orientation.should == :rtl
      end

      it 'returns the line orientation' do
        Locale.new('ar').line_orientation.should == :ttb
        Locale.new('en').line_orientation.should == :ttb
        Locale.new('fa').line_orientation.should == :ttb
      end
    end

    describe 'subtags' do
      let(:locale) { Locale.new('zh-hans-ch-pinyin') }

      it('returns the country code') { locale.country.should == 'CH' }
      it('returns the language code') { locale.language.should == 'zh' }
      it('returns the script code') { locale.script.should == 'Hans' }
      it('returns the variant code') { locale.variant.should == 'PINYIN' }

      describe 'likely subtags according to UTS #35' do
        it 'adds likely subtags' do
          Locale.new('en').with_likely_subtags.should == Locale.new('en_Latn_US')
          Locale.new('sr').with_likely_subtags.should == Locale.new('sr_Cyrl_RS')
          Locale.new('zh_TW').with_likely_subtags.should == Locale.new('zh_Hant_TW')
        end

        it 'removes likely subtags' do
          Locale.new('en_US').with_minimized_subtags.should == Locale.new('en')
          Locale.new('sr_RS').with_minimized_subtags.should == Locale.new('sr')
          Locale.new('zh_Hant_TW').with_minimized_subtags.should == Locale.new('zh_TW')
        end
      end
    end
  end
end