ffi-icu/spec/chardet_spec.rb

43 lines
1.1 KiB
Ruby
Raw Normal View History

# encoding: UTF-8
# Specs for ICU::CharDet::Detector: detecting the charset of a string,
# listing detectable charsets, toggling the input filter, and setting a
# declared encoding.
describe ICU::CharDet::Detector do
  # A fresh detector per example, so state changed in one example
  # (e.g. the input filter flag) does not leak into another.
  let(:detector) { ICU::CharDet::Detector.new }

  it "should recognize UTF-8" do
    m = detector.detect("æåø")

    expect(m.name).to eq("UTF-8")
    expect(m.language).to be_a(String)
  end

  it "has a list of detectable charsets" do
    cs = detector.detectable_charsets

    expect(cs).to be_an(Array)
    expect(cs).to_not be_empty
    expect(cs.first).to be_a(String)
  end

  it "should disable / enable the input filter" do
    # The filter is expected to be off on a newly created detector.
    expect(detector.input_filter_enabled?).to be_falsey

    detector.input_filter_enabled = true

    expect(detector.input_filter_enabled?).to be_truthy
  end

  it "should set declared encoding" do
    # There is no reader exercised here; the example only verifies that
    # assigning a declared encoding does not raise.
    expect { detector.declared_encoding = "UTF-8" }.to_not raise_error
  end

  it "should detect several matching encodings" do
    expect(detector.detect_all("foo bar")).to be_an(Array)
  end

  it "should support null bytes" do
    # Create a utf-16 string and then force it to binary (ascii) to mimic data from net/http
    string = "foo".encode("UTF-16").force_encoding("binary")

    m = detector.detect(string)

    expect(m.name).to eq("UTF-16BE")
    expect(m.language).to be_a(String)
  end
end