2010-05-10 03:33:20 +08:00
|
|
|
# encoding: UTF-8
|
|
|
|
|
|
|
|
# Examples for ICU::CharDet::Detector: charset detection, the list of
# detectable charsets, the input-filter toggle and the declared-encoding setter.
describe ICU::CharDet::Detector do
  # A fresh detector per example, so state set in one example (such as the
  # input filter flag) cannot leak into another.
  let(:detector) { ICU::CharDet::Detector.new }

  it "should recognize UTF-8" do
    # Non-ASCII characters, so the input is multi-byte in this UTF-8 source file.
    m = detector.detect("æåø")

    expect(m.name).to eq("UTF-8")
    expect(m.language).to be_a(String)
  end

  it "has a list of detectable charsets" do
    cs = detector.detectable_charsets

    expect(cs).to be_an(Array)
    expect(cs).to_not be_empty
    expect(cs.first).to be_a(String)
  end

  it "should disable / enable the input filter" do
    # The filter is expected to be off on a newly created detector.
    expect(detector.input_filter_enabled?).to be_falsey

    detector.input_filter_enabled = true

    expect(detector.input_filter_enabled?).to be_truthy
  end

  it "should set declared encoding" do
    # No reader is exercised by this spec, so the example only asserts that
    # the setter accepts a charset name without raising.
    expect { detector.declared_encoding = "UTF-8" }.to_not raise_error
  end

  it "should detect several matching encodings" do
    expect(detector.detect_all("foo bar")).to be_an(Array)
  end

  it "should support null bytes" do
    # Create a UTF-16 string and then force it to binary (ascii) to mimic data from net/http
    string = "foo".encode("UTF-16").force_encoding("binary")

    m = detector.detect(string)

    expect(m.name).to eq("UTF-16BE")
    expect(m.language).to be_a(String)
  end
end
|