ensure zip filenames are UTF-8

if they're not valid UTF-8, interpret them in CP437 (which is
the original encoding used by PKZIP)

test plan: you should be able to upload and extract the
zip file attached to the ticket

fixes CNVS-21430

Change-Id: Ieaf16b03b390403ceec406ec27535c4da20eb6be
Reviewed-on: https://gerrit.instructure.com/57118
Tested-by: Jenkins
Reviewed-by: James Williams  <jamesw@instructure.com>
QA-Review: Jahnavi Yetukuri <jyetukuri@instructure.com>
Product-Review: Jeremy Stanley <jeremy@instructure.com>
This commit is contained in:
Jeremy Stanley 2015-06-24 11:35:28 -06:00
parent 1d858ae41c
commit dd46465840
2 changed files with 20 additions and 3 deletions

View File

@ -162,10 +162,12 @@ class CanvasUnzip
end
def name
if type == :zip
entry.name
@name ||= if type == :zip
# the standard is DOS (cp437) or UTF-8, although in practice, anything goes
normalize_name(entry.name, 'cp437')
elsif type == :tar
entry.full_name.sub(/^\.\//, '')
# there is no standard. this seems like a reasonable fallback to me
normalize_name(entry.full_name.sub(/^\.\//, ''), 'iso-8859-1')
end
end
@ -206,5 +208,12 @@ class CanvasUnzip
end
end
end
# forces name to UTF-8, converting from fallback_encoding if it isn't UTF-8 to begin with
# Forces name to UTF-8, converting from fallback_encoding if its bytes are not
# valid UTF-8 to begin with.
#
# name              - String holding the raw entry name; it is left unmodified.
# fallback_encoding - encoding name (e.g. 'cp437') used to interpret the bytes
#                     when they do not form valid UTF-8.
#
# Returns a new UTF-8 encoded String.
def normalize_name(name, fallback_encoding)
  # work on a copy: force_encoding relabels its receiver in place, which would
  # change the encoding of the caller's string and raise on a frozen one
  candidate = name.dup.force_encoding('utf-8')
  return candidate if candidate.valid_encoding?

  # not UTF-8: reinterpret the same bytes in the fallback encoding and transcode
  candidate.force_encoding(fallback_encoding).encode('utf-8')
end
end
end

View File

@ -98,6 +98,14 @@ describe "CanvasUnzip" do
end
end
# Archive entry names that are not valid UTF-8 must still be reported as UTF-8.
describe "non-UTF-8 filenames" do
  it "converts zip filename entries from cp437 to utf-8" do
    # "\x82" is not valid UTF-8 by itself; the expectation reads it as cp437 "é"
    cp437_entry = Zip::Entry.new.tap { |zip_entry| zip_entry.name = "mol\x82" }
    unzip_entry = CanvasUnzip::Entry.new(cp437_entry)
    expect(unzip_entry.name).to eq('molé')
  end
end
it_behaves_like 'it extracts archives with extension', 'zip'
it_behaves_like 'it extracts archives with extension', 'tar'
it_behaves_like 'it extracts archives with extension', 'tar.gz'