restore and fix "stream inst-fs direct uploads"

fixes RECNVS-583, RECNVS-594, RECNVS-595

reverts commit efdb13e9a9, restoring
3da400a75e, but adds fixes for the bugs
that caused the original revert. in particular:

* refactor to avoid `eof?` (RECNVS-594)

  in some situations, the file contents stream doesn't implement `eof?`.
  the only required interface for `IO.copy_stream` is `size` and `read`,
  so we'll restrict our interface to that and only expect no more than
  that interface of the parts.

* keep trying the same stream on partial read (RECNVS-595)

  the IO#read docs indicate that if the read result has fewer than the
  bytes requested, it's because the stream is at EOF. the
  SequencedStream output sticks to that behavior. but in some
  situations, the file contents stream will return a partial read before
  EOF. so we need to keep asking for the remainder until we either get
  as much as we wanted or an explicit EOF (nil returned).

test-plan:
- ensure the following all still work with inst-fs enabled:
  - content exports, particularly of larger courses. don't just verify
    the file is created and can be downloaded from inst-fs, but the
    generated file should also be a valid zip file (file extension is
    imscc, but format is zip) that can be extracted into the expected
    course contents. exercises RECNVS-595
  - content migrations, particularly of larger courses, where you choose
    a source Canvas course when executing "Import Course Content" in a
    destination Canvas course. also exercises RECNVS-595.
  - SIS imports via API
    - create a simple courses.csv with a new course defined in it a la
      https://canvas.instructure.com/doc/api/file.sis_csv.html
    - POST this CSV to the SIS imports API documented at
      https://canvas.instructure.com/doc/api/sis_imports.html#method.sis_imports_api.create
      use the mechanism described under the text "If you decide to do a raw post,
      you can skip the 'attachment' argument..." in the description of
      the attachment parameter. exercises RECNVS-594.

Change-Id: Ie7e7fecff491ac2344a48d97f1c6b27eb24885c8
Reviewed-on: https://gerrit.instructure.com/162584
Reviewed-by: Xander Moffatt <xmoffatt@instructure.com>
Tested-by: Jenkins
Product-Review: Xander Moffatt <xmoffatt@instructure.com>
QA-Review: Xander Moffatt <xmoffatt@instructure.com>
This commit is contained in:
Jacob Fugal 2018-08-27 09:48:33 -06:00
parent 8ba9047dbc
commit 815821a762
14 changed files with 289 additions and 60 deletions

View File

@ -2,3 +2,6 @@ source 'https://rubygems.org'
# Specify your gem's dependencies in canvas_http.gemspec
gemspec
gem 'multipart', path: '../multipart'
gem "canvas_slug", path: "../canvas_slug"

View File

@ -14,8 +14,11 @@ Gem::Specification.new do |spec|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
spec.require_paths = ["lib"]
spec.add_dependency "multipart"
spec.add_development_dependency "bundler", "~> 1.5"
spec.add_development_dependency "rake"
spec.add_development_dependency "rspec", "~> 3.5.0"
spec.add_development_dependency "multipart"
spec.add_development_dependency "webmock", "1.24.6"
end

View File

@ -62,7 +62,7 @@ module CanvasHttp
#
# Eventually it may be expanded to optionally do cert verification as well.
def self.request(request_class, url_str, other_headers = {}, redirect_limit: 3, form_data: nil, multipart: false,
body: nil, content_type: nil)
streaming: false, body: nil, content_type: nil)
last_scheme = nil
last_host = nil
@ -72,21 +72,13 @@ module CanvasHttp
_, uri = CanvasHttp.validate_url(url_str, host: last_host, scheme: last_scheme, check_host: true) # uses the last host and scheme for relative redirects
http = CanvasHttp.connection_for_uri(uri)
multipart_query = nil
if form_data && multipart
multipart_query, multipart_headers = Multipart::Post.new.prepare_query(form_data)
other_headers = other_headers.merge(multipart_headers)
end
request = request_class.new(uri.request_uri, other_headers)
add_form_data(request, form_data) if form_data && !multipart
add_form_data(request, form_data, multipart: multipart, streaming: streaming) if form_data
request.body = body if body
request.content_type = content_type if content_type
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
args = [request]
args << multipart_query if multipart
http.request(*args) do |response|
http.request(request) do |response|
if response.is_a?(Net::HTTPRedirection) && !response.is_a?(Net::HTTPNotModified)
last_host = uri.host
last_scheme = uri.scheme
@ -106,8 +98,16 @@ module CanvasHttp
end
end
def self.add_form_data(request, form_data)
if form_data.is_a?(String)
def self.add_form_data(request, form_data, multipart:, streaming:)
if multipart
if streaming
request.body_stream, header = Multipart::Post.new.prepare_query_stream(form_data)
request.content_length = request.body_stream.size
else
request.body, header = Multipart::Post.new.prepare_query(form_data)
end
request.content_type = header['Content-type']
elsif form_data.is_a?(String)
request.body = form_data
request.content_type = 'application/x-www-form-urlencoded'
else

View File

@ -19,12 +19,17 @@
require 'spec_helper'
require 'webmock'
require 'tempfile'
require 'multipart'
describe "CanvasHttp" do
include WebMock::API
describe ".post" do
before :each do
WebMock::RequestRegistry.instance.reset!
end
it "allows you to send a body" do
url = "www.example.com/a"
body = "abc"
@ -41,6 +46,24 @@ describe "CanvasHttp" do
to_return(status: 200)
expect(CanvasHttp.post(url, body: body, content_type: content_type).code).to eq "200"
end
it "allows you to do a streaming multipart upload" do
url = "www.example.com/a"
file_contents = "file contents"
form_data = { "file.txt" => StringIO.new(file_contents) }
stubbed = stub_request(:post, url).with do |req|
expect(req.headers['Content-Type']).to match(%r{\Amultipart/form-data})
expect(req.body.lines[1]).to match('Content-Disposition: form-data; name="file.txt"; filename="file.txt"')
expect(req.body.lines[2]).to match('Content-Transfer-Encoding: binary')
expect(req.body.lines[3]).to match('Content-Type: text/plain')
expect(req.body.lines[5]).to match('file contents')
end.to_return(:status => 200)
CanvasHttp.post(url, form_data: form_data, multipart: true, streaming: true)
assert_requested(stubbed)
end
end
describe ".get" do

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2014 - present Instructure, Inc.
# Copyright (C) 2018 - present Instructure, Inc.
#
# This file is part of Canvas.
#
@ -25,7 +25,6 @@
### NOW:
## Everything wrong is due to keith@oreilly.com
require "canvas_slug"
require "mime/types"
require "net/http"
require "cgi"
@ -34,5 +33,7 @@ require "base64"
module Multipart
require "multipart/file_param"
require "multipart/param"
require "multipart/terminator"
require "multipart/sequenced_stream"
require "multipart/post"
end

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2014 - present Instructure, Inc.
# Copyright (C) 2018 - present Instructure, Inc.
#
# This file is part of Canvas.
#
@ -19,16 +19,22 @@ module Multipart
class FileParam
attr_accessor :k, :filename, :content
def initialize(k, filename, content)
def initialize(k, content)
@k = k
@filename = filename || "file.csv"
@filename = (content.respond_to?(:path) && content.path) || k.to_s || "file.csv"
@content = content
end
def to_multipart
#return "Content-Disposition: form-data; name=\"#{CGI::escape(k)}\"; filename=\"#{filename}\"\r\n" + "Content-Transfer-Encoding: binary\r\n" + "Content-Type: #{MIME::Types.type_for(@filename)}\r\n\r\n" + content + "\r\n "
# Don't escape mine
return "Content-Disposition: form-data; name=\"#{k}\"; filename=\"#{filename}\"\r\n" + "Content-Transfer-Encoding: binary\r\n" + "Content-Type: #{MIME::Types.type_for(@filename).first}\r\n\r\n" + content + "\r\n"
def to_multipart_stream(boundary)
SequencedStream.new([
StringIO.new("--#{boundary}\r\n" \
"Content-Disposition: form-data; name=\"#{k}\"; filename=\"#{filename}\"\r\n" \
"Content-Transfer-Encoding: binary\r\n" \
"Content-Type: #{MIME::Types.type_for(filename).first}\r\n" \
"\r\n"),
content,
StringIO.new("\r\n")
])
end
end
end
end

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2014 - present Instructure, Inc.
# Copyright (C) 2018 - present Instructure, Inc.
#
# This file is part of Canvas.
#
@ -19,15 +19,25 @@ module Multipart
class Param
attr_accessor :k, :v
def self.from(k, v)
if v.respond_to?(:read)
FileParam.new(k, v)
else
Param.new(k, v)
end
end
def initialize(k, v)
@k = k
@v = v
end
def to_multipart
#return "Content-Disposition: form-data; name=\"#{CGI::escape(k)}\"\r\n\r\n#{v}\r\n"
# Don't escape mine...
return "Content-Disposition: form-data; name=\"#{k}\"\r\n\r\n#{v}\r\n"
def to_multipart_stream(boundary)
StringIO.new("--#{boundary}\r\n" \
"Content-Disposition: form-data; name=\"#{k}\"\r\n" \
"\r\n" +
v.to_s +
"\r\n")
end
end
end
end

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2014 - present Instructure, Inc.
# Copyright (C) 2018 - present Instructure, Inc.
#
# This file is part of Canvas.
#
@ -15,40 +15,41 @@
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
require 'securerandom'
module Multipart
class Post
BOUNDARY = ::CanvasSlug.generate('canvas-rules', 15)
HEADER = {"Content-type" => "multipart/form-data; boundary=" + BOUNDARY}
def header(boundary)
{"Content-type" => "multipart/form-data; boundary=#{boundary}"}
end
def prepare_query (params, field_priority=[])
fp = []
creds = params.delete :basic_auth
if creds
puts creds[:username]
puts creds[:password]
puts Base64.encode64("#{creds[:username]}:#{creds[:password]}")
end
def file_param(k, v)
file_data = v.read rescue nil
if file_data
file_path = (v.respond_to?(:path) && v.path) || k.to_s
FileParam.new(k, file_path, file_data)
else
Param.new(k, v)
end
end
def prepare_query_stream(params, field_priority=[])
params.delete(:basic_auth)
parts = []
completed_fields = {}
field_priority.each do |k|
if params.has_key?(k) && !completed_fields.has_key?(k)
fp.push(file_param(k, params[k]))
completed_fields[k] = true
end
next if completed_fields.key?(k)
next unless params.key?(k)
parts << Param.from(k, params[k])
completed_fields[k] = true
end
params.each { |k, v| fp.push(file_param(k, v)) unless completed_fields.has_key?(k) }
query = fp.collect { |p| "--" + BOUNDARY + "\r\n" + p.to_multipart }.join("") + "--" + BOUNDARY + "--" + "\r\n"
return query, HEADER
params.each do |k, v|
next if completed_fields.key?(k)
parts << Param.from(k, v)
end
parts << TERMINATOR
boundary = ::SecureRandom.hex(32)
streams = parts.map{ |part| part.to_multipart_stream(boundary) }
[SequencedStream.new(streams), header(boundary)]
end
def prepare_query(params, field_priority=[])
stream, header = prepare_query_stream(params, field_priority)
[stream.read, header]
end
end
end

View File

@ -0,0 +1,54 @@
#
# Copyright (C) 2018 - present Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
module Multipart
class SequencedStream
def initialize(streams)
@streams = streams
end
def size
@streams.map(&:size).sum
end
def read(size=nil, outbuf="")
outbuf.replace("")
if size.nil?
# slurp up all remaining contents, even if that's just "" when eof at
# beginning
@streams.each { |stream| outbuf.concat(stream.read) }
return outbuf
elsif size.zero?
# return "" (which is already in outbuf) even if eof at beginning
return outbuf
else
# size >= 1 and not eof at beginning, read up to size
read_any = false
remaining = size
@streams.each do |stream|
readbuf = ""
while remaining > 0 && stream.read(remaining, readbuf)
read_any = true
remaining -= readbuf.length
outbuf.concat(readbuf)
end
end
return read_any ? outbuf : nil
end
end
end
end

View File

@ -0,0 +1,26 @@
#
# Copyright (C) 2018 - present Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
module Multipart
class Terminator
def to_multipart_stream(boundary)
StringIO.new("--#{boundary}--\r\n")
end
end
TERMINATOR = Terminator.new
end

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2013 - present Instructure, Inc.
# Copyright (C) 2018 - present Instructure, Inc.
#
# This file is part of Canvas.
#
@ -41,4 +41,26 @@ describe Multipart::Post do
expect(params["b"][:filename]).to eq("b")
expect(params["b"][:tempfile].read).to eq("file in mem")
end
it "should prepare_query_stream with a File" do
file = Tempfile.new(["test","txt"])
file.write("file on disk")
file.rewind
stream, header = subject.prepare_query_stream(:a => "string", :b => file)
params = parse_params(stream.read, header)
expect(params["a"]).to eq("string")
expect(params["b"][:filename]).to eq(File.basename(file.path))
expect(params["b"][:tempfile].read).to eq("file on disk")
end
it "should prepare_query_stream with a StringIO" do
file = Tempfile.new(["test","txt"])
file.write("file in mem")
file.rewind
stream, header = subject.prepare_query_stream(:a => "string", :b => file)
params = parse_params(stream.read, header)
expect(params["a"]).to eq("string")
expect(params["b"][:filename]).to eq(File.basename(file.path))
expect(params["b"][:tempfile].read).to eq("file in mem")
end
end

View File

@ -0,0 +1,67 @@
#
# Copyright (C) 2018 - present Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
require 'spec_helper.rb'
describe Multipart::SequencedStream do
def test_copy(content, content_string)
source = Multipart::SequencedStream.new([StringIO.new("prefix|"), content, StringIO.new("|suffix")])
destination = StringIO.new
IO.copy_stream(source, destination)
destination.rewind
expect(destination.read).to eq("prefix|#{content_string}|suffix")
end
it "should work as a source for IO.copy_stream" do
file = Tempfile.new(["test","txt"])
file.write("file on disk")
file.rewind
test_copy(file, "file on disk")
end
class CustomStream
def initialize(content_string, &reader)
@source = StringIO.new(content_string)
@reader = lambda { |*args| reader.(@source, *args) }
end
def size; @source.size; end
def read(*args); @reader.call(*args); end
end
it "only requires `size` and `read` on the component stream" do
# just delegate read to the source, but it's wrapped to hide the other
# StringIO methods
content_string = "howdy, howdy, howdy!"
content = CustomStream.new(content_string) do |source, *args|
source.read(*args)
end
test_copy(content, content_string)
end
it "allows partial read responses without EOF from component stream" do
# leave read() and read(0) alone, but restrict to one byte returned at a
# time for read(n)
content_string = "howdy, howdy, howdy!"
content = CustomStream.new(content_string) do |source, n, *args|
n = 1 if n && n > 0
source.read(n, *args)
end
test_copy(content, content_string)
end
end

View File

@ -114,7 +114,6 @@ module InstFS
def direct_upload(file_name:, file_object:)
# example of a call to direct_upload:
# > res = InstFS.direct_upload(
# > host: "canvas.docker",
# > file_name: "a.png",
# > file_object: File.open("public/images/a.png")
# > )
@ -125,7 +124,7 @@ module InstFS
data = {}
data[file_name] = file_object
response = CanvasHttp.post(url, form_data: data, multipart:true)
response = CanvasHttp.post(url, form_data: data, multipart: true, streaming: true)
if response.class == Net::HTTPCreated
json_response = JSON.parse(response.body)
return json_response["instfs_uuid"] if json_response.key?("instfs_uuid")

View File

@ -406,6 +406,20 @@ describe InstFS do
)
expect(res).to eq(instfs_uuid)
end
it "requests a streaming upload to allow large files" do
instfs_uuid = "1234-abcd"
expect(CanvasHttp).to receive(:post).with(anything, hash_including(streaming: true)).and_return(double(
class: Net::HTTPCreated,
code: 200,
body: {instfs_uuid: instfs_uuid}.to_json
))
InstFS.direct_upload(
file_name: "a.png",
file_object: File.open("public/images/a.png")
)
end
end
end