stream inst-fs direct uploads

fixes RECNVS-547

generally, in Multipart::Post, you can get a stream representation of
the body instead of a string, and in CanvasHttp.post you can request
that stream be supplied to the underlying request's body_stream instead
of the string to the underlying body. inst-fs then supplies the
streaming flag during direct upload.

this avoids an error when the body as a single string is too large to
POST.

test-plan:
- confirm general upload-from-canvas functionality is preserved by
  triggering uploads from canvas to another server
  - e.g. inst-fs enabled course export
  - e.g. inst-fs enabled course export
- confirm specific upload-from-canvas functionality with streamed larger
  files
  - i.e. inst-fs enabled course export where the export size is larger
    than 2GB
  - note that there may be a new HTTP 504 error when the upload size
    approaches 3GB. this is a separate issue not related to this commit

Change-Id: I15e22e6688233a77783d1cd86960bbaaa64eb0dc
Reviewed-on: https://gerrit.instructure.com/160105
Reviewed-by: Cody Cutrer <cody@instructure.com>
Tested-by: Jenkins
QA-Review: Michael Jasper <mjasper@instructure.com>
Product-Review: Jacob Fugal <jacob@instructure.com>
This commit is contained in:
Jacob Fugal 2018-08-08 11:11:32 -06:00
parent a3aa082831
commit 3da400a75e
13 changed files with 228 additions and 59 deletions

View File

@ -2,3 +2,6 @@ source 'https://rubygems.org'
# Specify your gem's dependencies in canvas_http.gemspec
gemspec
gem 'multipart', path: '../multipart'
gem "canvas_slug", path: "../canvas_slug"

View File

@ -14,8 +14,11 @@ Gem::Specification.new do |spec|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
spec.require_paths = ["lib"]
spec.add_dependency "multipart"
spec.add_development_dependency "bundler", "~> 1.5"
spec.add_development_dependency "rake"
spec.add_development_dependency "rspec", "~> 3.5.0"
spec.add_development_dependency "multipart"
spec.add_development_dependency "webmock", "1.24.6"
end

View File

@ -62,7 +62,7 @@ module CanvasHttp
#
# Eventually it may be expanded to optionally do cert verification as well.
def self.request(request_class, url_str, other_headers = {}, redirect_limit: 3, form_data: nil, multipart: false,
body: nil, content_type: nil)
streaming: false, body: nil, content_type: nil)
last_scheme = nil
last_host = nil
@ -72,21 +72,13 @@ module CanvasHttp
_, uri = CanvasHttp.validate_url(url_str, host: last_host, scheme: last_scheme, check_host: true) # uses the last host and scheme for relative redirects
http = CanvasHttp.connection_for_uri(uri)
multipart_query = nil
if form_data && multipart
multipart_query, multipart_headers = Multipart::Post.new.prepare_query(form_data)
other_headers = other_headers.merge(multipart_headers)
end
request = request_class.new(uri.request_uri, other_headers)
add_form_data(request, form_data) if form_data && !multipart
add_form_data(request, form_data, multipart: multipart, streaming: streaming) if form_data
request.body = body if body
request.content_type = content_type if content_type
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
args = [request]
args << multipart_query if multipart
http.request(*args) do |response|
http.request(request) do |response|
if response.is_a?(Net::HTTPRedirection) && !response.is_a?(Net::HTTPNotModified)
last_host = uri.host
last_scheme = uri.scheme
@ -106,8 +98,16 @@ module CanvasHttp
end
end
def self.add_form_data(request, form_data)
if form_data.is_a?(String)
def self.add_form_data(request, form_data, multipart:, streaming:)
if multipart
if streaming
request.body_stream, header = Multipart::Post.new.prepare_query_stream(form_data)
request.content_length = request.body_stream.size
else
request.body, header = Multipart::Post.new.prepare_query(form_data)
end
request.content_type = header['Content-type']
elsif form_data.is_a?(String)
request.body = form_data
request.content_type = 'application/x-www-form-urlencoded'
else

View File

@ -19,12 +19,17 @@
require 'spec_helper'
require 'webmock'
require 'tempfile'
require 'multipart'
describe "CanvasHttp" do
include WebMock::API
describe ".post" do
before :each do
WebMock::RequestRegistry.instance.reset!
end
it "allows you to send a body" do
url = "www.example.com/a"
body = "abc"
@ -41,6 +46,24 @@ describe "CanvasHttp" do
to_return(status: 200)
expect(CanvasHttp.post(url, body: body, content_type: content_type).code).to eq "200"
end
it "allows you to do a streaming multipart upload" do
url = "www.example.com/a"
file_contents = "file contents"
form_data = { "file.txt" => StringIO.new(file_contents) }
stubbed = stub_request(:post, url).with do |req|
expect(req.headers['Content-Type']).to match(%r{\Amultipart/form-data})
expect(req.body.lines[1]).to match('Content-Disposition: form-data; name="file.txt"; filename="file.txt"')
expect(req.body.lines[2]).to match('Content-Transfer-Encoding: binary')
expect(req.body.lines[3]).to match('Content-Type: text/plain')
expect(req.body.lines[5]).to match('file contents')
end.to_return(:status => 200)
CanvasHttp.post(url, form_data: form_data, multipart: true, streaming: true)
assert_requested(stubbed)
end
end
describe ".get" do

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2014 - present Instructure, Inc.
# Copyright (C) 2018 - present Instructure, Inc.
#
# This file is part of Canvas.
#
@ -25,7 +25,6 @@
### NOW:
## Everything wrong is due to keith@oreilly.com
require "canvas_slug"
require "mime/types"
require "net/http"
require "cgi"
@ -34,5 +33,7 @@ require "base64"
module Multipart
require "multipart/file_param"
require "multipart/param"
require "multipart/terminator"
require "multipart/sequenced_stream"
require "multipart/post"
end

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2014 - present Instructure, Inc.
# Copyright (C) 2018 - present Instructure, Inc.
#
# This file is part of Canvas.
#
@ -19,16 +19,22 @@ module Multipart
class FileParam
attr_accessor :k, :filename, :content
def initialize(k, filename, content)
def initialize(k, content)
@k = k
@filename = filename || "file.csv"
@filename = (content.respond_to?(:path) && content.path) || k.to_s || "file.csv"
@content = content
end
def to_multipart
#return "Content-Disposition: form-data; name=\"#{CGI::escape(k)}\"; filename=\"#{filename}\"\r\n" + "Content-Transfer-Encoding: binary\r\n" + "Content-Type: #{MIME::Types.type_for(@filename)}\r\n\r\n" + content + "\r\n "
# Don't escape mine
return "Content-Disposition: form-data; name=\"#{k}\"; filename=\"#{filename}\"\r\n" + "Content-Transfer-Encoding: binary\r\n" + "Content-Type: #{MIME::Types.type_for(@filename).first}\r\n\r\n" + content + "\r\n"
def to_multipart_stream(boundary)
SequencedStream.new([
StringIO.new("--#{boundary}\r\n" \
"Content-Disposition: form-data; name=\"#{k}\"; filename=\"#{filename}\"\r\n" \
"Content-Transfer-Encoding: binary\r\n" \
"Content-Type: #{MIME::Types.type_for(filename).first}\r\n" \
"\r\n"),
content,
StringIO.new("\r\n")
])
end
end
end
end

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2014 - present Instructure, Inc.
# Copyright (C) 2018 - present Instructure, Inc.
#
# This file is part of Canvas.
#
@ -19,15 +19,25 @@ module Multipart
class Param
attr_accessor :k, :v
def self.from(k, v)
if v.respond_to?(:read)
FileParam.new(k, v)
else
Param.new(k, v)
end
end
def initialize(k, v)
@k = k
@v = v
end
def to_multipart
#return "Content-Disposition: form-data; name=\"#{CGI::escape(k)}\"\r\n\r\n#{v}\r\n"
# Don't escape mine...
return "Content-Disposition: form-data; name=\"#{k}\"\r\n\r\n#{v}\r\n"
def to_multipart_stream(boundary)
StringIO.new("--#{boundary}\r\n" \
"Content-Disposition: form-data; name=\"#{k}\"\r\n" \
"\r\n" +
v.to_s +
"\r\n")
end
end
end
end

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2014 - present Instructure, Inc.
# Copyright (C) 2018 - present Instructure, Inc.
#
# This file is part of Canvas.
#
@ -15,40 +15,41 @@
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
require 'securerandom'
module Multipart
class Post
BOUNDARY = ::CanvasSlug.generate('canvas-rules', 15)
HEADER = {"Content-type" => "multipart/form-data; boundary=" + BOUNDARY}
def header(boundary)
{"Content-type" => "multipart/form-data; boundary=#{boundary}"}
end
def prepare_query (params, field_priority=[])
fp = []
creds = params.delete :basic_auth
if creds
puts creds[:username]
puts creds[:password]
puts Base64.encode64("#{creds[:username]}:#{creds[:password]}")
end
def file_param(k, v)
file_data = v.read rescue nil
if file_data
file_path = (v.respond_to?(:path) && v.path) || k.to_s
FileParam.new(k, file_path, file_data)
else
Param.new(k, v)
end
end
def prepare_query_stream(params, field_priority=[])
params.delete(:basic_auth)
parts = []
completed_fields = {}
field_priority.each do |k|
if params.has_key?(k) && !completed_fields.has_key?(k)
fp.push(file_param(k, params[k]))
completed_fields[k] = true
end
next if completed_fields.key?(k)
next unless params.key?(k)
parts << Param.from(k, params[k])
completed_fields[k] = true
end
params.each { |k, v| fp.push(file_param(k, v)) unless completed_fields.has_key?(k) }
query = fp.collect { |p| "--" + BOUNDARY + "\r\n" + p.to_multipart }.join("") + "--" + BOUNDARY + "--" + "\r\n"
return query, HEADER
params.each do |k, v|
next if completed_fields.key?(k)
parts << Param.from(k, v)
end
parts.push(TERMINATOR)
boundary = ::SecureRandom.hex(32)
streams = parts.map{ |part| part.to_multipart_stream(boundary) }
[SequencedStream.new(streams), header(boundary)]
end
def prepare_query(params, field_priority=[])
stream, header = prepare_query_stream(params, field_priority)
[stream.read, header]
end
end
end

View File

@ -0,0 +1,61 @@
#
# Copyright (C) 2018 - present Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
module Multipart
class SequencedStream
def initialize(streams)
@streams = streams
end
def eof?
@streams.all?(&:eof?)
end
def size
@streams.map(&:size).sum
end
def read(size=nil, outbuf="")
outbuf.replace("")
if size.nil?
# slurp up all remaining contents, even if that's just "" when eof at
# beginning
@streams.each { |stream| outbuf.concat(stream.read) }
return outbuf
elsif size.zero?
# return "" (which is already in outbuf) even if eof at beginning
return outbuf
elsif eof?
# size >= 1 but eof at beginning, return nil
return nil
else
# size >= 1 and not eof at beginning, read up to size
remaining = size
@streams.each do |stream|
break if remaining.zero?
next if stream.eof?
readbuf = ""
if stream.read(remaining, readbuf)
remaining -= readbuf.length
outbuf.concat(readbuf)
end
end
return outbuf
end
end
end
end

View File

@ -0,0 +1,26 @@
#
# Copyright (C) 2018 - present Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
module Multipart
class Terminator
def to_multipart_stream(boundary)
StringIO.new("--#{boundary}--\r\n")
end
end
TERMINATOR = Terminator.new
end

View File

@ -41,4 +41,26 @@ describe Multipart::Post do
expect(params["b"][:filename]).to eq("b")
expect(params["b"][:tempfile].read).to eq("file in mem")
end
it "should prepare_query_stream with a File" do
file = Tempfile.new(["test","txt"])
file.write("file on disk")
file.rewind
stream, header = subject.prepare_query_stream(:a => "string", :b => file)
params = parse_params(stream.read, header)
expect(params["a"]).to eq("string")
expect(params["b"][:filename]).to eq(File.basename(file.path))
expect(params["b"][:tempfile].read).to eq("file on disk")
end
it "should prepare_query_stream with a StringIO" do
file = Tempfile.new(["test","txt"])
file.write("file in mem")
file.rewind
stream, header = subject.prepare_query_stream(:a => "string", :b => file)
params = parse_params(stream.read, header)
expect(params["a"]).to eq("string")
expect(params["b"][:filename]).to eq(File.basename(file.path))
expect(params["b"][:tempfile].read).to eq("file in mem")
end
end

View File

@ -113,7 +113,6 @@ module InstFS
def direct_upload(file_name:, file_object:)
# example of a call to direct_upload:
# > res = InstFS.direct_upload(
# > host: "canvas.docker",
# > file_name: "a.png",
# > file_object: File.open("public/images/a.png")
# > )
@ -124,7 +123,7 @@ module InstFS
data = {}
data[file_name] = file_object
response = CanvasHttp.post(url, form_data: data, multipart:true)
response = CanvasHttp.post(url, form_data: data, multipart: true, streaming: true)
if response.class == Net::HTTPCreated
json_response = JSON.parse(response.body)
return json_response["instfs_uuid"] if json_response.key?("instfs_uuid")

View File

@ -406,6 +406,20 @@ describe InstFS do
)
expect(res).to eq(instfs_uuid)
end
it "requests a streaming upload to allow large files" do
instfs_uuid = "1234-abcd"
expect(CanvasHttp).to receive(:post).with(anything, hash_including(streaming: true)).and_return(double(
class: Net::HTTPCreated,
code: 200,
body: {instfs_uuid: instfs_uuid}.to_json
))
InstFS.direct_upload(
file_name: "a.png",
file_object: File.open("public/images/a.png")
)
end
end
end