diff --git a/lib/api.rb b/lib/api.rb index 8993c215102..f02c8d2e9c8 100644 --- a/lib/api.rb +++ b/lib/api.rb @@ -492,7 +492,8 @@ module Api # and adds context (e.g. /courses/:id/) if it is missing # exception: it leaves user-context file links alone def process_incoming_html_content(html) - Html::Content.process_incoming(html) + host = request.host_with_port if self.respond_to?(:request) + Html::Content.process_incoming(html, host: host) end def value_to_boolean(value) diff --git a/lib/api/html/content.rb b/lib/api/html/content.rb index 761ebfb7c99..a0909905088 100644 --- a/lib/api/html/content.rb +++ b/lib/api/html/content.rb @@ -21,9 +21,9 @@ require 'nokogiri' module Api module Html class Content - def self.process_incoming(html) + def self.process_incoming(html, host: nil) return html unless html.present? - content = self.new(html) + content = self.new(html, host: host) # shortcut html documents that definitely don't have anything we're interested in return html unless content.might_need_modification? @@ -37,12 +37,12 @@ module Api attr_reader :html - def initialize(html_string, account = nil, include_mobile: false) - @account, @html, @include_mobile = account, html_string, include_mobile + def initialize(html_string, account = nil, include_mobile: false, host: nil) + @account, @html, @include_mobile, @host = account, html_string, include_mobile, host end def might_need_modification? - !!(html =~ %r{verifier=|\D/files|instructure_inline_media_comment}) + !!(html =~ %r{verifier=|['"]/files|instructure_inline_media_comment}) end # Take incoming html from a user or similar and modify it for safe storage and display @@ -131,7 +131,7 @@ module Api end def corrected_link(link) - Html::Link.new(link).to_corrected_s + Html::Link.new(link, host: @host).to_corrected_s end def parsed_html diff --git a/lib/api/html/link.rb b/lib/api/html/link.rb index 5a6734c6976..610585ed015 100644 --- a/lib/api/html/link.rb +++ b/lib/api/html/link.rb @@ -22,13 +22,13 @@ module Api module Html class Link attr_reader :link - def initialize(link_string) - @link = link_string + def initialize(link_string, host: nil) + @link, @host = link_string, host end def to_corrected_s return link if is_not_actually_a_link? || should_skip_correction? - strip_verifier_params(scope_link_to_context(link)) + strip_verifier_params(scope_link_to_context(strip_host(link))) end private @@ -38,6 +38,17 @@ module Api LINK_REGEX = %r{/files/(\d+)/(?:download|preview)} VERIFIER_REGEX = %r{(\?)verifier=[^&]*&?|&verifier=[^&]*} + def strip_host(link) + return link if @host.nil? + uri = URI.parse(link) + if uri.host == @host + fragment = "##{uri.fragment}" if uri.fragment + "#{uri.request_uri}#{fragment}" + else + link + end + end + def strip_verifier_params(local_link) if local_link.include?('verifier=') return local_link.gsub(VERIFIER_REGEX, '\1') @@ -47,11 +58,9 @@ module Api end def scope_link_to_context(local_link) - uri = URI.parse(local_link) - if uri.path.start_with?('/files') + if local_link.start_with?('/files') if attachment && APPLICABLE_CONTEXT_TYPES.include?(attachment.context_type) - uri.path = "/#{attachment.context_type.underscore.pluralize}/#{attachment.context_id}#{uri.path}" - return uri.to_s + return "/#{attachment.context_type.underscore.pluralize}/#{attachment.context_id}" + local_link end end diff --git a/spec/lib/api/html/content_spec.rb b/spec/lib/api/html/content_spec.rb index 035a2a9879e..11943cec343 100644 --- a/spec/lib/api/html/content_spec.rb +++ b/spec/lib/api/html/content_spec.rb @@ -37,11 +37,6 @@ module Api expect(Content.new(string).might_need_modification?).to be(true) end - it 'is true for a link to files that include the host' do - string = "
link" - expect(Content.new(string).might_need_modification?).to be(true) - end - it 'is false for a link to files in a context' do string = "link" expect(Content.new(string).might_need_modification?).to be(false) @@ -56,10 +51,11 @@ module Api describe "#modified_html" do it "scrubs links" do string = "link" - Html::Link.expects(:new).with("http://somelink.com").returns( + host = 'some-host' + Html::Link.expects(:new).with("http://somelink.com", host: host).returns( stub(to_corrected_s: "http://otherlink.com") ) - html = Content.new(string).modified_html + html = Content.new(string, host: host).modified_html expect(html).to match(/otherlink.com/) end diff --git a/spec/lib/api/html/link_spec.rb b/spec/lib/api/html/link_spec.rb index e69577b0be4..2b628b775b6 100644 --- a/spec/lib/api/html/link_spec.rb +++ b/spec/lib/api/html/link_spec.rb @@ -49,11 +49,28 @@ module Api expect(Link.new(raw_link).to_corrected_s).to eq "/courses/1/files/1/download?" end - it 'scopes to the context even if url includes the host' do + it 'scopes to the context if url includes the host' do course_attachment = stub(context_type: "Course", context_id: 1) Attachment.stubs(:where).with(id: "1").returns(stub(first: course_attachment)) - raw_link = "https://account.instructure.com/files/1/download?verifier=123" - expect(Link.new(raw_link).to_corrected_s).to eq "https://account.instructure.com/courses/1/files/1/download?" + host = 'account.instructure.com' + raw_link = "https://#{host}/files/1/download?verifier=123" + expect(Link.new(raw_link, host: host).to_corrected_s).to eq "/courses/1/files/1/download?" + end + + it 'strips the current host from absolute urls' do + course_attachment = stub(context_type: "Course", context_id: 1) + Attachment.stubs(:where).with(id: "1").returns(stub(first: course_attachment)) + host = 'account.instructure.com' + raw_link = "https://#{host}/courses/1/files/1/download?" + expect(Link.new(raw_link, host: host).to_corrected_s).to eq "/courses/1/files/1/download?" + end + + it 'does not scope to the context if url includes a differnt host' do + course_attachment = stub(context_type: "Course", context_id: 1) + Attachment.stubs(:where).with(id: "1").returns(stub(first: course_attachment)) + host = 'account.instructure.com' + raw_link = "https://#{host}/files/1/download" + expect(Link.new(raw_link, host: 'other-host').to_corrected_s).to eq raw_link end end end diff --git a/spec/lib/api_spec.rb b/spec/lib/api_spec.rb index f3e5d975f98..c6b07308f1a 100644 --- a/spec/lib/api_spec.rb +++ b/spec/lib/api_spec.rb @@ -697,6 +697,9 @@ describe Api do context ".process_incoming_html_content" do class T extend Api + def self.request + OpenStruct.new({host_with_port: 'some-host'}) + end end it "should add context to files and remove verifier parameters" do @@ -725,6 +728,11 @@ describe Api do but not here } end + + it 'passes host to Content.process_incoming' do + Api::Html::Content.expects(:process_incoming).with(anything, host: 'some-host') + T.process_incoming_html_content('') + end end context ".paginate" do