add course link validator whitelist

also don't count a 429 request as unreachable

test plan:
 - create links to a nonexistent domain such as bad_domain.net
 - ensure the course link validator flags them as unreachable
 - do Setting.set('link_validator_whitelisted_hosts', 'bad_domain.net')
 - re-run the course link validator
 - the links should no longer be flagged as unreachable

flag=none
fixes LA-97

Change-Id: I4e46ce252bed527ad8cb584992eb7d12c9df743f
Reviewed-on: https://gerrit.instructure.com/c/canvas-lms/+/220915
Tested-by: Jenkins
Tested-by: Service Cloud Jenkins <svc.cloudjenkins@instructure.com>
Reviewed-by: James Williams <jamesw@instructure.com>
QA-Review: James Williams <jamesw@instructure.com>
Product-Review: Jeremy Stanley <jeremy@instructure.com>
This commit is contained in:
Jeremy Stanley 2019-12-16 09:30:05 -07:00
parent ccbc5b027e
commit 7b49e42385
2 changed files with 32 additions and 1 deletions

View File

@ -274,8 +274,20 @@ class CourseLinkValidator
:missing_item
end
# whitelisted hosts will never be flagged as unavailable
def whitelisted?(url)
@whitelist ||= Setting.get('link_validator_whitelisted_hosts', '').split(',')
return false if @whitelist.empty?
host = URI.parse(url).host
@whitelist.include?(host)
rescue URI::InvalidURIError
false
end
# ping the url and make sure we get a 200
def reachable_url?(url)
return true if whitelisted?(url)
@unavailable_photo_redirect_pattern ||= Regexp.new(Setting.get('unavailable_photo_redirect_pattern', 'yimg\.com/.+/photo_unavailable.png$'))
redirect_proc = lambda do |response|
# flickr does a redirect to this file when a photo is deleted/not found;
@ -295,7 +307,7 @@ class CourseLinkValidator
case response.code
when /^2/ # 2xx code
true
when "401", "403", "503"
when "401", "403", "429", "503"
# we accept unauthorized and forbidden codes here because sometimes servers refuse to serve our requests
# and someone can link to a site that requires authentication anyway - doesn't necessarily make it invalid
true

View File

@ -144,6 +144,25 @@ describe CourseLinkValidator do
expect(issues).to be_empty
end
describe "whitelisted?" do
before :once do
course_factory
end
it "returns false when Setting is absent" do
link_validator = CourseLinkValidator.new(@course)
expect(link_validator.whitelisted?('https://example.com/')).to eq false
end
it "accepts a comma-separated Setting" do
Setting.set('link_validator_whitelisted_hosts', 'foo.com,bar.com')
link_validator = CourseLinkValidator.new(@course)
expect(link_validator.whitelisted?('http://foo.com/foo')).to eq true
expect(link_validator.whitelisted?('http://bar.com/bar')).to eq true
expect(link_validator.whitelisted?('http://baz.com/baz')).to eq false
end
end
describe "insecure hosts" do
def test_url(url)
course_factory