bump sanitize to latest
[fsc-max-nodes=12] [fsc-timeout=60] * switch lots of parsing to Nokogumbo to keep things consistent * deep CSS sanitization is now built in, and with a proper parser (meaning we can drop our code to do it, and adjust some specs to account for things that _are_ valid) lots of changes because gumbo parsing<->serialization cycle is slightly different: * better job preserving original whitespace * literal non-breaking space characters are converted to entities * <p> tags aren't inserted for the heck of it * several _other_ entities are unnecessary, and output as literal characters * some elements no longer have a closing tag Change-Id: I7c5e36cbd04b8a05f64c9e0af00868dd6b00f4ce Reviewed-on: https://gerrit.instructure.com/c/canvas-lms/+/256444 Tested-by: Service Cloud Jenkins <svc.cloudjenkins@instructure.com> Reviewed-by: Jacob Burroughs <jburroughs@instructure.com> QA-Review: Cody Cutrer <cody@instructure.com> Product-Review: Cody Cutrer <cody@instructure.com>
This commit is contained in:
parent
6aed7fbc6e
commit
160ff3c899
|
@ -95,6 +95,7 @@ gem 'mini_magick', '4.11.0'
|
|||
gem 'multi_json', '1.15.0'
|
||||
gem 'net-ldap', '0.16.3', require: false
|
||||
gem 'nokogiri', '1.10.10', require: false
|
||||
gem 'nokogumbo', '2.0.4'
|
||||
gem 'oauth', '0.5.4', require: false
|
||||
gem 'oauth2', '1.4.4', require: false
|
||||
gem 'oj', '3.10.16'
|
||||
|
@ -117,7 +118,7 @@ gem 'rubyzip', '2.3.0', require: 'zip'
|
|||
gem 'safe_yaml', '1.0.5', require: false
|
||||
gem 'saml2', '3.0.9'
|
||||
gem 'nokogiri-xmlsec-instructure', '0.9.7', require: false
|
||||
gem 'sanitize', '2.1.1', require: false
|
||||
gem 'sanitize', '5.2.3', require: false
|
||||
gem 'sentry-raven', '2.13.0', require: false
|
||||
gem 'guardrail', '2.0.1'
|
||||
gem 'simple_oauth', '0.3.1', require: false
|
||||
|
|
|
@ -26,7 +26,7 @@ module BrokenLinkHelper
|
|||
record = Context.find_asset_by_url(from_url)
|
||||
record ||= Context.get_front_wiki_page_for_course_from_url(from_url)
|
||||
return false unless record
|
||||
body = Nokogiri::HTML(Context.asset_body(record))
|
||||
body = Nokogiri::HTML5(Context.asset_body(record))
|
||||
anchor = body.css("a[href$='#{request.fullpath}']").text
|
||||
return false if anchor.blank?
|
||||
|
||||
|
|
|
@ -467,7 +467,7 @@ module QuizzesHelper
|
|||
editable = hash_get(options, :editable)
|
||||
res = user_content hash_get(question, :question_text)
|
||||
index = 0
|
||||
doc = Nokogiri::HTML.fragment(res)
|
||||
doc = Nokogiri::HTML5.fragment(res)
|
||||
selects = doc.css(".question_input")
|
||||
selects.each do |s|
|
||||
if answer_list && !answer_list.empty?
|
||||
|
@ -486,9 +486,9 @@ module QuizzesHelper
|
|||
else
|
||||
# If existing answer is one of the options, replace it with a span
|
||||
if (opt_tag = s.children.css("option[value='#{a}']").first)
|
||||
s.replace(<<-HTML)
|
||||
<span>#{opt_tag.content}</span>
|
||||
HTML
|
||||
span = doc.fragment("<span />").children.first
|
||||
span.children = opt_tag.children
|
||||
s.swap(span)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -199,7 +199,7 @@ class DiscussionTopic::MaterializedView < ActiveRecord::Base
|
|||
def self.include_mobile_overrides(entries, overrides)
|
||||
entries.each do |entry|
|
||||
if entry["message"]
|
||||
parsed_html = Nokogiri::HTML::DocumentFragment.parse(entry["message"])
|
||||
parsed_html = Nokogiri::HTML5.fragment(entry["message"])
|
||||
Api::Html::Content.add_overrides_to_html(parsed_html, overrides)
|
||||
entry["message"] = parsed_html.to_s
|
||||
end
|
||||
|
|
|
@ -88,7 +88,7 @@ module Importers
|
|||
# replace the entire node with a placeholder
|
||||
result[:old_value] = node.to_xml
|
||||
result[:placeholder] = placeholder(result[:old_value])
|
||||
placeholder_node = Nokogiri::HTML::DocumentFragment.parse(result[:placeholder])
|
||||
placeholder_node = Nokogiri::HTML5.fragment(result[:placeholder])
|
||||
|
||||
node.replace(placeholder_node)
|
||||
else
|
||||
|
|
|
@ -80,7 +80,7 @@ module Importers
|
|||
# see LinkParser for details
|
||||
|
||||
rel_path = link[:rel_path]
|
||||
node = Nokogiri::HTML::DocumentFragment.parse(link[:old_value]).children.first
|
||||
node = Nokogiri::HTML5.fragment(link[:old_value]).children.first
|
||||
new_url = resolve_media_comment_data(node, rel_path)
|
||||
new_url ||= resolve_relative_file_url(rel_path)
|
||||
|
||||
|
|
|
@ -14,9 +14,11 @@ Gem::Specification.new do |spec|
|
|||
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
||||
spec.require_paths = ["lib"]
|
||||
|
||||
spec.add_dependency "sanitize", "2.1.1"
|
||||
# 4.6.3 breaks protocol checking on data-url attributes if all data attributes are allowed
|
||||
spec.add_dependency "sanitize", "~> 5.2", ">= 5.2.3"
|
||||
|
||||
spec.add_development_dependency "bundler", "~> 1.5"
|
||||
spec.add_development_dependency "byebug"
|
||||
spec.add_development_dependency "rake"
|
||||
spec.add_development_dependency "rspec", "~> 3.5.0"
|
||||
end
|
||||
|
|
|
@ -42,56 +42,6 @@ module CanvasSanitize #:nodoc:
|
|||
klass.extend(ClassMethods)
|
||||
end
|
||||
|
||||
# modified from sanitize.rb to support mid-value matching
|
||||
REGEX_STYLE_PROTOCOL = /([A-Za-z0-9\+\-\.\&\;\#\s]*?)(?:\:|�*58|�*3a)/i
|
||||
REGEX_STYLE_METHOD = /([A-Za-z0-9\+\-\.\&\;\#\s]*?)(?:\(|�*40|�*28)/i
|
||||
|
||||
# used as a sanitize.rb transformer, below
|
||||
def self.sanitize_style(env)
|
||||
node = env[:node]
|
||||
styles = []
|
||||
style = node['style'] || ""
|
||||
# taken from https://github.com/flavorjones/loofah/blob/master/lib/loofah/html5/scrub.rb
|
||||
# the gauntlet
|
||||
style = '' unless style =~ /\A([-:,\;#%.\(\)\/\sa-zA-Z0-9!]|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/
|
||||
style = '' unless style =~ /\A\s*([-\w]+\s*:[^\;]*(\;\s*|$))*\z/
|
||||
|
||||
config = env[:config]
|
||||
|
||||
style.scan(/([-\w]+)\s*:\s*([^;]*)/) do |property, value|
|
||||
property = property.downcase
|
||||
valid = (config[:style_properties] || []).include?(property)
|
||||
valid ||= (config[:style_expressions] || []).any? { |e| property.match(e) }
|
||||
if valid
|
||||
styles << [property, clean_style_value(config, value)]
|
||||
end
|
||||
end
|
||||
node['style'] = styles.select { |k, v| v }.map { |k, v| "#{k}: #{v}" }.join('; ') + ";"
|
||||
end
|
||||
|
||||
def self.clean_style_value(config, value)
|
||||
# checks for any colons anywhere in the string
|
||||
# to make sure they're preceded by a valid protocol
|
||||
protocols = config[:protocols]['style']['any']
|
||||
|
||||
# no idea what these are called in css, but it's
|
||||
# a name followed by open-paren
|
||||
# (i.e. url(...) or expression(...))
|
||||
methods = config[:style_methods]
|
||||
|
||||
if methods
|
||||
value.to_s.downcase.scan(REGEX_STYLE_METHOD) do |match|
|
||||
return nil if !methods.include?(match[0].downcase)
|
||||
end
|
||||
end
|
||||
if protocols
|
||||
value.to_s.downcase.scan(REGEX_STYLE_PROTOCOL) do |match|
|
||||
return nil if !protocols.include?(match[0].downcase)
|
||||
end
|
||||
end
|
||||
value
|
||||
end
|
||||
|
||||
DEFAULT_PROTOCOLS = ['http', 'https', :relative].freeze
|
||||
SANITIZE = {
|
||||
:elements => [
|
||||
|
@ -99,7 +49,7 @@ module CanvasSanitize #:nodoc:
|
|||
'hr', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
|
||||
'del', 'ins', 'iframe', 'font',
|
||||
'colgroup', 'dd', 'div', 'dl', 'dt', 'em', 'figure', 'figcaption', 'i', 'img', 'li', 'ol', 'p', 'pre',
|
||||
'q', 'small', 'source', 'span', 'strike', 'strong', 'sub', 'sup', 'abbr', 'table', 'tbody', 'td',
|
||||
'q', 'small', 'source', 'span', 'strike', 'strong', 'style', 'sub', 'sup', 'abbr', 'table', 'tbody', 'td',
|
||||
'tfoot', 'th', 'thead', 'tr', 'u', 'ul', 'object', 'embed', 'param', 'video', 'track', 'audio',
|
||||
# added to unify tinymce and canvas_sanitize whitelists
|
||||
'address', 'acronym', 'map', 'area','bdo', 'dfn', 'kbd', 'legend', 'samp', 'tt', 'var', 'big',
|
||||
|
@ -339,8 +289,8 @@ module CanvasSanitize #:nodoc:
|
|||
'none' => { 'href' => DEFAULT_PROTOCOLS }.freeze,
|
||||
'semantics' => { 'href' => DEFAULT_PROTOCOLS }.freeze,
|
||||
}.freeze,
|
||||
:style_methods => ['url'].freeze,
|
||||
:style_properties => [
|
||||
css: {
|
||||
properties: ([
|
||||
'background', 'border', 'clear', 'color',
|
||||
'cursor', 'direction', 'display', 'float',
|
||||
'font', 'height', 'left', 'line-height',
|
||||
|
@ -353,20 +303,17 @@ module CanvasSanitize #:nodoc:
|
|||
'top', 'vertical-align',
|
||||
'visibility', 'white-space', 'width',
|
||||
'z-index', 'zoom'
|
||||
].freeze,
|
||||
:style_expressions => [
|
||||
/\Abackground-(?:attachment|color|image|position|repeat)\z/,
|
||||
/\Abackground-position-(?:x|y)\z/,
|
||||
/\Aborder-(?:bottom|collapse|color|left|right|spacing|style|top|width)\z/,
|
||||
/\Aborder-(?:bottom|left|right|top)-(?:color|style|width)\z/,
|
||||
/\Afont-(?:family|size|stretch|style|variant|weight)\z/,
|
||||
/\Alist-style-(?:image|position|type)\z/,
|
||||
/\Amargin-(?:bottom|left|right|top|offset)\z/,
|
||||
/\Apadding-(?:bottom|left|right|top)\z/
|
||||
].freeze,
|
||||
:transformers => lambda { |env|
|
||||
CanvasSanitize.sanitize_style(env) if env[:node]['style']
|
||||
Sanitize.clean_node!(env[:node], {:remove_contents => true}) if env[:node_name] == 'style'
|
||||
] +
|
||||
%w{attachment color image position repeat}.map { |i| "background-#{i}"} +
|
||||
%w{x y}.map { |i| "background-position-#{i}" } +
|
||||
%w{bottom collapse color left right spacing style top width}.map { |i| "border-#{i}" } +
|
||||
%w{bottom left right top}.map { |i| %w{color style width}.map { |j| "border-#{i}-#{j}" } }.flatten +
|
||||
%w{family size stretch style variant width}.map { |i| "font-#{i}" } +
|
||||
%w{image position type}.map { |i| "list-style-#{i}" } +
|
||||
%w{bottom left right top offset}.map { |i| "margin-#{i}" } +
|
||||
%w{bottom left right top}.map { |i| "padding-#{i}" }
|
||||
).to_set.freeze,
|
||||
protocols: DEFAULT_PROTOCOLS
|
||||
}
|
||||
}.freeze
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#
|
||||
|
||||
require 'spec_helper'
|
||||
require 'timeout'
|
||||
|
||||
describe CanvasSanitize do
|
||||
describe "#clean" do
|
||||
|
@ -56,7 +57,7 @@ describe CanvasSanitize do
|
|||
|
||||
it "does not strip track elements" do
|
||||
cleaned = Sanitize.clean("<track src=\"http://google.com\"></track>", CanvasSanitize::SANITIZE)
|
||||
expect(cleaned).to eq("<track src=\"http://google.com\"></track>")
|
||||
expect(cleaned).to eq("<track src=\"http://google.com\">")
|
||||
end
|
||||
|
||||
it "sanitizes javascript protocol in mathml" do
|
||||
|
@ -78,4 +79,106 @@ describe CanvasSanitize do
|
|||
cleaned = Sanitize.clean("<a data-item-href=\"javascript:alert('bad')\">Link</a>", CanvasSanitize::SANITIZE)
|
||||
expect(cleaned).to eq("<a>Link</a>")
|
||||
end
|
||||
|
||||
it "should sanitize style attributes width invalid url protocols" do
|
||||
str = "<div style='width: 200px; background: url(httpx://www.google.com) no-repeat left center; height: 10px;'></div>"
|
||||
res = Sanitize.clean(str, CanvasSanitize::SANITIZE)
|
||||
expect(res).not_to match(/background/)
|
||||
expect(res).not_to match(/google/)
|
||||
expect(res).to match(/width/)
|
||||
expect(res).to match(/height/)
|
||||
end
|
||||
|
||||
it "handles some tricky urls" do
|
||||
str = "<div style=\"width: 200px; background:url('java\nscript:alert(1)'); height: 10px;\"></div>"
|
||||
res = Sanitize.clean(str, CanvasSanitize::SANITIZE)
|
||||
expect(res).not_to match(/background/)
|
||||
expect(res).not_to match(/alert/)
|
||||
expect(res).not_to match(/height/)
|
||||
expect(res).to match(/width/)
|
||||
|
||||
str = "<div style=\"width: 200px; background:url('javascript\n:alert(1)'); height: 10px;\"></div>"
|
||||
res = Sanitize.clean(str, CanvasSanitize::SANITIZE)
|
||||
expect(res).not_to match(/background/)
|
||||
expect(res).not_to match(/alert/)
|
||||
expect(res).not_to match(/height/)
|
||||
expect(res).to match(/width/)
|
||||
|
||||
str = "<div style=\"width: 200px; background:url('javascript:alert(5)'); height: 10px;\"></div>"
|
||||
res = Sanitize.clean(str, CanvasSanitize::SANITIZE)
|
||||
expect(res).not_to match(/background/)
|
||||
expect(res).not_to match(/alert/)
|
||||
expect(res).to match(/height/)
|
||||
expect(res).to match(/width/)
|
||||
end
|
||||
|
||||
it "should sanitize style attributes with invalid methods" do
|
||||
str = "<div style=\"width: 200px; background:expression(); height: 10px;\"></div>"
|
||||
res = Sanitize.clean(str, CanvasSanitize::SANITIZE)
|
||||
expect(res).not_to match(/background/)
|
||||
expect(res).not_to match(/\(/)
|
||||
expect(res).to match(/height/)
|
||||
expect(res).to match(/width/)
|
||||
end
|
||||
|
||||
it "should allow negative values" do
|
||||
str = "<div style='margin: -18px;height: 10px;'></div>"
|
||||
res = Sanitize.clean(str, CanvasSanitize::SANITIZE)
|
||||
expect(res).to match(/margin/)
|
||||
expect(res).to match(/height/)
|
||||
end
|
||||
|
||||
it "should remove non-whitelisted css attributes" do
|
||||
str = "<div style='bacon: 5px; border-left-color: #fff;'></div>"
|
||||
res = Sanitize.clean(str, CanvasSanitize::SANITIZE)
|
||||
expect(res).to match(/border-left-color/)
|
||||
expect(res).not_to match(/bacon/)
|
||||
end
|
||||
|
||||
it "should allow valid css methods with valid css protocols" do
|
||||
str = %{<div style="width: 200px; background: url(http://www.google.com) no-repeat left center; height: 10px;"></div>}
|
||||
res = Sanitize.clean(str, CanvasSanitize::SANITIZE)
|
||||
expect(res).to eq str
|
||||
end
|
||||
|
||||
it "should allow font tags with valid attributes" do
|
||||
str = %{<font face="Comic Sans MS" color="blue" size="3" bacon="yes">hello</font>}
|
||||
res = Sanitize.clean(str, CanvasSanitize::SANITIZE)
|
||||
expect(res).to eq %{<font face="Comic Sans MS" color="blue" size="3">hello</font>}
|
||||
end
|
||||
|
||||
it "should allow valid MathML" do
|
||||
str = %{<math xmlns="http://www.w3.org/1998/Math/MathML"><mrow><mi>a</mi><mo>+</mo><mi>b</mi></mrow></math>}
|
||||
res = Sanitize.clean(str, CanvasSanitize::SANITIZE)
|
||||
expect(res).to eq str
|
||||
end
|
||||
|
||||
it "should strip invalid attributes from MathML" do
|
||||
str = %{<math xmlns="http://www.w3.org/1998/Math/MathML"><mrow><mi foo="bar">a</mi><mo>+</mo><mi>b</mi></mrow></math>}
|
||||
res = Sanitize.clean(str, CanvasSanitize::SANITIZE)
|
||||
expect(res).not_to match(/foo/)
|
||||
end
|
||||
|
||||
it "should remove and not escape contents of style tags" do
|
||||
str = %{<p><style type="text/css">pleaseignoreme: blahblahblah</style>but not me</p>}
|
||||
res = Sanitize.clean(str, CanvasSanitize::SANITIZE)
|
||||
expect(res).to eq "<p>but not me</p>"
|
||||
end
|
||||
|
||||
it "should not be extremely slow with long, weird microsoft styles" do
|
||||
str = %{<span lang="EN" style="font-family: 'Times New Roman','serif'; color: #17375e; font-size: 12pt; mso-fareast-font-family: 'Times New Roman'; mso-themecolor: text2; mso-themeshade: 191; mso-style-textfill-fill-color: #17375E; mso-style-textfill-fill-themecolor: text2; mso-style-textfill-fill-alpha: 100.0%; mso-ansi-language: EN; mso-style-textfill-fill-colortransforms: lumm=75000"><p></p></span>}
|
||||
# the above string took over a minute to sanitize as of 8ae4ba8e
|
||||
Timeout.timeout(1) { Sanitize.clean(str, CanvasSanitize::SANITIZE) }
|
||||
end
|
||||
|
||||
Dir.glob(File.expand_path(File.join(__FILE__, '..', '..', 'fixtures', 'xss', '*.xss'))) do |filename|
|
||||
name = File.split(filename).last
|
||||
it "should sanitize xss attempts for #{name}" do
|
||||
f = File.open(filename)
|
||||
check = f.readline.strip
|
||||
str = f.read
|
||||
res = Sanitize.clean(str, CanvasSanitize::SANITIZE)
|
||||
expect(res.downcase).not_to match(Regexp.new(check.downcase))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Binary file not shown.
|
@ -1,2 +1,2 @@
|
|||
style
|
||||
binding
|
||||
<STYLE>BODY{-moz-binding:url("http://ha.ckers.org/xssmoz.xml#xss")}</STYLE>
|
|
@ -1,2 +1,2 @@
|
|||
<style>
|
||||
javascript
|
||||
<STYLE>li {list-style-image: url("javascript:alert('XSS')");}</STYLE><UL><LI>XSS
|
|
@ -1,2 +1,2 @@
|
|||
style
|
||||
javascript|alert
|
||||
<STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A>
|
|
@ -1,2 +1,2 @@
|
|||
style|css
|
||||
alert|javascript
|
||||
<STYLE type="text/css">BODY{background:url("javascript:alert('XSS')")}</STYLE>
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue