upgrade nokogiri and sanitize gems

Updating to nokogiri 1.5.0 requires updating to sanitize 2.0.3 as well.

Since the API for sanitize changed, we take this opportunity to remove
the monkey patching in config/initializers, and use the actual
transformers plugin interface for sanitize.

The changes to html in the specs are due to nokogiri making a couple
changes around empty tags -- html5 wants <img> , not <img />

test plan: The existing specs exercise both gems, to ensure
compatibility.

Change-Id: Id04d017dda056e03205b373ac9bfbf71bd338cb9
Reviewed-on: https://gerrit.instructure.com/7988
Tested-by: Hudson <hudson@instructure.com>
Reviewed-by: Zach Wily <zach@instructure.com>
This commit is contained in:
Brian Palmer 2012-01-10 19:12:36 -07:00
parent 51a12abb3e
commit 74b9386d9c
6 changed files with 64 additions and 65 deletions

View File

@ -31,7 +31,7 @@ gem 'mime-types', '1.16', :require => 'mime/types'
# with mini_magick 3.1
gem 'mini_magick', '1.3.2'
gem 'netaddr', '1.5.0'
gem 'nokogiri', '1.4.7'
gem 'nokogiri', '1.5.0'
gem 'oauth', '0.4.5'
gem 'rack', '~> 1.1.2' # rails requires ~> 1.1.0 but 1.1.0 has a param quoting bug
gem 'rake', '< 0.10'
@ -44,7 +44,7 @@ gem 'ruby-net-ldap', '0.0.4', :require => 'net/ldap'
gem 'ruby-saml-mod', '0.1.4'
gem 'rubycas-client', '2.2.1'
gem 'rubyzip', '0.9.4', :require => 'zip/zip'
gem 'sanitize', '1.2.1'
gem 'sanitize', '2.0.3'
gem 'uuid', '2.3.2'
gem 'will_paginate', '2.3.15'
gem 'xml-simple', '1.0.12', :require => 'xmlsimple'

View File

@ -1,53 +0,0 @@
require 'sanitize'
class Sanitize
# modified from sanitize.rb to support mid-value matching
REGEX_STYLE_PROTOCOL = /([A-Za-z0-9\+\-\.\&\;\#\s]*?)(?:\:|&#0*58|&#x0*3a)/i
REGEX_STYLE_METHOD = /([A-Za-z0-9\+\-\.\&\;\#\s]*?)(?:\(|&#0*40|&#x0*28)/i
alias :original_clean_element! :clean_element!
def clean_element!(node)
res = original_clean_element!(node)
if node['style']
styles = []
style = node['style']
# taken from https://github.com/flavorjones/loofah/blob/master/lib/loofah/html5/scrub.rb
# the gauntlet
style = '' unless style =~ /\A([:,\;#%.\(\)\/\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/
style = '' unless style =~ /\A\s*([-\w]+\s*:[^\;]*(\;\s*|$))*\z/
style.scan(/([-\w]+)\s*:\s*([^;]*)/) do |property, value|
property = property.downcase
valid = (@config[:style_properties] || []).include?(property)
valid ||= (@config[:style_expressions] || []).any?{|e| property.match(e) }
if valid
styles << [property, clean_style_value(value)]
end
end
node['style'] = styles.select { |k,v| v }.map{|k,v| "#{k}: #{v}"}.join('; ') + ";"
end
res
end
def clean_style_value(value)
# checks for any colons anywhere in the string
# to make sure they're preceded by a valid protocol
protocols = @config[:protocols]['style']['any']
# no idea what these are called in css, but it's
# a name followed by open-paren
# (i.e. url(...) or expression(...))
methods = @config[:style_methods]
if methods
value.to_s.downcase.scan(REGEX_STYLE_METHOD) do |match|
return nil if !methods.include?(match[0].downcase)
end
end
if protocols
value.to_s.downcase.scan(REGEX_STYLE_PROTOCOL) do |match|
return nil if !protocols.include?(match[0].downcase)
end
end
value
end
end

View File

@ -68,7 +68,7 @@ describe "Canvas Cartridge importing" do
#compare settings
@copy_to.conclude_at.to_i.should == @copy_from.conclude_at.to_i
@copy_to.start_at.to_i.should == @copy_from.start_at.to_i
@copy_to.syllabus_body.should == (body_with_link % @copy_to.id).gsub(/png">/, 'png" />')
@copy_to.syllabus_body.should == (body_with_link % @copy_to.id)
@copy_to.storage_quota.should_not == @copy_from.storage_quota
@copy_to.name.should_not == @copy_from.name
@copy_to.course_code.should_not == @copy_from.course_code
@ -585,7 +585,7 @@ describe "Canvas Cartridge importing" do
page_2 = @copy_to.wiki.wiki_pages.find_by_migration_id(migration_id)
page_2.title.should == page.title
page_2.url.should == page.url
page_2.body.should == (body_with_link % [ @copy_to.id, @copy_to.id, @copy_to.id, @copy_to.id, @copy_to.id, mod2.id, @copy_to.id, to_att.id ]).gsub(/png">/, 'png" />')
page_2.body.should == (body_with_link % [ @copy_to.id, @copy_to.id, @copy_to.id, @copy_to.id, @copy_to.id, mod2.id, @copy_to.id, to_att.id ]).gsub(/png" \/>/, 'png">')
end
it "should import migrate inline external tool URLs in wiki pages" do
@ -648,7 +648,7 @@ describe "Canvas Cartridge importing" do
asmnt_2 = @copy_to.assignments.find_by_migration_id(migration_id)
asmnt_2.title.should == asmnt.title
asmnt_2.description.should == (body_with_link % @copy_to.id).gsub(/png">/, 'png" />')
asmnt_2.description.should == (body_with_link % @copy_to.id)
asmnt_2.points_possible.should == asmnt.points_possible
asmnt_2.allowed_extensions.should == asmnt.allowed_extensions
asmnt_2.submission_types.should == asmnt.submission_types

View File

@ -39,11 +39,11 @@ describe "Standard Common Cartridge importing" do
file2_id = @course.attachments.find_by_migration_id("I_00006_Media").id
dt = @course.discussion_topics.find_by_migration_id("I_00006_R")
dt.message.should == %{<p>Your face is ugly. <br /><img src="/courses/#{@course.id}/files/#{file1_id}/preview" /></p>}
dt.message.should == %{<p>Your face is ugly. <br><img src="/courses/#{@course.id}/files/#{file1_id}/preview"></p>}
dt.attachment_id = file2_id
dt = @course.discussion_topics.find_by_migration_id("I_00009_R")
dt.message.should == %{<p>Monkeys: Go!</p>\n<ul><li>\n<a href="/courses/#{@course.id}/files/#{file2_id}/preview">angry_person.jpg</a>\n</li>\n<li>\n<a href="/courses/#{@course.id}/files/#{file1_id}/preview">smiling_dog.jpg</a>\n</li>\n</ul>}
dt.message.should == %{<p>Monkeys: Go!</p>\n<ul>\n<li>\n<a href="/courses/#{@course.id}/files/#{file2_id}/preview">angry_person.jpg</a>\n</li>\n<li>\n<a href="/courses/#{@course.id}/files/#{file1_id}/preview">smiling_dog.jpg</a>\n</li>\n</ul>}
end
# This also tests the WebLinks, they are just content tags and don't have their own class

View File

@ -39,7 +39,8 @@ describe "Importing discussion topics" do
topic = DiscussionTopic.find_by_migration_id(data[:migration_id])
topic.title.should == data[:title]
topic.message.index(data[:description]).should_not be_nil
parsed_description = Nokogiri::HTML::DocumentFragment.parse(data[:description]).to_s
topic.message.index(parsed_description).should_not be_nil
if data[:grading]
Assignment.count.should == 1
@ -51,4 +52,4 @@ describe "Importing discussion topics" do
end
end
end
end
end

View File

@ -18,11 +18,61 @@
module Instructure #:nodoc:
module SanitizeField #:nodoc:
def self.included(klass)
klass.extend(ClassMethods)
end
# modified from sanitize.rb to support mid-value matching
REGEX_STYLE_PROTOCOL = /([A-Za-z0-9\+\-\.\&\;\#\s]*?)(?:\:|&#0*58|&#x0*3a)/i
REGEX_STYLE_METHOD = /([A-Za-z0-9\+\-\.\&\;\#\s]*?)(?:\(|&#0*40|&#x0*28)/i
# used as a sanitize.rb transformer, below
def self.sanitize_style(env)
node = env[:node]
styles = []
style = node['style'] || ""
# taken from https://github.com/flavorjones/loofah/blob/master/lib/loofah/html5/scrub.rb
# the gauntlet
style = '' unless style =~ /\A([:,\;#%.\(\)\/\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/
style = '' unless style =~ /\A\s*([-\w]+\s*:[^\;]*(\;\s*|$))*\z/
config = env[:config]
style.scan(/([-\w]+)\s*:\s*([^;]*)/) do |property, value|
property = property.downcase
valid = (config[:style_properties] || []).include?(property)
valid ||= (config[:style_expressions] || []).any?{|e| property.match(e) }
if valid
styles << [property, clean_style_value(config, value)]
end
end
node['style'] = styles.select { |k,v| v }.map{|k,v| "#{k}: #{v}"}.join('; ') + ";"
end
def self.clean_style_value(config, value)
# checks for any colons anywhere in the string
# to make sure they're preceded by a valid protocol
protocols = config[:protocols]['style']['any']
# no idea what these are called in css, but it's
# a name followed by open-paren
# (i.e. url(...) or expression(...))
methods = config[:style_methods]
if methods
value.to_s.downcase.scan(REGEX_STYLE_METHOD) do |match|
return nil if !methods.include?(match[0].downcase)
end
end
if protocols
value.to_s.downcase.scan(REGEX_STYLE_PROTOCOL) do |match|
return nil if !protocols.include?(match[0].downcase)
end
end
value
end
SANITIZE = {
:elements => [
'a', 'b', 'blockquote', 'br', 'caption', 'cite', 'code', 'col',
@ -87,7 +137,8 @@ module Instructure #:nodoc:
/\Alist-style-(?:image|position|type)\z/,
/\Amargin-(?:bottom|left|right|top|offset)\z/,
/\Apadding-(?:bottom|left|right|top)\z/
]
],
:transformers => lambda { |env| Instructure::SanitizeField.sanitize_style(env) if env[:node]['style'] }
}
module ClassMethods