From e28bb762a9e3a78459f4417db0553a6ab12e7019 Mon Sep 17 00:00:00 2001
From: Ryan Shaw
Date: Fri, 5 Apr 2019 14:14:33 -0600
Subject: [PATCH] =?UTF-8?q?Don=E2=80=99t=20load=20sanitize-html=20in=20the?=
=?UTF-8?q?=20browser?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This approach solves the same xss vulnerability, but by doing it a way
we can guarantee that what we are trying to .html() onto the page is
safe (and not just arbitrary user content) instead of having to load the
_huge_ sanitize-html npm package and all of it’s deps
(like postcss, source-map, htmlparser2, buffer, and a bunch of others)
On the frontend so we can use it to sanitize all the untrusted
data-mathml content.
closes: CORE-2748
refs COMMS-1846
Test Plan:
* Go to the calendar, click on a day to add an event, and click on the
"More Options" button.
* Switch to the html editor, and add the following:
* Save the event. Go back to the calendar and click on the newly saved
event, and notice that you don't get XSS'd
* Create another event in the calendar, and this time add the
following via the html editor:
* Save the event, and load it in the calendar. Notice that it still
properly displays this mathml image properly.
* Test the same calendar event with a screen reader, and notice that
it is still accessible
Change-Id: I5fc1e17ecfc8576dfeaffacff8efcb08873bdd5e
Reviewed-on: https://gerrit.instructure.com/188609
Tested-by: Jenkins
Reviewed-by: Landon Gilbert-Bland
QA-Review: Landon Gilbert-Bland
Product-Review: Ryan Shaw
---
app/coffeescripts/str/apiUserContent.js | 41 +++----------------
lib/api/html/content.rb | 7 +++-
package.json | 2 -
spec/apis/html/content_spec.rb | 8 ++--
spec/coffeescripts/str/apiUserContentSpec.js | 10 +----
spec/lib/api/html/content_spec.rb | 4 +-
.../course_copy_quizzes_spec.rb | 4 +-
yarn.lock | 40 ++----------------
8 files changed, 25 insertions(+), 91 deletions(-)
diff --git a/app/coffeescripts/str/apiUserContent.js b/app/coffeescripts/str/apiUserContent.js
index c931b540a2b..8f6f38dfde6 100644
--- a/app/coffeescripts/str/apiUserContent.js
+++ b/app/coffeescripts/str/apiUserContent.js
@@ -19,46 +19,17 @@ import $ from 'jquery'
import _ from 'underscore'
import htmlEscape from 'str/htmlEscape'
import I18n from 'i18n!user_content'
-import mathMLTagNames from 'mathml-tag-names'
-import sanitizeHtml from 'sanitize-html'
-
-// From https://developer.mozilla.org/en-US/docs/Web/MathML/Attribute
-const MATHML_ATTRIBUTES = [
- 'accent', 'accentunder', 'actiontype', 'align', 'alignmentscope', 'altimg', 'altimg-width',
- 'altimg-height', 'altimg-valign', 'alttext', 'bevelled', 'charalign', 'close', 'columnalign',
- 'columnlines', 'columnspacing', 'columnspan', 'columnwidth', 'crossout', 'decimalpoint',
- 'denomalign', 'depth', 'dir', 'display', 'displaystyle', 'edge', 'equalcolumns', 'equalrows',
- 'fence', 'form', 'frame', 'framespacing', 'groupalign', 'height', 'id', 'indentalign',
- 'indentalignfirst', 'indentalignlast', 'indentshift', 'indentshiftfirst', 'indentshiftlast',
- 'indenttarget', 'infixlinebreakstyle', 'largeop', 'length', 'linebreak', 'linebreakmultchar',
- 'linebreakstyle', 'lineleading', 'linethickness', 'location', 'longdivstyle', 'lspace',
- 'lquote', 'mathbackground', 'mathcolor', 'mathsize', 'mathvariant', 'maxsize', 'minlabelspacing',
- 'minsize', 'movablelimits', 'notation', 'numalign', 'open', 'overflow', 'position', 'rowalign',
- 'rowlines', 'rowspacing', 'rowspan', 'rspace', 'rquote', 'scriptlevel', 'scriptminsize',
- 'Starting', 'scriptsizemultiplier', 'selection', 'separator', 'separators', 'shift', 'side',
- 'src', 'stackalign', 'stretchy', 'subscriptshift', 'supscriptshift', 'symmetric', 'voffset',
- 'width', 'xmlns'
-]
-
-function sanitizeMathmlHtml(html) {
- const opts = {
- allowedTags: mathMLTagNames,
- allowedAttributes: {
- '*': MATHML_ATTRIBUTES
- }
- }
- return sanitizeHtml(html, opts)
-}
const apiUserContent = {
-
/*
xsslint safeString.identifier mathml
- xsslint safeString.identifier sanitized
*/
translateMathmlForScreenreaders ($equationImage) {
- const sanitized = sanitizeMathmlHtml($equationImage.data('mathml'))
- const mathml = $('').html(sanitized).html()
+ // note, it is safe to treat the x-canvaslms-safe-mathml as html because it
+ // only ever gets put there by us (in Api::Html::Content::apply_mathml).
+ // Any user content that gets sent to the server will have the
+ // x-canvaslms-safe-mathml attribute stripped out.
+ const mathml = $('').html($equationImage.attr('x-canvaslms-safe-mathml')).html()
const mathmlSpan = $('')
mathmlSpan.html(mathml)
return mathmlSpan
@@ -148,7 +119,7 @@ const apiUserContent = {
$dummy.find('img.equation_image').each((index, equationImage) => {
const $equationImage = $(equationImage)
const mathmlSpan = apiUserContent.translateMathmlForScreenreaders($equationImage)
- $equationImage.removeAttr('data-mathml')
+ $equationImage.removeAttr('x-canvaslms-safe-mathml')
$equationImage.after(mathmlSpan)
})
}
diff --git a/lib/api/html/content.rb b/lib/api/html/content.rb
index 098ec83bdcc..173458e3153 100644
--- a/lib/api/html/content.rb
+++ b/lib/api/html/content.rb
@@ -168,7 +168,12 @@ module Api
mathml = UserContent.latex_to_mathml(equation)
return if mathml.blank?
- node['data-mathml'] = mathml
+ # NOTE: we use "x-canvaslms-safe-mathml" instead of just "data-mathml"
+ # because canvas_sanitize will strip it out on the way in but it won't
+ # strip out data-mathml. This means we can gaurentee that there is never
+ # user input in x-canvaslms-safe-mathml and we can safely pass it to
+ # $el.html() in translateMathmlForScreenreaders in the js in the frontend
+ node['x-canvaslms-safe-mathml'] = mathml
end
end
end
diff --git a/package.json b/package.json
index fa85aab9957..45b9d115e5d 100644
--- a/package.json
+++ b/package.json
@@ -81,7 +81,6 @@
"jquery.elastic": "1.0.0",
"location-origin": "^1.1.4",
"lodash": "^4.16.4",
- "mathml-tag-names": "^2.1.0",
"md5": "^2.2.1",
"mediaelement": "https://github.com/instructure/mediaelement.git#master",
"moment": "^2.10.6",
@@ -106,7 +105,6 @@
"redux": "^4.0.1",
"redux-actions": "^2.6.4",
"redux-thunk": "^2.3.0",
- "sanitize-html": "^1.20.0",
"spin.js": "2.3.2",
"swfobject": "^2.2.1",
"tablesorter": "^2.28.5",
diff --git a/spec/apis/html/content_spec.rb b/spec/apis/html/content_spec.rb
index 23d5abc9a0b..d783953e5cf 100644
--- a/spec/apis/html/content_spec.rb
+++ b/spec/apis/html/content_spec.rb
@@ -31,9 +31,9 @@ describe Api::Html::Content, type: :request do
expect(@node['alt']).to eql(@latex)
end
- it "sets data-mathml" do
+ it "sets x-canvaslms-safe-mathml" do
Api::Html::Content.apply_mathml(@node)
- expect(@node['data-mathml']).to eql(Ritex::Parser.new.parse(@latex))
+ expect(@node['x-canvaslms-safe-mathml']).to eql(Ritex::Parser.new.parse(@latex))
end
end
@@ -52,9 +52,9 @@ describe Api::Html::Content, type: :request do
expect(@node['alt']).to eql(@latex)
end
- it "doesn't set data-mathml" do
+ it "doesn't set x-canvaslms-safe-mathml" do
Api::Html::Content.apply_mathml(@node)
- expect(@node['data-mathml']).to be_nil
+ expect(@node['x-canvaslms-safe-mathml']).to be_nil
end
end
end
diff --git a/spec/coffeescripts/str/apiUserContentSpec.js b/spec/coffeescripts/str/apiUserContentSpec.js
index 6d8cd7e6e59..d914e133ae1 100644
--- a/spec/coffeescripts/str/apiUserContentSpec.js
+++ b/spec/coffeescripts/str/apiUserContentSpec.js
@@ -23,8 +23,8 @@ let mathml_html
QUnit.module('apiUserContent.convert', {
setup() {
mathml_html ="\n" +
- "![]()
\n" +
- "![]()
\n" +
+ "![]()
\n" +
+ "![]()
\n" +
"![]()
\n"+
"
"
}
@@ -35,12 +35,6 @@ test('moves mathml into a screenreader element', () => {
ok(output.includes('
HTML
@@ -559,7 +559,7 @@ equation: 
![]()
![]()