From e28bb762a9e3a78459f4417db0553a6ab12e7019 Mon Sep 17 00:00:00 2001 From: Ryan Shaw Date: Fri, 5 Apr 2019 14:14:33 -0600 Subject: [PATCH] =?UTF-8?q?Don=E2=80=99t=20load=20sanitize-html=20in=20the?= =?UTF-8?q?=20browser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This approach solves the same xss vulnerability, but by doing it a way we can guarantee that what we are trying to .html() onto the page is safe (and not just arbitrary user content) instead of having to load the _huge_ sanitize-html npm package and all of it’s deps (like postcss, source-map, htmlparser2, buffer, and a bunch of others) On the frontend so we can use it to sanitize all the untrusted data-mathml content. closes: CORE-2748 refs COMMS-1846 Test Plan: * Go to the calendar, click on a day to add an event, and click on the "More Options" button. * Switch to the html editor, and add the following: * Save the event. Go back to the calendar and click on the newly saved event, and notice that you don't get XSS'd * Create another event in the calendar, and this time add the following via the html editor: Infinities: (-\infty,\infty) * Save the event, and load it in the calendar. Notice that it still properly displays this mathml image properly. * Test the same calendar event with a screen reader, and notice that it is still accessible Change-Id: I5fc1e17ecfc8576dfeaffacff8efcb08873bdd5e Reviewed-on: https://gerrit.instructure.com/188609 Tested-by: Jenkins Reviewed-by: Landon Gilbert-Bland QA-Review: Landon Gilbert-Bland Product-Review: Ryan Shaw --- app/coffeescripts/str/apiUserContent.js | 41 +++---------------- lib/api/html/content.rb | 7 +++- package.json | 2 - spec/apis/html/content_spec.rb | 8 ++-- spec/coffeescripts/str/apiUserContentSpec.js | 10 +---- spec/lib/api/html/content_spec.rb | 4 +- .../course_copy_quizzes_spec.rb | 4 +- yarn.lock | 40 ++---------------- 8 files changed, 25 insertions(+), 91 deletions(-) diff --git a/app/coffeescripts/str/apiUserContent.js b/app/coffeescripts/str/apiUserContent.js index c931b540a2b..8f6f38dfde6 100644 --- a/app/coffeescripts/str/apiUserContent.js +++ b/app/coffeescripts/str/apiUserContent.js @@ -19,46 +19,17 @@ import $ from 'jquery' import _ from 'underscore' import htmlEscape from 'str/htmlEscape' import I18n from 'i18n!user_content' -import mathMLTagNames from 'mathml-tag-names' -import sanitizeHtml from 'sanitize-html' - -// From https://developer.mozilla.org/en-US/docs/Web/MathML/Attribute -const MATHML_ATTRIBUTES = [ - 'accent', 'accentunder', 'actiontype', 'align', 'alignmentscope', 'altimg', 'altimg-width', - 'altimg-height', 'altimg-valign', 'alttext', 'bevelled', 'charalign', 'close', 'columnalign', - 'columnlines', 'columnspacing', 'columnspan', 'columnwidth', 'crossout', 'decimalpoint', - 'denomalign', 'depth', 'dir', 'display', 'displaystyle', 'edge', 'equalcolumns', 'equalrows', - 'fence', 'form', 'frame', 'framespacing', 'groupalign', 'height', 'id', 'indentalign', - 'indentalignfirst', 'indentalignlast', 'indentshift', 'indentshiftfirst', 'indentshiftlast', - 'indenttarget', 'infixlinebreakstyle', 'largeop', 'length', 'linebreak', 'linebreakmultchar', - 'linebreakstyle', 'lineleading', 'linethickness', 'location', 'longdivstyle', 'lspace', - 'lquote', 'mathbackground', 'mathcolor', 'mathsize', 'mathvariant', 'maxsize', 'minlabelspacing', - 'minsize', 'movablelimits', 'notation', 'numalign', 'open', 'overflow', 'position', 'rowalign', - 'rowlines', 'rowspacing', 'rowspan', 'rspace', 'rquote', 'scriptlevel', 'scriptminsize', - 'Starting', 'scriptsizemultiplier', 'selection', 'separator', 'separators', 'shift', 'side', - 'src', 'stackalign', 'stretchy', 'subscriptshift', 'supscriptshift', 'symmetric', 'voffset', - 'width', 'xmlns' -] - -function sanitizeMathmlHtml(html) { - const opts = { - allowedTags: mathMLTagNames, - allowedAttributes: { - '*': MATHML_ATTRIBUTES - } - } - return sanitizeHtml(html, opts) -} const apiUserContent = { - /* xsslint safeString.identifier mathml - xsslint safeString.identifier sanitized */ translateMathmlForScreenreaders ($equationImage) { - const sanitized = sanitizeMathmlHtml($equationImage.data('mathml')) - const mathml = $('
').html(sanitized).html() + // note, it is safe to treat the x-canvaslms-safe-mathml as html because it + // only ever gets put there by us (in Api::Html::Content::apply_mathml). + // Any user content that gets sent to the server will have the + // x-canvaslms-safe-mathml attribute stripped out. + const mathml = $('
').html($equationImage.attr('x-canvaslms-safe-mathml')).html() const mathmlSpan = $('') mathmlSpan.html(mathml) return mathmlSpan @@ -148,7 +119,7 @@ const apiUserContent = { $dummy.find('img.equation_image').each((index, equationImage) => { const $equationImage = $(equationImage) const mathmlSpan = apiUserContent.translateMathmlForScreenreaders($equationImage) - $equationImage.removeAttr('data-mathml') + $equationImage.removeAttr('x-canvaslms-safe-mathml') $equationImage.after(mathmlSpan) }) } diff --git a/lib/api/html/content.rb b/lib/api/html/content.rb index 098ec83bdcc..173458e3153 100644 --- a/lib/api/html/content.rb +++ b/lib/api/html/content.rb @@ -168,7 +168,12 @@ module Api mathml = UserContent.latex_to_mathml(equation) return if mathml.blank? - node['data-mathml'] = mathml + # NOTE: we use "x-canvaslms-safe-mathml" instead of just "data-mathml" + # because canvas_sanitize will strip it out on the way in but it won't + # strip out data-mathml. This means we can gaurentee that there is never + # user input in x-canvaslms-safe-mathml and we can safely pass it to + # $el.html() in translateMathmlForScreenreaders in the js in the frontend + node['x-canvaslms-safe-mathml'] = mathml end end end diff --git a/package.json b/package.json index fa85aab9957..45b9d115e5d 100644 --- a/package.json +++ b/package.json @@ -81,7 +81,6 @@ "jquery.elastic": "1.0.0", "location-origin": "^1.1.4", "lodash": "^4.16.4", - "mathml-tag-names": "^2.1.0", "md5": "^2.2.1", "mediaelement": "https://github.com/instructure/mediaelement.git#master", "moment": "^2.10.6", @@ -106,7 +105,6 @@ "redux": "^4.0.1", "redux-actions": "^2.6.4", "redux-thunk": "^2.3.0", - "sanitize-html": "^1.20.0", "spin.js": "2.3.2", "swfobject": "^2.2.1", "tablesorter": "^2.28.5", diff --git a/spec/apis/html/content_spec.rb b/spec/apis/html/content_spec.rb index 23d5abc9a0b..d783953e5cf 100644 --- a/spec/apis/html/content_spec.rb +++ b/spec/apis/html/content_spec.rb @@ -31,9 +31,9 @@ describe Api::Html::Content, type: :request do expect(@node['alt']).to eql(@latex) end - it "sets data-mathml" do + it "sets x-canvaslms-safe-mathml" do Api::Html::Content.apply_mathml(@node) - expect(@node['data-mathml']).to eql(Ritex::Parser.new.parse(@latex)) + expect(@node['x-canvaslms-safe-mathml']).to eql(Ritex::Parser.new.parse(@latex)) end end @@ -52,9 +52,9 @@ describe Api::Html::Content, type: :request do expect(@node['alt']).to eql(@latex) end - it "doesn't set data-mathml" do + it "doesn't set x-canvaslms-safe-mathml" do Api::Html::Content.apply_mathml(@node) - expect(@node['data-mathml']).to be_nil + expect(@node['x-canvaslms-safe-mathml']).to be_nil end end end diff --git a/spec/coffeescripts/str/apiUserContentSpec.js b/spec/coffeescripts/str/apiUserContentSpec.js index 6d8cd7e6e59..d914e133ae1 100644 --- a/spec/coffeescripts/str/apiUserContentSpec.js +++ b/spec/coffeescripts/str/apiUserContentSpec.js @@ -23,8 +23,8 @@ let mathml_html QUnit.module('apiUserContent.convert', { setup() { mathml_html ="
    \n" + - "
  • \n" + - "
  • \n" + + "
  • \n" + + "
  • \n" + "
  • \n"+ "
" } @@ -35,12 +35,6 @@ test('moves mathml into a screenreader element', () => { ok(output.includes(' { - const xss_mathml = "" - const output = apiUserContent.convert(xss_mathml) - ok(!output.includes('onerror')) -}) - test('mathml need not be screenreadered if editing content (this would start an update loop)', () => { const output = apiUserContent.convert(mathml_html, {forEditing: true}) ok(!output.includes('\n"\ - "
  • \n"\ + "
  • \n"\ + "
  • \n"\ "
  • \n
    " expect(html).to eq(expected) end diff --git a/spec/models/content_migration/course_copy_quizzes_spec.rb b/spec/models/content_migration/course_copy_quizzes_spec.rb index 81472dd86ba..f5286637029 100644 --- a/spec/models/content_migration/course_copy_quizzes_spec.rb +++ b/spec/models/content_migration/course_copy_quizzes_spec.rb @@ -541,7 +541,7 @@ equation: LaTeX: \\sum

    HTML @@ -559,7 +559,7 @@ equation: