Fix precise link replacement code

fixes LF-1171 refs LF-956 flag=precise_link_replacements Test Plan: - Create a page in Course A - Within the page insert a link with a title attribute that looks like a course ref link e.g.: /courses/{Course A id}/pages/something - Create an html tag that is more than 500 inner tags deep. - Course copy Course A to a Course B - Observe the title in the anchor was properly replaced to reflect the Course B id and that no other errors occurred. Change-Id: Iab61662de0f1314bcfdc7624e8c9ae4909007477 Reviewed-on: https://gerrit.instructure.com/c/canvas-lms/+/338299 Tested-by: Service Cloud Jenkins <svc.cloudjenkins@instructure.com> Reviewed-by: Eric Saupe <eric.saupe@instructure.com> QA-Review: Eric Saupe <eric.saupe@instructure.com> Product-Review: Luis Oliveira <luis.oliveira@instructure.com>
2024-01-22 15:37:12 -03:00 · 2024-01-22 15:37:12 -03:00 · 0929ab2476
parent 1ecba66aa4
commit 0929ab2476
2 changed files with 9 additions and 2 deletions
--- a/lib/user_content.rb
+++ b/lib/user_content.rb
@ -215,14 +215,14 @@ module UserContent
    end

    def precise_translate_content(html)
-      doc = Nokogiri::HTML5::DocumentFragment.parse(html)
+      doc = Nokogiri::HTML5::DocumentFragment.parse(html, nil, { max_tree_depth: 10_000 })
      attributes = %w[value href longdesc src srcset title]

      doc.css("img, iframe, video, source, param, a").each do |e|
        attributes.each do |attr|
          attribute_value = e.attributes[attr]&.value
          if attribute_value&.match?(@toplevel_regex)
-            e.inner_html = e.inner_html.gsub(@toplevel_regex) { |url| replacement(url) } if e.name == "a" && e.inner_html.delete("\n").strip.include?(e["href"].strip)
+            e.inner_html = e.inner_html.gsub(@toplevel_regex) { |url| replacement(url) } if e.name == "a" && e["href"] && e.inner_html.delete("\n").strip.include?(e["href"].strip)
            e.set_attribute(attr, attribute_value.gsub(@toplevel_regex) { |url| replacement(url) })
          end
        end
--- a/spec/lib/user_content_spec.rb
+++ b/spec/lib/user_content_spec.rb
@ -119,6 +119,13 @@ describe UserContent do
      expect(rewriter.user_can_view_content?(att2)).to be_falsey
    end

+    describe "precise_translate_content" do
+      it "deals properly with non-href anchors and nodes too deep" do
+        expect { rewriter.precise_translate_content("<a title='/courses/#{rewriter.context.id}/assignments/5'>non-href link</a>") }.not_to raise_error
+        expect { rewriter.precise_translate_content("<!DOCTYPE html>" + ("<div>" * 1000)) }.not_to raise_error
+      end
+    end
+
    describe "@toplevel_regex" do
      let(:regex) do
        rewriter.instance_variable_get(:@toplevel_regex)