Fix precise link replacement code

fixes LF-1171
refs LF-956
flag=precise_link_replacements

Test Plan:
- Create a page in Course A
- Within the page, insert a link with a title
  attribute that looks like a course ref link
  e.g.: /courses/{Course A id}/pages/something
- Create an html tag that is more than
  500 inner tags deep.
- Course copy Course A to a Course B
- Observe the title in the anchor was properly
  replaced to reflect the Course B id and that
  no other errors occurred.

Change-Id: Iab61662de0f1314bcfdc7624e8c9ae4909007477
Reviewed-on: https://gerrit.instructure.com/c/canvas-lms/+/338299
Tested-by: Service Cloud Jenkins <svc.cloudjenkins@instructure.com>
Reviewed-by: Eric Saupe <eric.saupe@instructure.com>
QA-Review: Eric Saupe <eric.saupe@instructure.com>
Product-Review: Luis Oliveira <luis.oliveira@instructure.com>
This commit is contained in:
Matheus 2024-01-22 15:37:12 -03:00 committed by Luis Oliveira
parent 1ecba66aa4
commit 0929ab2476
2 changed files with 9 additions and 2 deletions

View File

@ -215,14 +215,14 @@ module UserContent
end
def precise_translate_content(html)
doc = Nokogiri::HTML5::DocumentFragment.parse(html)
doc = Nokogiri::HTML5::DocumentFragment.parse(html, nil, { max_tree_depth: 10_000 })
attributes = %w[value href longdesc src srcset title]
doc.css("img, iframe, video, source, param, a").each do |e|
attributes.each do |attr|
attribute_value = e.attributes[attr]&.value
if attribute_value&.match?(@toplevel_regex)
e.inner_html = e.inner_html.gsub(@toplevel_regex) { |url| replacement(url) } if e.name == "a" && e.inner_html.delete("\n").strip.include?(e["href"].strip)
e.inner_html = e.inner_html.gsub(@toplevel_regex) { |url| replacement(url) } if e.name == "a" && e["href"] && e.inner_html.delete("\n").strip.include?(e["href"].strip)
e.set_attribute(attr, attribute_value.gsub(@toplevel_regex) { |url| replacement(url) })
end
end

View File

@ -119,6 +119,13 @@ describe UserContent do
expect(rewriter.user_can_view_content?(att2)).to be_falsey
end
# Specs for precise_translate_content: each input below must be processed
# without raising.
describe "precise_translate_content" do
it "deals properly with non-href anchors and nodes too deep" do
# Anchor that has a course-style path in its `title` attribute but no
# `href` attribute at all.
expect { rewriter.precise_translate_content("<a title='/courses/#{rewriter.context.id}/assignments/5'>non-href link</a>") }.not_to raise_error
# Document made of 1000 nested, unclosed <div> tags.
expect { rewriter.precise_translate_content("<!DOCTYPE html>" + ("<div>" * 1000)) }.not_to raise_error
end
end
describe "@toplevel_regex" do
let(:regex) do
rewriter.instance_variable_get(:@toplevel_regex)