Data fix up for broken links from LF-203

fixes LF-1614
flag=none

Test plan
- Check links from any of the bugs related
  to LF-203 and ensure this data fixup can
  find and repair them

Change-Id: If5c1ad90b322de620287f5af90d11147a7e6ae2a
Reviewed-on: https://gerrit.instructure.com/c/canvas-lms/+/347596
Tested-by: Service Cloud Jenkins <svc.cloudjenkins@instructure.com>
Reviewed-by: Eric Saupe <eric.saupe@instructure.com>
QA-Review: Eric Saupe <eric.saupe@instructure.com>
Product-Review: Mysti Lilla <mysti@instructure.com>
This commit is contained in:
Mysti Lilla 2024-05-16 18:18:21 -06:00
parent 28956ff7bd
commit 25ea374b02
3 changed files with 411 additions and 1 deletions

View File

@ -0,0 +1,135 @@
# frozen_string_literal: true
#
# Copyright (C) 2024 - present Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
require "nokogiri"
# Data fixup that repairs media <iframe> embeds whose `src` was rewritten to a
# ".../file_contents/course%20files/..." path. For every affected record the
# iframe `src` path is replaced with "/media_objects_iframe/<media id>", and a
# CSV report of the changes (plus a second CSV of per-record errors, if any) is
# stored as an Attachment on the site admin account.
module DataFixup::ReplaceBrokenMediaObjectLinks
  # Model => attribute holding the HTML to scan. Quizzes::Quiz maps to nil
  # because two attributes (description and quiz_data) are handled explicitly.
  CONTENT_MAP = {
    AssessmentQuestion => :question_data,
    Assignment => :description,
    Course => :syllabus_body,
    DiscussionTopic => :message,
    Quizzes::Quiz => nil,
    Quizzes::QuizQuestion => :question_data,
    WikiPage => :body
  }.freeze

  # CSS selector for the iframes this fixup considers broken.
  BROKEN_IFRAME_SELECTOR = "iframe[src*='file_contents/course%20files']"

  # Media id embedded in the broken src, e.g. ".../media_objects/m-abc.mp4".
  # The character class is strictly alphanumeric; the previous "A-z" range
  # also matched the punctuation between "Z" and "a" ([ \ ] ^ _ `).
  MEDIA_ID_IN_SRC = %r{media_objects(?:_iframe)?/((?:m-|0_)[0-9a-zA-Z]+)}

  # Media id embedded in the element id, e.g. id="media_comment_m-abc".
  MEDIA_ID_IN_ELEMENT_ID = /media_comment_((?:m-|0_)[0-9a-zA-Z]+)/

  # Fixes every matching record of +model+ whose id lies in start_at..end_at.
  #
  # @param model [Class] one of the keys of CONTENT_MAP
  # @param field [Symbol, nil] attribute holding the HTML (nil for Quizzes::Quiz)
  # @param where_clause [Array] arguments splatted into +where+ to pre-filter rows
  # @param start_at [Integer] first id of the batch (inclusive)
  # @param end_at [Integer] last id of the batch (inclusive)
  #
  # Errors raised while processing a single record are logged to the error CSV
  # and do not stop the batch. Temporary CSV files are always removed, even if
  # the batch itself or the attachment upload raises.
  def self.update_models(model, field, where_clause, start_at, end_at)
    file_name = "data_fixup_replace_broken_media_object_links_#{Shard.current.id}_#{model.table_name}_#{field}_#{start_at}_#{end_at}_#{Time.now.to_f}.csv"
    error_file_name = "#{file_name}_errors.csv"
    wrote_changes = false
    wrote_errors = false

    # Block form guarantees the report is flushed and closed before upload.
    CSV.open(file_name, "w") do |csv|
      model.where(id: start_at..end_at).where(*where_clause).find_each(strategy: :pluck_ids) do |active_record|
        next unless (field && active_record[field]) || active_record.is_a?(Quizzes::Quiz)

        if active_record.is_a?(AssessmentQuestion) || active_record.is_a?(Quizzes::QuizQuestion)
          question_data = fix_question(active_record.question_data)
          if active_record.question_data.to_hash != question_data.to_hash
            csv << [Shard.current.id, active_record.class.table_name, active_record.id, active_record.question_data.to_hash, question_data.to_hash]
            wrote_changes = true
            active_record.update!(question_data:)
          end
        elsif active_record.is_a?(Quizzes::Quiz)
          active_record.description = fix_html(active_record.description)
          # A quiz can be selected through its description alone, so quiz_data
          # is not guaranteed to be populated.
          if active_record.quiz_data
            active_record.quiz_data = active_record.quiz_data.map { |question| fix_question(question) }
          end
          if active_record.changed?
            csv << [Shard.current.id, active_record.class.table_name, active_record.id, active_record.changed_attributes.to_hash, active_record.attributes.slice(*active_record.changed_attributes.keys).to_hash]
            wrote_changes = true
            active_record.save!
          end
        else
          new_html = fix_html(active_record[field])
          if active_record[field] != new_html
            csv << [Shard.current.id, active_record.class.table_name, active_record.id, active_record[field], new_html]
            wrote_changes = true
            active_record.update!(field => new_html)
          end
        end
      rescue => e
        # Best effort: record the failure and move on to the next record.
        wrote_errors = true
        CSV.open(error_file_name, "a") do |error_csv|
          error_csv << [Shard.current.id, active_record.class.table_name, active_record.id, e.message]
        end
      end
    end

    attach_report(file_name) if wrote_changes
    attach_report(error_file_name) if wrote_errors
  ensure
    # The names are nil only if building them raised; nothing to remove then.
    FileUtils.rm_f([file_name, error_file_name].compact)
  end

  # Rewrites the src of every broken media iframe in +html+.
  #
  # The media id is taken, in order of preference, from the broken src itself,
  # from a "media_comment_<id>" element id, or from the data-media-id
  # attribute. Iframes for which no id can be recovered are left untouched.
  #
  # @param html [String, nil] HTML fragment
  # @return [String, nil] the repaired HTML, or +html+ itself (same object)
  #   when it is blank or nothing was rewritten. Returning the input unchanged
  #   keeps callers from seeing a "difference" that is only the parser
  #   re-serializing the markup.
  def self.fix_html(html)
    return html if html.blank?

    doc = Nokogiri::HTML5::DocumentFragment.parse(html, nil, { max_tree_depth: 10_000 })
    rewritten = false
    doc.css(BROKEN_IFRAME_SELECTOR).each do |e|
      media_id = e["src"]&.[](MEDIA_ID_IN_SRC, 1).presence
      media_id ||= e["id"]&.[](MEDIA_ID_IN_ELEMENT_ID, 1).presence
      # Some broken embeds carry the literal string "undefined" here; using it
      # would only produce a different dead link.
      media_id ||= e["data-media-id"] unless e["data-media-id"] == "undefined"
      next unless media_id.present?

      # Only the path is replaced; scheme, host and query string are kept.
      url = Addressable::URI.parse(e["src"])
      url.path = "/media_objects_iframe/#{media_id}"
      e.set_attribute("src", url.to_s)
      rewritten = true
    end
    rewritten ? doc.to_s : html
  end

  # Enqueues one update_models job per id range of up to 100_000 matching rows.
  #
  # @param model [Class] model to process
  # @param field [Symbol, nil] attribute holding the HTML
  # @param where_clause [Array] arguments splatted into +where+
  def self.create_dataset_jobs(model, field, where_clause)
    model.where(*where_clause).find_ids_in_ranges(batch_size: 100_000) do |start_at, end_at|
      delay_if_production(
        priority: Delayed::LOW_PRIORITY,
        n_strand: ["DataFixup::ReplaceBrokenMediaObjectLinks", Shard.current.database_server.id]
      ).update_models(model, field, where_clause, start_at, end_at)
    end
  end

  # Entry point: enqueues a create_dataset_jobs job for every model in
  # CONTENT_MAP, pre-filtered with a LIKE search for the broken iframe src.
  def self.run
    # "\\%" reaches SQL as "\%", i.e. a literal percent sign inside LIKE.
    pattern = "iframe%/file_contents/course\\%20files/"
    CONTENT_MAP.each do |model, field|
      where_clause =
        if model == Quizzes::Quiz
          ["description LIKE '%#{pattern}%' OR quiz_data LIKE '%#{pattern}%'"]
        else
          ["#{field} LIKE '%#{pattern}%'"]
        end
      delay_if_production(
        priority: Delayed::LOW_PRIORITY,
        n_strand: ["DataFixup::ReplaceBrokenMediaObjectLinks", Shard.current.database_server.id]
      ).create_dataset_jobs(model, field, where_clause)
    end
  end

  # Returns a copy of a question hash with question_text and answer texts
  # repaired. Keys that are absent (or nil) are not added, so an untouched
  # question compares equal to its original.
  def self.fix_question(question)
    fixed = question.dup
    fixed["question_text"] = fix_html(question["question_text"]) if question["question_text"]
    fixed["answers"] = fix_answers(question["answers"]) if question["answers"]
    fixed
  end
  private_class_method :fix_question

  # Returns the answers with each present "text" value run through fix_html.
  def self.fix_answers(answers)
    answers.map do |answer|
      answer["text"] ? answer.merge({ "text" => fix_html(answer["text"]) }) : answer
    end
  end
  private_class_method :fix_answers

  # Uploads a CSV report as a site admin Attachment. The local file is deleted
  # by the caller right after this returns, so the upload is assumed to consume
  # the data before create! returns; the handle is closed here accordingly.
  def self.attach_report(file_name)
    File.open(file_name) do |file|
      Attachment.create!(filename: file_name, uploaded_data: file, context: Account.site_admin, content_type: "text/csv")
    end
  end
  private_class_method :attach_report
end

View File

@ -81,7 +81,7 @@ module Factories
{ weight: 0, text: "C", comments: "", id: 7051 }
],
question_type: "multiple_choice_question"
}
}.with_indifferent_access
]
end

View File

@ -0,0 +1,275 @@
# frozen_string_literal: true
#
# Copyright (C) 2024 - present Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
require "spec_helper"
describe DataFixup::ReplaceBrokenMediaObjectLinks do
  let(:course) { course_model }

  # The broken embed used by most examples: the src carries no media id, so the
  # id has to be recovered from data-media-id.
  let(:broken_embed) do
    <<~HTML.strip
      <iframe style="width: 400px; height: 225px; display: inline-block;" title="title" data-media-type="video" src="/courses/1/file_contents/course%20files/unfiled/title" allowfullscreen="allowfullscreen" allow="fullscreen" data-media-id="m-media"></iframe>
    HTML
  end

  # What broken_embed is expected to look like after the fixup has run.
  let(:repaired_embed) do
    <<~HTML.strip
      <iframe style="width: 400px; height: 225px; display: inline-block;" title="title" data-media-type="video" src="/media_objects_iframe/m-media" allowfullscreen="allowfullscreen" allow="fullscreen" data-media-id="m-media"></iframe>
    HTML
  end

  context "bad link types" do
    let(:assignment) { course.assignments.create!(submission_types: "online_text_entry", points_possible: 2) }

    # Stores +html+ as the assignment description, runs the fixup and reloads.
    def fix_description(html)
      assignment.update(description: html)
      described_class.run
      assignment.reload
    end

    # Asserts that running the fixup over +html+ does not touch the assignment.
    def expect_assignment_untouched(html)
      assignment.update(description: html)
      stamp_before_run = assignment.updated_at
      described_class.run
      assignment.reload
      expect(assignment.updated_at).to eq stamp_before_run
    end

    # Renders a value the way it appears as a quoted CSV field.
    def csv_quoted(value)
      "\"#{value.gsub('"', '""')}\""
    end

    it "fixes bad links with data-media-id" do
      fix_description(broken_embed)
      expect(assignment.description).to eq repaired_embed
    end

    it "fixes bad links with media_object/media_id" do
      fix_description(<<~HTML.strip)
        <iframe style="width: 320px; height: 14.25rem; display: inline-block;" title="title" data-media-type="audio" src="https://url.instructure.com/courses/1/file_contents/course%20files/media_objects/m-media.mp4" data-media-id="m-media"></iframe>
      HTML
      expect(assignment.description).to eq <<~HTML.strip
        <iframe style="width: 320px; height: 14.25rem; display: inline-block;" title="title" data-media-type="audio" src="https://url.instructure.com/media_objects_iframe/m-media" data-media-id="m-media"></iframe>
      HTML
    end

    it "fixes bad links with media_object_iframe/media_id" do
      fix_description(<<~HTML.strip)
        <iframe style="width: 599px; height: 337px; display: inline-block;" title="title" data-media-type="video" src="https://url.instructure.com/courses/1/file_contents/course%20files/media_objects_iframe/m-media?type=video?type=video" allowfullscreen="allowfullscreen" allow="fullscreen" data-media-id="undefined"></iframe>
      HTML
      expect(assignment.description).to eq <<~HTML.strip
        <iframe style="width: 599px; height: 337px; display: inline-block;" title="title" data-media-type="video" src="https://url.instructure.com/media_objects_iframe/m-media?type=video?type=video" allowfullscreen="allowfullscreen" allow="fullscreen" data-media-id="undefined"></iframe>
      HTML
    end

    it "fixes bad links with media_comment" do
      fix_description(<<~HTML.strip)
        <iframe id="media_comment_m-media" class="instructure_inline_media_comment video_comment" style="width: 320px; height: 240px; display: inline-block;" title="" data-media-type="video" src="https://url.instructure.com/courses/1/file_contents/course%20files/media_objects/m-media.mp4" allowfullscreen="allowfullscreen" allow="fullscreen" data-media-id="m-media"></iframe>
      HTML
      expect(assignment.description).to eq <<~HTML.strip
        <iframe id="media_comment_m-media" class="instructure_inline_media_comment video_comment" style="width: 320px; height: 240px; display: inline-block;" title="" data-media-type="video" src="https://url.instructure.com/media_objects_iframe/m-media" allowfullscreen="allowfullscreen" allow="fullscreen" data-media-id="m-media"></iframe>
      HTML
    end

    it "doesn't update objects with similar types of bad links" do
      # Same broken path, but on <a> and <img> rather than <iframe>.
      expect_assignment_untouched(<<~HTML.strip)
        <a class="instructure_file_link instructure_scribd_file" title="title.docx" href="/courses/1/file_contents/course%20files/test.docx?canvas_download=1&amp;canvas_qs_wrap=1" data-api-returntype="File">Text</a>
        <img src="/courses/1/file_contents/course%20files/Syllabus/title.jpg" alt="alt text">
      HTML
    end

    it "doesn't update objects with iframes with unrecoverable bad links" do
      # The iframe offers no media id in its src, id or data-media-id.
      expect_assignment_untouched(<<~HTML.strip)
        <iframe style="width: 400px; height: 225px; display: inline-block;" title="title" data-media-type="video" src="/courses/1/file_contents/course%20files/unfiled/title" allowfullscreen="allowfullscreen" allow="fullscreen"/>
        <a href="/courses/1/file_contents/course%20files/unfiled/title">Text</a>
      HTML
    end

    it "create CSV report of changes" do
      fix_description(broken_embed)
      report = Attachment.find_by("filename like 'data_fixup_replace_broken_media_object_links_#{Shard.current.id}_assignments_description_#{assignment.id}_#{assignment.id}%'")
      expected_row = [
        Shard.current.id,
        "assignments",
        assignment.id,
        csv_quoted(broken_embed),
        csv_quoted(repaired_embed)
      ].join(",")
      expect(File.read(report.open).strip).to eq(expected_row)
    end
  end

  context "all models" do
    it "fixes bad links in assessment question" do
      assessment_question_bank_model(course:)
      question_data = { question_text: broken_embed }
      aq = assessment_question_model(bank: @bank, question_data:)
      described_class.run
      aq.reload
      expect(aq.question_data["question_text"]).to eq repaired_embed
    end

    it "fixes bad links in course syllabus" do
      course.update(syllabus_body: broken_embed)
      described_class.run
      course.reload
      expect(course.syllabus_body).to eq repaired_embed
    end

    it "fixes bad links in discussion topic message" do
      discussion_topic_model(context: course, message: broken_embed)
      described_class.run
      @topic.reload
      expect(@topic.message).to eq repaired_embed
    end

    it "fixes bad links in quiz description" do
      quiz_model(course:, description: broken_embed)
      described_class.run
      @quiz.reload
      expect(@quiz.description).to eq repaired_embed
    end

    it "fixes bad links in quiz quiz_data" do
      quiz_model(course:)
      quiz_data = test_quiz_data
      quiz_data.first[:question_text] = broken_embed
      @quiz.update(quiz_data:)
      described_class.run
      @quiz.reload
      expect(@quiz.quiz_data.first["question_text"]).to eq repaired_embed
    end

    it "fixes bad links in quiz question" do
      quiz_model(course:)
      qq = @quiz.quiz_questions.create!(question_data: multiple_choice_question_data.merge("question_text" => broken_embed))
      described_class.run
      qq.reload
      expect(qq.question_data["question_text"]).to eq repaired_embed
    end

    it "fixes bad links in wiki page body" do
      wiki_page_model(course:, body: broken_embed)
      described_class.run
      @page.reload
      expect(@page.body).to eq repaired_embed
    end
  end
end