smart search: copy embeddings in course copy/blueprint sync
test plan: - have a course with searchable objects of each type (page, assignment, discussion topic, announcement) - do a search from this course to ensure the source course embeddings are up to date - copy the course to a destination course where the smart search feature flag is enabled - run a search in the destination course and confirm the progress bar quickly finishes if you see it at all (we still have to check it because it's possible to do a course copy into an existing shell that is missing embeddings) - copy the course into a destination course where the smart search feature flag is *not* enabled - verify there are no embeddings created or copied into the new destination flag=smart_search closes ADV-57 Change-Id: Ia8d040506359d2c4bd7718e19ba135c04a458af2 Reviewed-on: https://gerrit.instructure.com/c/canvas-lms/+/348353 Reviewed-by: Mysti Lilla <mysti@instructure.com> Reviewed-by: Jonathan Featherstone <jfeatherstone@instructure.com> Tested-by: Service Cloud Jenkins <svc.cloudjenkins@instructure.com> QA-Review: Jeremy Stanley <jeremy@instructure.com> Product-Review: Jeremy Stanley <jeremy@instructure.com>
This commit is contained in:
parent
669c4cabac
commit
65f566dbb3
|
@ -644,6 +644,7 @@ class ContentMigration < ActiveRecord::Base
|
|||
import!(data)
|
||||
|
||||
process_master_deletions(deletions.slice("LearningOutcome", "AssignmentGroup")) if deletions
|
||||
SmartSearch.copy_embeddings(self) if for_master_course_import?
|
||||
|
||||
unless import_immediately?
|
||||
update_import_progress(100)
|
||||
|
@ -1354,6 +1355,53 @@ class ContentMigration < ActiveRecord::Base
|
|||
save!
|
||||
end
|
||||
|
||||
# this is a much-simplified subset of asset_id_mapping that includes only ids,
|
||||
# for only the items imported in this migration
|
||||
def imported_asset_id_map
|
||||
return nil unless imported? && for_course_copy?
|
||||
|
||||
mapping = {}
|
||||
master_template = migration_type == "master_course_import" &&
|
||||
master_course_subscription&.master_template
|
||||
global_ids = master_template.present? || use_global_identifiers?
|
||||
|
||||
migration_settings[:imported_assets].each do |asset_type, dest_ids|
|
||||
klass = asset_type.constantize
|
||||
next unless klass.column_names.include? "migration_id"
|
||||
|
||||
dest_ids = dest_ids.split(",").map(&:to_i)
|
||||
mig_id_to_dest_id = context.shard.activate do
|
||||
scope = klass.where(context:, id: dest_ids)
|
||||
scope = scope.only_discussion_topics if asset_type == "DiscussionTopic"
|
||||
scope.where.not(migration_id: nil)
|
||||
.pluck(:migration_id, :id)
|
||||
.to_h
|
||||
end
|
||||
|
||||
mapping[asset_type] ||= {}
|
||||
if master_template
|
||||
master_template.master_content_tags
|
||||
.where(migration_id: mig_id_to_dest_id.keys)
|
||||
.pluck(:content_id, :migration_id)
|
||||
.each do |src_id, mig_id|
|
||||
mapping[asset_type][src_id] = mig_id_to_dest_id[mig_id]
|
||||
end
|
||||
else
|
||||
source_course.shard.activate do
|
||||
src_ids = klass.where(context: source_course).pluck(:id)
|
||||
src_ids.each do |src_id|
|
||||
asset_string = klass.asset_string(src_id)
|
||||
mig_id = CC::CCHelper.create_key(asset_string, global: global_ids)
|
||||
dest_id = mig_id_to_dest_id[mig_id]
|
||||
mapping[asset_type][src_id] = dest_id if dest_id
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
mapping
|
||||
end
|
||||
|
||||
def destination_hosts
|
||||
return [] unless context
|
||||
|
||||
|
|
|
@ -87,6 +87,10 @@ module MasterCourses::Restrictor
|
|||
|
||||
def mark_as_importing!(cm)
|
||||
@importing_migration = cm if cm&.master_course_subscription
|
||||
# if we are doing a course copy and the source course has up-to-date search embeddings,
|
||||
# we will copy those embeddings in batches instead of regenerating them
|
||||
self.skip_embeddings = true if cm&.for_course_copy? && respond_to?(:skip_embeddings=) &&
|
||||
SmartSearch.up_to_date?(cm.source_course)
|
||||
end
|
||||
|
||||
def skip_downstream_changes!
|
||||
|
|
|
@ -52,6 +52,8 @@ class Canvas::Migration::Worker::CourseCopyWorker < Canvas::Migration::Worker::B
|
|||
cm.update_import_progress(20)
|
||||
|
||||
cm.import_content
|
||||
SmartSearch.copy_embeddings(cm)
|
||||
|
||||
cm.workflow_state = :imported
|
||||
cm.save
|
||||
cm.update_import_progress(100)
|
||||
|
|
|
@ -162,6 +162,10 @@ module SmartSearch
|
|||
end
|
||||
end
|
||||
|
||||
def up_to_date?(course)
|
||||
smart_search_available?(course) && course.search_embedding_version == SmartSearch::EMBEDDING_VERSION
|
||||
end
|
||||
|
||||
# returns [ready, progress]
|
||||
# progress may be < 100 while ready if upgrading embeddings
|
||||
def check_course(course)
|
||||
|
@ -221,5 +225,45 @@ module SmartSearch
|
|||
|
||||
(indexed * 100.0 / total).to_i
|
||||
end
|
||||
|
||||
def copy_embeddings(content_migration)
|
||||
return unless content_migration.for_course_copy? &&
|
||||
content_migration.source_course&.search_embedding_version == EMBEDDING_VERSION &&
|
||||
SmartSearch.smart_search_available?(content_migration.context)
|
||||
|
||||
content_migration.imported_asset_id_map&.each do |class_name, id_mapping|
|
||||
klass = class_name.constantize
|
||||
next unless klass.respond_to?(:embedding_class)
|
||||
|
||||
fk = klass.embedding_foreign_key # i.e. :wiki_page_id
|
||||
|
||||
content_migration.context.shard.activate do
|
||||
klass.embedding_class
|
||||
.where(:version => EMBEDDING_VERSION, fk => id_mapping.values)
|
||||
.in_batches
|
||||
.delete_all
|
||||
end
|
||||
|
||||
content_migration.source_course.shard.activate do
|
||||
klass.embedding_class.where(:version => EMBEDDING_VERSION, fk => id_mapping.keys)
|
||||
.find_in_batches(batch_size: 50) do |src_embeddings|
|
||||
dest_embeddings = src_embeddings.map do |src_embedding|
|
||||
{
|
||||
:embedding => src_embedding.embedding,
|
||||
fk => id_mapping[src_embedding[fk]],
|
||||
:version => EMBEDDING_VERSION,
|
||||
:root_account_id => content_migration.context.root_account_id,
|
||||
:created_at => Time.now.utc,
|
||||
:updated_at => Time.now.utc
|
||||
}
|
||||
end
|
||||
|
||||
content_migration.context.shard.activate do
|
||||
klass.embedding_class.insert_all(dest_embeddings)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -41,6 +41,8 @@ module SmartSearchable
|
|||
include HtmlTextHelper
|
||||
has_many :embeddings, class_name: embedding_class_name, inverse_of: table_name.singularize.to_sym
|
||||
cattr_accessor :search_title_column, :search_body_column
|
||||
attr_accessor :skip_embeddings
|
||||
|
||||
after_save :generate_embeddings, if: :should_generate_embeddings?
|
||||
after_save :delete_embeddings, if: -> { deleted? && saved_change_to_workflow_state? }
|
||||
end
|
||||
|
@ -63,7 +65,7 @@ module SmartSearchable
|
|||
end
|
||||
|
||||
def should_generate_embeddings?
|
||||
return false if deleted?
|
||||
return false if deleted? || skip_embeddings
|
||||
return false unless SmartSearch.smart_search_available?(context)
|
||||
|
||||
saved_changes.key?(self.class.search_title_column) || saved_changes.key?(self.class.search_body_column) ||
|
||||
|
|
|
@ -21,26 +21,29 @@
|
|||
require_relative "../spec_helper"
|
||||
|
||||
describe SmartSearch do
|
||||
before do
|
||||
skip "not available" unless ActiveRecord::Base.connection.table_exists?("wiki_page_embeddings")
|
||||
|
||||
allow(SmartSearch).to receive(:generate_embedding) { |input| input.chars.map(&:ord).fill(0, input.size...1024).slice(0...1024) }
|
||||
allow(SmartSearch).to receive(:bedrock_client).and_return(double)
|
||||
end
|
||||
|
||||
before :once do
|
||||
course_factory
|
||||
@course.wiki_pages.create! title: "red pandas", body: "foo " * 400
|
||||
@course.assignments.create! name: "horse feathers", description: "..."
|
||||
@course.assignments.create! name: "hungry hippos", description: "..."
|
||||
@course.discussion_topics.create! title: "!!!", message: "..."
|
||||
@course.announcements.create! title: "hear ye", message: "..."
|
||||
@course.enable_feature! :smart_search
|
||||
end
|
||||
|
||||
describe "#index_course" do
|
||||
before do
|
||||
skip "not available" unless ActiveRecord::Base.connection.table_exists?("wiki_page_embeddings")
|
||||
allow(SmartSearch).to receive(:generate_embedding).and_return([1] * 1024)
|
||||
end
|
||||
|
||||
before :once do
|
||||
course_factory
|
||||
@course.wiki_pages.create! title: "red pandas", body: "foo " * 400
|
||||
@course.assignments.create! name: "horse feathers", description: "..."
|
||||
@course.assignments.create! name: "hungry hippos", description: "..."
|
||||
@course.discussion_topics.create! title: "!!!", message: "..."
|
||||
@course.enable_feature! :smart_search
|
||||
end
|
||||
|
||||
it "indexes a new course" do
|
||||
SmartSearch.index_course(@course)
|
||||
expect(WikiPageEmbedding.where(wiki_page_id: @course.wiki_pages.select(:id)).count).to eq 2
|
||||
expect(AssignmentEmbedding.where(assignment_id: @course.assignments.select(:id)).count).to eq 2
|
||||
expect(DiscussionTopicEmbedding.where(discussion_topic_id: @course.discussion_topics.select(:id)).count).to eq 1
|
||||
expect(DiscussionTopicEmbedding.where(discussion_topic_id: @course.discussion_topics.select(:id)).count).to eq 2
|
||||
end
|
||||
|
||||
it "indexes only missing items" do
|
||||
|
@ -59,4 +62,110 @@ describe SmartSearch do
|
|||
expect(@course.assignments.first.embeddings.first.version).to eq SmartSearch::EMBEDDING_VERSION
|
||||
end
|
||||
end
|
||||
|
||||
describe "course copy" do
|
||||
def run_course_copy(copy_from, copy_to)
|
||||
@cm = ContentMigration.new(context: copy_to,
|
||||
source_course: copy_from,
|
||||
migration_type: "course_copy_importer",
|
||||
copy_options: { everything: "1" })
|
||||
@cm.migration_settings[:import_immediately] = true
|
||||
@cm.set_default_settings
|
||||
@cm.save!
|
||||
worker = Canvas::Migration::Worker::CourseCopyWorker.new
|
||||
worker.perform(@cm)
|
||||
end
|
||||
|
||||
before :once do
|
||||
next unless ActiveRecord::Base.connection.table_exists?("wiki_page_embeddings")
|
||||
|
||||
@copy_from = @course
|
||||
SmartSearch.index_course(@copy_from)
|
||||
|
||||
@copy_to = course_factory
|
||||
end
|
||||
|
||||
context "destination does not enable smart search feature" do
|
||||
it "does not create or copy embeddings" do
|
||||
run_course_copy(@copy_from, @copy_to)
|
||||
expect(WikiPageEmbedding.where(wiki_page_id: @copy_to.wiki_pages.select(:id)).count).to eq 0
|
||||
expect(AssignmentEmbedding.where(assignment_id: @copy_to.assignments.select(:id)).count).to eq 0
|
||||
expect(DiscussionTopicEmbedding.where(discussion_topic_id: @copy_to.discussion_topics.select(:id)).count).to eq 0
|
||||
end
|
||||
end
|
||||
|
||||
context "destination enables smart search feature" do
|
||||
before :once do
|
||||
@copy_to&.enable_feature! :smart_search
|
||||
end
|
||||
|
||||
it "copies embeddings" do
|
||||
# create some existing embeddings first so we can verify they are overwritten or ignored as appropriate
|
||||
# we will simulate an assignment that was copied previously, and one that is not part of the migration
|
||||
src_assignment = @copy_from.assignments.first
|
||||
@copy_to.assignments.create! name: src_assignment.name, description: src_assignment.description, migration_id: CC::CCHelper.create_key(src_assignment, global: true)
|
||||
distractor = @copy_to.assignments.create! name: "original", description: "..."
|
||||
run_jobs
|
||||
expect(distractor.embeddings.count).to eq 1
|
||||
|
||||
expect(SmartSearch).not_to receive(:generate_embedding)
|
||||
run_course_copy(@copy_from, @copy_to)
|
||||
expect(@copy_from.wiki_pages.first.embeddings.map(&:embedding).sort).to eq @copy_to.wiki_pages.first.embeddings.map(&:embedding).sort
|
||||
expect(@copy_from.assignments.first.embeddings.map(&:embedding).sort).to eq @copy_to.assignments.first.embeddings.map(&:embedding).sort
|
||||
expect(@copy_from.assignments.last.embeddings.map(&:embedding).sort).to eq @copy_to.assignments.last.embeddings.map(&:embedding).sort
|
||||
expect(@copy_from.discussion_topics.only_discussion_topics.first.embeddings.map(&:embedding).sort).to eq @copy_to.discussion_topics.only_discussion_topics.first.embeddings.map(&:embedding).sort
|
||||
expect(@copy_from.announcements.first.embeddings.map(&:embedding).sort).to eq @copy_to.announcements.first.embeddings.map(&:embedding).sort
|
||||
expect(distractor.reload.embeddings.count).to eq 1
|
||||
end
|
||||
|
||||
it "generates embeddings in the destination if the source course doesn't have them or is out of date" do
|
||||
@copy_from.disable_feature! :smart_search
|
||||
run_course_copy(@copy_from, @copy_to)
|
||||
run_jobs
|
||||
expect(SmartSearch).to have_received(:generate_embedding).at_least(6).times
|
||||
expect(WikiPageEmbedding.where(wiki_page_id: @copy_to.wiki_pages.select(:id)).count).to eq 2
|
||||
expect(AssignmentEmbedding.where(assignment_id: @copy_to.assignments.select(:id)).count).to eq 2
|
||||
expect(DiscussionTopicEmbedding.where(discussion_topic_id: @copy_to.discussion_topics.select(:id)).count).to eq 2
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "blueprint sync" do
|
||||
def run_blueprint_sync(template)
|
||||
@mm = MasterCourses::MasterMigration.start_new_migration!(template, nil)
|
||||
run_jobs
|
||||
@mm.reload
|
||||
end
|
||||
|
||||
before :once do
|
||||
next unless ActiveRecord::Base.connection.table_exists?("wiki_page_embeddings")
|
||||
|
||||
@blueprint = @course
|
||||
@template = MasterCourses::MasterTemplate.set_as_master_course(@blueprint)
|
||||
@assoc1 = course_factory
|
||||
@assoc2 = course_factory
|
||||
@template.add_child_course!(@assoc1)
|
||||
@template.add_child_course!(@assoc2)
|
||||
@blueprint.enable_feature! :smart_search
|
||||
@assoc1.enable_feature! :smart_search
|
||||
SmartSearch.index_course(@blueprint)
|
||||
end
|
||||
|
||||
it "copies embeddings to destinations that enable smart search" do
|
||||
expect(SmartSearch).not_to receive(:generate_embedding)
|
||||
run_blueprint_sync(@template)
|
||||
|
||||
expect(@blueprint.wiki_pages.first.embeddings.map(&:embedding).sort).to eq @assoc1.wiki_pages.first.embeddings.map(&:embedding).sort
|
||||
expect(@blueprint.assignments.first.embeddings.map(&:embedding).sort).to eq @assoc1.assignments.first.embeddings.map(&:embedding).sort
|
||||
expect(@blueprint.assignments.last.embeddings.map(&:embedding).sort).to eq @assoc1.assignments.last.embeddings.map(&:embedding).sort
|
||||
expect(@blueprint.discussion_topics.only_discussion_topics.first.embeddings.map(&:embedding).sort).to eq @assoc1.discussion_topics.only_discussion_topics.first.embeddings.map(&:embedding).sort
|
||||
expect(@blueprint.announcements.first.embeddings.map(&:embedding).sort).to eq @assoc1.announcements.first.embeddings.map(&:embedding).sort
|
||||
|
||||
expect(@assoc2.wiki_pages.first.embeddings).to be_empty
|
||||
expect(@assoc2.assignments.first.embeddings).to be_empty
|
||||
expect(@assoc2.assignments.last.embeddings).to be_empty
|
||||
expect(@assoc2.discussion_topics.only_discussion_topics.first.embeddings).to be_empty
|
||||
expect(@assoc2.announcements.first.embeddings).to be_empty
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue