From 531389c684e024acfbe7b7218c1994d1598caba2 Mon Sep 17 00:00:00 2001 From: Mysti Lilla Date: Mon, 20 Jul 2020 16:36:24 -0600 Subject: [PATCH] Make Favorite backfill their possibly cross-sharded root_account_id fixes INTEROP-5863 flag=none Test plan - Have a favorite course and group with no root_account_id - Run DataFixup::PopulateRootAccountIds.run and ensure they get one - Do it with MRA, and all the other things you can think of because sadness Change-Id: I3c1342271d40e05ba3484f37d244e27c50557e72 Reviewed-on: https://gerrit.instructure.com/c/canvas-lms/+/243052 Tested-by: Service Cloud Jenkins Reviewed-by: Cody Cutrer Reviewed-by: Weston Dransfield QA-Review: Weston Dransfield Product-Review: Xander Moffatt --- .../populate_root_account_id_on_models.rb | 127 +++++++++++++--- ...populate_root_account_id_on_models_spec.rb | 135 ++++++++++++++++-- 2 files changed, 236 insertions(+), 26 deletions(-) diff --git a/lib/data_fixup/populate_root_account_id_on_models.rb b/lib/data_fixup/populate_root_account_id_on_models.rb index d24f93aea50..f1f3222efce 100644 --- a/lib/data_fixup/populate_root_account_id_on_models.rb +++ b/lib/data_fixup/populate_root_account_id_on_models.rb @@ -82,6 +82,7 @@ module DataFixup::PopulateRootAccountIdOnModels DiscussionTopic => :context, DiscussionTopicParticipant => :discussion_topic, EnrollmentState => :enrollment, + Favorite => :context, GradingPeriod => :grading_period_group, GradingPeriodGroup => [{root_account: [:root_account_id, :id]}, :course], GradingStandard => :context, @@ -126,7 +127,7 @@ module DataFixup::PopulateRootAccountIdOnModels }.freeze end - # these are non-association dependencies + # these are non-association dependencies, mostly used for override backfills def self.dependencies { AssetUserAccess => [:attachment, :calendar_event], @@ -151,12 +152,23 @@ module DataFixup::PopulateRootAccountIdOnModels }.freeze end + # table has `root_account_ids` column, not `root_account_id` + # tables listed here should override the populate method above def self.multiple_root_account_ids_tables [ LearningOutcome ].freeze end + + # In case we run into other tables that can't fully finish being filled with + # root account ids, and they have children who need them to pretend they're full + def self.unfillable_tables + [ + DeveloperKey + ].freeze + end + # tables that have been filled for a while already DONE_TABLES = [Account, Assignment, Course, CourseSection, Enrollment, EnrollmentDatesOverride, EnrollmentTerm, Group].freeze @@ -204,17 +216,21 @@ module DataFixup::PopulateRootAccountIdOnModels migration_tables.each_with_object({}) do |(table, assoc), memo| incomplete_tables << table && next unless table.column_names.include?(get_column_name(table)) next if (tables_in_progress + complete_tables + DONE_TABLES).include?(table) + association_hash = hash_association(assoc) direct_relation_associations = replace_polymorphic_associations(table, association_hash) check_if_table_has_root_account(table, direct_relation_associations.keys) ? complete_tables << table && next : incomplete_tables << table - prereqs_ready = (direct_relation_associations.keys + Array(dependencies[table])).all? do |a| - class_name = table.reflections[a.to_s]&.class_name&.constantize || a.to_s.classify.safe_constantize + + all_dependencies = direct_relation_associations.keys + Array(dependencies[table]) + prereqs_ready = all_dependencies.all? do |a| + assoc_reflection = table.reflections[a.to_s] + class_name = assoc_reflection&.class_name&.constantize || a.to_s.classify.safe_constantize if (complete_tables + DONE_TABLES).include?(class_name) true elsif incomplete_tables.include?(class_name) || tables_in_progress.include?(class_name) false else - check_if_table_has_root_account(class_name) ? complete_tables << table && true : incomplete_tables << table && false + check_if_association_has_root_account(table, assoc_reflection) ? complete_tables << table && true : incomplete_tables << table && false end end memo[table] = direct_relation_associations if prereqs_ready @@ -296,19 +312,59 @@ module DataFixup::PopulateRootAccountIdOnModels # [:course, :assignment], it will not check Attachments with a user as the context # and return true (assuming the course and assignment attachments have root account ids) # thus allowing us to pretend the Attachments table has been backfilled where necessary - def self.check_if_table_has_root_account(class_name, associations=[]) - return false if class_name.column_names.exclude?(get_column_name(class_name)) - if associations.blank? || dependencies.key?(class_name) - return unfillable_tables.include?(class_name) || - empty_root_account_column_scope(class_name).none? + def self.check_if_table_has_root_account(table, associations) + return false if table.column_names.exclude?(get_column_name(table)) + return empty_root_account_column_scope(table).none? if associations.blank? || dependencies.key?(table) + + associations.all? do |a| + reflection = table.reflections[a.to_s] + scope = empty_root_account_column_scope(table) + + # when reflection is linked to `context`, it's polymorphic, so: + # - get the polymorphic reflection (i.e. `context`, as opposed to `account`) + # - limit the checking query to just this context type to prevent overlap + # - limit by joining on the `through` association, if present + poly_ref = find_polymorphic_reflection(table, reflection) + if poly_ref + scope = scope.where("#{poly_ref.foreign_type} = '#{reflection.class_name}'") + scope = scope.joins(poly_ref.through_reflection.name) if poly_ref.through_reflection? + end + + # further refine query based on association type + if reflection.belongs_to? || reflection.through_reflection&.belongs_to? + scope = scope.where("#{reflection.foreign_key} IS NOT NULL") + elsif reflection.has_one? + scope = scope.where(id: reflection.klass.select(reflection.foreign_key)) + end + + # are there any nil root account ids? + scope.none? end - associations.all?{|a| empty_root_account_column_scope(class_name).joins(a).none?} end - # In case we run into other tables that can't fully finish being filled with - # root account ids, and they have children who need them to pretend they're full - def self.unfillable_tables - [DeveloperKey] + # An association may have foreign keys for records on other shards, and to + # backfill this table, we need to wait until the the foreign table has been + # filled on all those shards. + # For instance, `favorites` can have a `context_id` that is a course id from + # another shard, so this checks to see if the `courses` tables on all shards + # referenced by cross-shard `context_id`s have been backfilled. + def self.check_if_association_has_root_account(table, assoc_reflection) + class_name = assoc_reflection&.class_name&.constantize + return true if assoc_reflection.nil? + return true if unfillable_tables.include?(class_name) + + # find all cross-shard foreign keys for this association + scope = table.where("#{assoc_reflection.foreign_key} > ?", Shard::IDS_PER_SHARD) + + # is this a polymorphic reflection like `context`? If so, limit the query + # for shard ids to just the context type of this association + poly_ref = find_polymorphic_reflection(table, assoc_reflection) + scope = scope.where("#{poly_ref.foreign_type} = '#{assoc_reflection.class_name}'") if poly_ref + + shard_ids = [Shard.current.id, *scope.select("(#{assoc_reflection.foreign_key}/#{Shard::IDS_PER_SHARD}) as shard_id").distinct.map(&:shard_id)] + + # check associated table on all possible shards for any nil root account ids + empty_root_account_column_scope(class_name).shard(Shard.where(id: shard_ids)).none? end def self.empty_root_account_column_scope(table) @@ -320,6 +376,14 @@ module DataFixup::PopulateRootAccountIdOnModels end end + # some reflections are related to polymorphic reflections like `context`, + # which can help limit queries to the context type. + # this method finds a polymorphic reflection for the given reflection, + # if there is one + def self.find_polymorphic_reflection(table, reflection) + table.reflections.values.find{|values| values.foreign_key == reflection.foreign_key && values.foreign_type} + end + def self.get_column_name(table) multiple_root_account_ids_tables.include?(table) ? "root_account_ids" : "root_account_id" end @@ -328,10 +392,11 @@ module DataFixup::PopulateRootAccountIdOnModels primary_key_field = table.primary_key table.find_ids_in_ranges(start_at: min, end_at: max) do |batch_min, batch_max| associations.each do |assoc, columns| - account_id_column = create_column_names(table.reflections[assoc.to_s], columns) - table.where(primary_key_field => batch_min..batch_max, root_account_id: nil). - joins(assoc). - update_all("root_account_id = #{account_id_column}") + reflection = table.reflections[assoc.to_s] + account_id_column = create_column_names(reflection, columns) + scope = table.where(primary_key_field => batch_min..batch_max, root_account_id: nil) + scope.joins(assoc).update_all("root_account_id = #{account_id_column}") + fill_cross_shard_associations(scope, reflection, account_id_column) unless table == Wiki end end @@ -349,6 +414,32 @@ module DataFixup::PopulateRootAccountIdOnModels names.count == 1 ? names.first : "COALESCE(#{names.join(', ')})" end + def self.fill_cross_shard_associations(scope, reflection, column) + reflection = reflection.through_reflection if reflection.through_reflection? + foreign_key = reflection.foreign_key + min = scope.where("#{foreign_key} > ?", Shard::IDS_PER_SHARD).minimum(foreign_key) + while min + # one shard at a time + foreign_shard = Shard.shard_for(min) + if foreign_shard + scope = scope.where("#{foreign_key} >= ? AND #{foreign_key} < ?", min, (foreign_shard.id + 1) * Shard::IDS_PER_SHARD) + associated_ids = scope.pluck(foreign_key) + root_ids_with_foreign_keys = foreign_shard.activate do + reflection.klass. + select("#{column} AS root_id, array_agg(#{reflection.table_name}.#{reflection.klass.primary_key}) AS foreign_keys"). + where(id: associated_ids). + group("root_id") + end + root_ids_with_foreign_keys.each do |attributes| + foreign_keys = attributes.foreign_keys.map{|fk| Shard.global_id_for(fk, foreign_shard)} + scope.where("#{foreign_key} IN (#{foreign_keys.join(',')})"). + update_all("root_account_id = #{Shard.global_id_for(attributes.root_id, foreign_shard)}") + end + end + min = scope.where("#{foreign_key} > ?", (min / Shard::IDS_PER_SHARD + 1) * Shard::IDS_PER_SHARD).minimum(:id) + end + end + def self.unlock_next_backfill_job(table) # when the current table has been fully backfilled, restart the backfill job # so it can check to see if any new tables can begin working based off of this table diff --git a/spec/lib/data_fixup/populate_root_account_id_on_models_spec.rb b/spec/lib/data_fixup/populate_root_account_id_on_models_spec.rb index a30935f9811..fe32e9a46ce 100644 --- a/spec/lib/data_fixup/populate_root_account_id_on_models_spec.rb +++ b/spec/lib/data_fixup/populate_root_account_id_on_models_spec.rb @@ -30,6 +30,7 @@ describe DataFixup::PopulateRootAccountIdOnModels do shared_examples_for 'a datafixup that populates root_account_id' do let(:record) { raise 'set in examples' } let(:reference_record) { raise 'set in examples' } + let(:sharded) { false } before { record.update_columns(root_account_id: nil) } @@ -41,6 +42,8 @@ describe DataFixup::PopulateRootAccountIdOnModels do reference_record.root_account_id end + expected_root_account_id = Account.find(expected_root_account_id).global_id if sharded + expect { DataFixup::PopulateRootAccountIdOnModels.run }.to change { record.reload.root_account_id }.from(nil).to(expected_root_account_id) @@ -413,6 +416,28 @@ describe DataFixup::PopulateRootAccountIdOnModels do end end + context 'with Favorite' do + context 'with a course context' do + it_behaves_like 'a datafixup that populates root_account_id' do + let(:record) { Favorite.create!(context: @course, user: @user) } + let(:reference_record) { @course } + end + + context 'with sharding' do + specs_require_sharding + + it_behaves_like 'a datafixup that populates root_account_id' do + let(:record) do + user = @user + user.favorites.create!(context: @shard1.activate { course_model(account: account_model) }) + end + let(:reference_record) { @course } + let(:sharded) { true } + end + end + end + end + context 'with GradingPeriod' do it_behaves_like 'a datafixup that populates root_account_id' do let(:record) { grading_periods(count: 1).first } @@ -1069,9 +1094,9 @@ describe DataFixup::PopulateRootAccountIdOnModels do describe '#check_if_table_has_root_account' do it 'should return correctly for tables with root_account_id' do DeveloperKey.create!(account: @course.account) - expect(DataFixup::PopulateRootAccountIdOnModels.check_if_table_has_root_account(DeveloperKey)).to be true + expect(DataFixup::PopulateRootAccountIdOnModels.check_if_table_has_root_account(DeveloperKey, [:account])).to be true - expect(DataFixup::PopulateRootAccountIdOnModels.check_if_table_has_root_account(ContextModule)).to be false + expect(DataFixup::PopulateRootAccountIdOnModels.check_if_table_has_root_account(ContextModule, [:course])).to be false end it 'should return correctly for tables where we only care about certain associations' do @@ -1082,13 +1107,13 @@ describe DataFixup::PopulateRootAccountIdOnModels do # User-context event doesn't have root account id so we use the user's account event = CalendarEvent.create!(context: user_model) expect(DataFixup::PopulateRootAccountIdOnModels.check_if_table_has_root_account( - CalendarEvent + CalendarEvent, [:context_course, :context_group, :context_appointment_group, :context_course_section] )).to be true # manually adding makes the check method think it does, though event.update_columns(root_account_id: @course.root_account_id) expect(DataFixup::PopulateRootAccountIdOnModels.check_if_table_has_root_account( - CalendarEvent + CalendarEvent, [:context_course, :context_group, :context_appointment_group, :context_course_section] )).to be true # adding another User-context event should make it return false, @@ -1102,19 +1127,97 @@ describe DataFixup::PopulateRootAccountIdOnModels do it 'should return correctly for tables with root_account_ids' do LearningOutcome.create!(context: @course, short_description: "test") - expect(DataFixup::PopulateRootAccountIdOnModels.check_if_table_has_root_account(LearningOutcome)).to be true + expect(DataFixup::PopulateRootAccountIdOnModels.check_if_table_has_root_account(LearningOutcome, [])).to be true LearningOutcome.update_all(root_account_ids: nil) - expect(DataFixup::PopulateRootAccountIdOnModels.check_if_table_has_root_account(LearningOutcome)).to be false + expect(DataFixup::PopulateRootAccountIdOnModels.check_if_table_has_root_account(LearningOutcome, [])).to be false end it 'should check the whole table if there are non-association dependencies' do AssetUserAccess.create!(context: user_model, asset_code: @course.asset_string, root_account_id: @course.root_account_id) - expect(DataFixup::PopulateRootAccountIdOnModels.check_if_table_has_root_account(AssetUserAccess)).to be true + expect(DataFixup::PopulateRootAccountIdOnModels.check_if_table_has_root_account(AssetUserAccess, [])).to be true AssetUserAccess.update_all(root_account_id: nil) - expect(DataFixup::PopulateRootAccountIdOnModels.check_if_table_has_root_account(AssetUserAccess)).to be false + expect(DataFixup::PopulateRootAccountIdOnModels.check_if_table_has_root_account(AssetUserAccess, [])).to be false end end + describe '#check_if_association_has_root_account' do + it 'should ignore nil reflections' do + expect(DataFixup::PopulateRootAccountIdOnModels.check_if_association_has_root_account(LearningOutcome, nil)).to be true + end + + it 'should ignore assocations that point to unfillable tables' do + expect(DataFixup::PopulateRootAccountIdOnModels.check_if_association_has_root_account(AccessToken, AccessToken.reflections['developer_key'])).to be true + end + + context 'with_sharding' do + specs_require_sharding + + it 'should only search current shard when there are no cross-shard foreign keys' do + user = @user + course_model(account: account_model) + favorite = Favorite.create!(context: @course, user: user) + favorite.update_columns(root_account_id: nil) + + expect(Shard).to receive(:where).with(hash_including(id: [Shard.current.id])).and_call_original + DataFixup::PopulateRootAccountIdOnModels.check_if_association_has_root_account(Favorite, Favorite.reflections['course']) + end + + it 'should find possible shards from cross-shard foreign keys' do + user = @user + @shard1.activate do + course_model(account: account_model) + end + favorite = Favorite.create!(context: @course, user: user) + favorite.update_columns(root_account_id: nil) + + expect(Shard).to receive(:where).with(hash_including(id: [Shard.current.id, @shard1.id])).and_call_original + DataFixup::PopulateRootAccountIdOnModels.check_if_association_has_root_account(Favorite, Favorite.reflections['course']) + end + + it 'should check current shard for missing root account ids' do + user = @user + course_model(account: account_model) + favorite = Favorite.create!(context: @course, user: user) + favorite.update_columns(root_account_id: nil) + + expect(DataFixup::PopulateRootAccountIdOnModels.check_if_association_has_root_account(Favorite, Favorite.reflections['course'])).to be true + end + + it 'should check other shards for missing root_account_ids' do + user = @user + @shard1.activate do + course_model(account: account_model) + end + favorite = Favorite.create!(context: @course, user: user) + favorite.update_columns(root_account_id: nil) + + expect(DataFixup::PopulateRootAccountIdOnModels.check_if_association_has_root_account(Favorite, Favorite.reflections['course'])).to be true + end + + it 'should actually find missing root account ids on current shard' do + discussion_topic_model(context: @course) + de = @topic.discussion_entries.create!(user: user_model) + de.update_columns(root_account_id: nil) + @topic.update_columns(root_account_id: nil) + + expect(DataFixup::PopulateRootAccountIdOnModels.check_if_association_has_root_account(DiscussionEntry, DiscussionEntry.reflections['discussion_topic'])).to be false + end + + it 'should actually find missing root account ids on other shards' do + @shard1.activate do + discussion_topic_model(context: @course) + end + de = @topic.discussion_entries.create!(user: user_model) + de.update_columns(root_account_id: nil) + @topic.update_columns(root_account_id: nil) + + expect(DataFixup::PopulateRootAccountIdOnModels.check_if_association_has_root_account(DiscussionEntry, DiscussionEntry.reflections['discussion_topic'])).to be false + end + end + + + end + describe '#populate_root_account_ids' do it 'should only update models with an id in the given range' do cm2 = @course.context_modules.create! @@ -1137,6 +1240,22 @@ describe DataFixup::PopulateRootAccountIdOnModels do expect(DataFixup::PopulateRootAccountIdOnModels).not_to receive(:run) DataFixup::PopulateRootAccountIdOnModels.populate_root_account_ids(ContextModule, {course: :root_account_id}, cm2.id, cm2.id) end + + context 'with_sharding' do + specs_require_sharding + + it 'should fill cross-shard data' do + user = @user + @shard1.activate do + course_model(account: account_model) + end + favorite = Favorite.create!(context: @course, user: user) + favorite.update_columns(root_account_id: nil) + + DataFixup::PopulateRootAccountIdOnModels.populate_root_account_ids(Favorite, {course: :root_account_id}, favorite.id, favorite.id) + expect(favorite.reload.root_account_id).to eq @course.root_account.global_id + end + end end describe '#populate_root_account_ids_override' do