optimize sis enrollment imports

* Attempt to cache the course/section between rows
 * On section change, calculate the new account associations *once*,
   and use the new User#update_account_associations incremental mode
   for strictly new enrollments
 * Keep the account_assocations account_chain cache intact between
   calls
 * Fall back to global update_account_associations if too few
   enrollments in the section, if not a strictly new enrollment, or
   if the user is already going to be globally updated

Change-Id: I884a394aef4f4b81f4472ee3a57f89c1f72ae371
Reviewed-on: https://gerrit.instructure.com/5136
Tested-by: Hudson <hudson@instructure.com>
Reviewed-by: JT Olds <jt@instructure.com>
This commit is contained in:
Cody Cutrer 2011-08-17 13:38:36 -06:00
parent f997efd263
commit 4bb161c9a4
3 changed files with 105 additions and 15 deletions

View File

@ -35,7 +35,6 @@ class Enrollment < ActiveRecord::Base
before_save :assign_uuid
before_save :assert_section
after_save :touch_user
before_save :update_user_account_associations_if_necessary
attr_accessible :user, :course, :workflow_state, :course_section, :limit_priveleges_to_course_section, :invitation_email

View File

@ -40,8 +40,12 @@ module SIS
def process(csv)
start = Time.now
update_account_association_user_ids = Set.new
incrementally_update_account_associations_user_ids = Set.new
users_to_touch_ids = Set.new
courses_to_touch_ids = Set.new
enrollments_to_update_sis_batch_ids = []
account_chain_cache = {}
course = section = nil
Enrollment.skip_callback(:belongs_to_touch_after_save_or_destroy_for_course) do
User.skip_updating_user_account_associations do
@ -63,13 +67,20 @@ module SIS
count += 1
remaining_in_transaction -= 1
course = nil
section = nil
last_section = section
# reset the cached course/section if they don't match this row
if course && row['course_id'].present? && course.sis_source_id != row['course_id']
course = nil
section = nil
end
if section && row['section_id'].present? && section.sis_source_id != row['section_id']
section = nil
end
pseudo = Pseudonym.find_by_account_id_and_sis_user_id(@root_account.id, row['user_id'])
user = pseudo.user rescue nil
course = Course.find_by_root_account_id_and_sis_source_id(@root_account.id, row['course_id']) unless row['course_id'].blank?
section = CourseSection.find_by_root_account_id_and_sis_source_id(@root_account.id, row['section_id']) unless row['section_id'].blank?
course ||= Course.find_by_root_account_id_and_sis_source_id(@root_account.id, row['course_id']) unless row['course_id'].blank?
section ||= CourseSection.find_by_root_account_id_and_sis_source_id(@root_account.id, row['section_id']) unless row['section_id'].blank?
unless user && (course || section)
add_warning csv, "Neither course #{row['course_id']} nor section #{row['section_id']} existed for user enrollment" unless (course || section)
add_warning csv, "User #{row['user_id']} didn't exist for user enrollment" unless user
@ -86,20 +97,34 @@ module SIS
next
end
unless section
section = course.course_sections.find_by_sis_source_id(row['section_id'])
section ||= course.default_section
end
course = section.course if !course || (course.id != section.course_id && section.nonxlist_course_id == course.id)
# reset cached/inferred course and section if they don't match with the opposite piece that was
# explicitly provided
section = course.default_section if section.nil? || row['section_id'].blank? && !section.default_section
course = section.course if course.nil? || (row['course_id'].blank? && course.id != section.course_id) ||
(course.id != section.course_id && section.nonxlist_course_id == course.id)
if course.id != section.course_id
add_warning csv, "An enrollment listed a section and a course that are unrelated"
next
end
# cache the course object to avoid later queries for it
# preload the course object to avoid later queries for it
section.course = course
# commit pending incremental account associations
if section != last_section and !incrementally_update_account_associations_user_ids.empty?
if incrementally_update_account_associations_user_ids.length < 10
update_account_association_user_ids.merge(incrementally_update_account_associations_user_ids)
else
User.update_account_associations(incrementally_update_account_associations_user_ids.to_a,
:incremental => true,
:precalculated_associations => User.calculate_account_associations_from_accounts(
[course.account_id, section.nonxlist_course.try(:account_id)].compact.uniq,
account_chain_cache
))
end
incrementally_update_account_associations_user_ids = Set.new
end
enrollment = section.enrollments.find_by_user_id(user.id)
unless enrollment
enrollment = Enrollment.new
@ -148,8 +173,15 @@ module SIS
end
courses_to_touch_ids.add(enrollment.course)
update_account_association_user_ids.add(user.id) if enrollment.should_update_user_account_association?
if enrollment.should_update_user_account_association?
if enrollment.new_record? && !update_account_association_user_ids.include?(user.id)
incrementally_update_account_associations_user_ids.add(user.id)
else
update_account_association_user_ids.add(user.id)
end
end
if enrollment.changed?
users_to_touch_ids.add(user.id)
enrollment.sis_batch_id = @batch.id if @batch
enrollment.save_without_broadcasting
elsif @batch
@ -163,15 +195,28 @@ module SIS
end
end
end
logger.debug("Raw enrollments took #{Time.now - start} seconds")
Enrollment.update_all({:sis_batch_id => @batch.id}, {:id => enrollments_to_update_sis_batch_ids}) if @batch && !enrollments_to_update_sis_batch_ids.empty?
# We batch these up at the end because we don't want to keep touching the same course over and over,
# and to avoid hitting other callbacks for the course (especially broadcast_policy)
Course.update_all({:updated_at => Time.now}, {:id => courses_to_touch_ids.to_a}) unless courses_to_touch_ids.empty?
# We batch these up at the end because normally a user would get several enrollments, and there's no reason
# to update their account associations on each one.
User.update_account_associations(update_account_association_user_ids.to_a)
if incrementally_update_account_associations_user_ids.length < 10
update_account_association_user_ids.merge(incrementally_update_account_associations_user_ids)
else
User.update_account_associations(incrementally_update_account_associations_user_ids.to_a,
:incremental,
:precalculated_associations => User.calculate_account_associations_from_accounts(
[course.account_id, section.nonxlist_course.try(:account_id)].compact.uniq,
account_chain_cache
))
end
User.update_account_associations(update_account_association_user_ids.to_a,
:account_chain_cache => account_chain_cache)
User.update_all({:updated_at => Time.now}, {:id => users_to_touch_ids.to_a}) unless users_to_touch_ids.empty?
logger.debug("Enrollments took #{Time.now - start} seconds")
logger.debug("Enrollments with batch operations took #{Time.now - start} seconds")
end
end
end

View File

@ -923,6 +923,52 @@ describe SIS::SisCsv do
importer.errors.length.should == 0
good_course.teachers.first.name.should == "User Uno"
end
it "should properly handle repeated courses and sections" do
#create course, users, and sections
process_csv_data_cleanly(
"course_id,short_name,long_name,account_id,term_id,status",
"test_1,TC 101,Test Course 101,,,active",
"test_2,TC 102,Test Course 102,,,active"
)
process_csv_data_cleanly(
"user_id,login_id,first_name,last_name,email,status",
"user_1,user1,User,Uno,user@example.com,active",
"user_2,user2,User,Dos,user2@example.com,active",
"user_3,user3,User,Tres,user3@example.com,active",
"user_4,user4,User,Cuatro,user4@example.com,active",
"user_5,user5,User,Cinco,user5@example.com,active",
"user_6,user6,User,Seis,user6@example.com,active"
)
process_csv_data_cleanly(
"section_id,course_id,name,status,start_date,end_date",
"S101,test_1,Sec1.1,active,,",
"S102,test_1,Sec1.2,active,,",
"S201,test_2,Sec2.1,active,,",
"S202,test_2,Sec2.2,active,,"
)
# the enrollments
process_csv_data_cleanly(
"course_id,user_id,role,section_id,status,associated_user_id",
"test_1,user_1,student,,active,",
"test_1,user_2,student,S101,active,",
"test_1,user_3,student,S102,active,",
"test_2,user_4,student,S201,active,",
",user_5,student,S201,active,",
",user_6,student,S202,active,"
)
course1 = @account.courses.find_by_sis_source_id("test_1")
course2 = @account.courses.find_by_sis_source_id("test_2")
course1.default_section.users.first.name.should == "User Uno"
section1_1 = course1.course_sections.find_by_sis_source_id("S101")
section1_1.users.first.name.should == "User Dos"
section1_2 = course1.course_sections.find_by_sis_source_id("S102")
section1_2.users.first.name.should == "User Tres"
section2_1 = course2.course_sections.find_by_sis_source_id("S201")
section2_1.users.map(&:name).sort.should == ["User Cuatro", "User Cinco"].sort
section2_2 = course2.course_sections.find_by_sis_source_id("S202")
section2_2.users.first.name.should == "User Seis"
end
end
context 'account importing' do