canvas-lms/lib/submission_lifecycle_manage...

530 lines
23 KiB
Ruby

# frozen_string_literal: true
#
# Copyright (C) 2013 - present Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
require "anonymity"
class SubmissionLifecycleManager
# #recompute calls create_moderation_selections_for_assignment, which is not
# defined in this class; presumably it is provided by Moderation — confirm.
include Moderation
# Maximum number of "SubmissionLifecycleManager#recompute_for_sis_import" jobs
# allowed to be running on a course's shard before recompute_users_for_course
# defers a new SIS-import recompute.
MAX_RUNNING_JOBS = 10
# Stack of executing users maintained by with_executing_user and read by
# current_executing_user. Declared thread-scoped, with no instance-level accessor.
thread_mattr_accessor :executing_users, instance_accessor: false
# These methods allow the caller to specify a user to whom due date
# changes should be attributed (currently this is used for creating
# audit events for anonymous or moderated assignments), and are meant
# to be used when SubmissionLifecycleManager is invoked in a callback or a similar
# place where directly specifying an executing user is impractical.
#
# SubmissionLifecycleManager.with_executing_user(a_user) do
# # do something to update due dates, like saving an assignment override
# # any SubmissionLifecycleManager calls that occur while an executing
# # user is set will attribute changes to that user
# end
#
# Users are stored on a stack, so nested calls will work as expected.
# A value of nil may also be passed to indicate that no user should be
# credited (in which case audit events will not be recorded).
#
# You may also specify a user explicitly when calling the class methods:
# SubmissionLifecycleManager.recompute(assignment, update_grades: true, executing_user: a_user)
#
# An explicitly specified user will take precedence over any users specified
# via with_executing_user, but will not otherwise affect the current "stack"
# of executing users.
#
# If you are calling SubmissionLifecycleManager in a delayed job of your own making (e.g.,
# Assignment#run_if_overrides_changed_later!), you should pass a user
# explicitly rather than relying on the user stored in with_executing_user
# at the time you create the delayed job.
# Pushes +user+ onto the executing-user stack for the duration of the block
# and pops it again afterwards, even when the block raises.
#
# user - the user (or nil) to credit while the block runs
#
# Returns whatever the block returns.
def self.with_executing_user(user)
  (self.executing_users ||= []) << user
  begin
    yield
  ensure
    executing_users.pop
  end
end
# Returns the user on top of the executing-user stack, or nil when the stack
# is empty. Initializes the stack to an empty array if it has not been set.
def self.current_executing_user
  (self.executing_users ||= []).last
end
# Returns a SQL CASE expression (as a String) that derives a submission
# workflow state from other columns. Branches are evaluated in this order:
#
#   1. an online_quiz submission whose quiz submission row is in
#      'pending_review'                                  -> 'pending_review'
#   2. grade is present, or the submission is excused    -> 'graded'
#   3. any other online_quiz submission that has a
#      quiz_submission_id                                -> 'pending_review'
#   4. submission_type and submitted_at are both present -> 'submitted'
#   5. otherwise                                         -> 'unsubmitted'
#
# The column references are unqualified, so the fragment has to be embedded in
# a statement where the submissions columns are in scope (see
# perform_submission_upsert). Only the quiz submissions table name is
# interpolated; nothing caller-supplied ends up in the SQL.
def self.infer_submission_workflow_state_sql
<<~SQL_FRAGMENT
CASE
WHEN submission_type = 'online_quiz' AND quiz_submission_id IS NOT NULL AND (
SELECT EXISTS (
SELECT
*
FROM
#{Quizzes::QuizSubmission.quoted_table_name} qs
WHERE
quiz_submission_id = qs.id
AND workflow_state = 'pending_review'
)
) THEN
'pending_review'
WHEN grade IS NOT NULL OR excused IS TRUE THEN
'graded'
WHEN submission_type = 'online_quiz' AND quiz_submission_id IS NOT NULL THEN
'pending_review'
WHEN submission_type IS NOT NULL AND submitted_at IS NOT NULL THEN
'submitted'
ELSE
'unsubmitted'
END
SQL_FRAGMENT
end
# Recomputes cached due dates for a single assignment by delegating to
# recompute_course with just that assignment's id. Does nothing (returns nil)
# unless the assignment is persisted and active.
#
# assignment     - the assignment whose submissions should be refreshed
# update_grades  - forwarded to recompute_course
# executing_user - forwarded to recompute_course
def self.recompute(assignment, update_grades: false, executing_user: nil)
  current_caller = caller(1..1).first
  Rails.logger.debug "DDC.recompute(#{assignment&.id}) - #{current_caller}"
  return if !assignment.persisted? || !assignment.active?

  # The job gets a per-assignment singleton key here; recompute_course also
  # places it on a per-course strand, because a bunch of assignment updates
  # with update_grades could otherwise end up causing score table fights.
  singleton = "cached_due_date:calculator:Assignment:#{assignment.global_id}:UpdateGrades:#{update_grades ? 1 : 0}"
  recompute_course(
    assignment.context,
    assignments: [assignment.id],
    inst_jobs_opts: { singleton:, max_attempts: 10 },
    update_grades:,
    original_caller: current_caller,
    executing_user:
  )
end
# Recomputes cached due dates for a course, either for the given assignments
# or, when +assignments+ is nil, for every active assignment in the course.
#
# course                      - a Course, or a value Course.find can resolve
# assignments                 - assignments/ids to limit the work to (nil = all active)
# inst_jobs_opts              - job options; this hash is filled in with defaults in place
# run_immediately             - run inline instead of going through delay_if_production
# update_grades               - passed to the manager
# original_caller             - caller string recorded for logging
# executing_user              - explicit user; falls back to current_executing_user
# skip_late_policy_applicator - passed to the manager
#
# Returns nil when there is nothing to recompute, otherwise the result of the
# (possibly delayed) #recompute call.
def self.recompute_course(course, assignments: nil, inst_jobs_opts: {}, run_immediately: false, update_grades: false, original_caller: caller(1..1).first, executing_user: nil, skip_late_policy_applicator: false)
  Rails.logger.debug "DDC.recompute_course(#{course.inspect}, #{assignments.inspect}, #{inst_jobs_opts.inspect}) - #{original_caller}"
  course = Course.find(course) unless course.is_a?(Course)

  inst_jobs_opts[:max_attempts] ||= 10
  if assignments.nil?
    # Only whole-course recomputes get a default singleton key.
    inst_jobs_opts[:singleton] ||= "cached_due_date:calculator:Course:#{course.global_id}:UpdateGrades:#{update_grades ? 1 : 0}"
  end
  inst_jobs_opts[:strand] ||= "cached_due_date:calculator:Course:#{course.global_id}"

  assignment_ids = assignments || AbstractAssignment.active.where(context: course).pluck(:id)
  return if assignment_ids.empty?

  manager = new(
    course,
    assignment_ids,
    update_grades:,
    original_caller:,
    executing_user: executing_user || current_executing_user,
    skip_late_policy_applicator:
  )
  return manager.recompute if run_immediately

  manager.delay_if_production(**inst_jobs_opts).recompute
end
# Recomputes cached due dates for specific users in a course through a
# delayed job.
#
# user_ids       - one id or an array of ids
# course         - a Course, or a value Course.find can resolve
# assignments    - assignments/ids to limit the work to (nil = all active)
# inst_jobs_opts - job options mixed with manager options. The keys
#                  :update_grades, :executing_user, :sis_import and
#                  :require_singleton are removed from this hash in place; the
#                  rest is passed to delay_if_production after defaults for
#                  :max_attempts, :strand and (optionally) :singleton are added.
#
# For SIS imports, when MAX_RUNNING_JOBS or more recompute_for_sis_import jobs
# are already running on the course's shard, this call re-enqueues itself to
# run again in 10 seconds instead of starting another recompute.
def self.recompute_users_for_course(user_ids, course, assignments = nil, inst_jobs_opts = {})
  manager_keys = %i[update_grades executing_user sis_import require_singleton]
  opts = inst_jobs_opts.extract!(*manager_keys).reverse_merge(require_singleton: assignments.nil?)
  user_ids = Array(user_ids)
  course = Course.find(course) unless course.is_a?(Course)
  update_grades = opts[:update_grades] || false

  inst_jobs_opts[:max_attempts] ||= 10
  inst_jobs_opts[:strand] ||= "cached_due_date:calculator:Course:#{course.global_id}"
  if opts[:require_singleton]
    inst_jobs_opts[:singleton] ||= "cached_due_date:calculator:Course:#{course.global_id}:Users:#{Digest::SHA256.hexdigest(user_ids.sort.join(":"))}:UpdateGrades:#{update_grades ? 1 : 0}"
  end

  assignments ||= AbstractAssignment.active.where(context: course).pluck(:id)
  return if assignments.empty?

  current_caller = caller(1..1).first
  executing_user = opts[:executing_user] || current_executing_user
  for_sis_import = opts[:sis_import]

  if for_sis_import
    running_jobs_count = Delayed::Job.running.where(shard_id: course.shard.id, tag: "SubmissionLifecycleManager#recompute_for_sis_import").count
    if running_jobs_count >= MAX_RUNNING_JOBS
      # there are too many sis recompute jobs running concurrently now. let's check again in a bit to see if we can run.
      deferred = delay_if_production(**inst_jobs_opts, run_at: 10.seconds.from_now)
      return deferred.recompute_users_for_course(user_ids, course, assignments, opts)
    end
  end

  # The manager is only built once we know the work will be enqueued now.
  manager = new(course, assignments, user_ids, update_grades:, original_caller: current_caller, executing_user:)
  delayed_manager = manager.delay_if_production(**inst_jobs_opts)
  for_sis_import ? delayed_manager.recompute_for_sis_import : delayed_manager.recompute
end
# Sets up a manager for one course.
#
# course                      - the course; its shard is activated for the lookups below
# assignments                 - assignment records and/or ids (records are reduced to ids)
# user_ids                    - optional user ids to restrict the work to
# update_grades               - whether #recompute should also recompute student scores
# original_caller             - caller string kept for logging
# executing_user              - a User or user id to attribute audit events to; left unset when blank
# skip_late_policy_applicator - whether #recompute should skip LatePolicyApplicator
def initialize(course, assignments, user_ids = [], update_grades: false, original_caller: caller(1..1).first, executing_user: nil, skip_late_policy_applicator: false)
  @course = course
  @assignment_ids = Array(assignments).map do |assignment|
    assignment.is_a?(AbstractAssignment) ? assignment.id : assignment
  end

  if @assignment_ids.blank?
    @assignments_auditable_by_id = Set.new
  else
    @course.shard.activate do
      # ensure we're dealing with local IDs to avoid headaches downstream
      has_global_ids = @assignment_ids.any? { |id| AbstractAssignment.global_id?(id) }
      @assignment_ids = AbstractAssignment.where(id: @assignment_ids).pluck(:id) if has_global_ids

      auditable_ids = AbstractAssignment.auditable.where(id: @assignment_ids).pluck(:id)
      @assignments_auditable_by_id = Set.new(auditable_ids)
    end
  end

  @user_ids = Array(user_ids)
  @update_grades = update_grades
  @original_caller = original_caller
  @skip_late_policy_applicator = skip_late_policy_applicator
  return unless executing_user.present?

  @executing_user_id = executing_user.is_a?(User) ? executing_user.id : executing_user
end
# exists so that we can identify (and limit) jobs running specifically for sis imports
# Delayed::Job.where(tag: "SubmissionLifecycleManager#recompute_for_sis_import")
#
# Behaves exactly like #recompute; only the method name differs.
# recompute_users_for_course counts running jobs with the tag above to enforce
# MAX_RUNNING_JOBS.
def recompute_for_sis_import
recompute
end
# Brings submissions for @assignment_ids (optionally limited to @user_ids) in
# line with the current effective due dates:
#
#   1. builds one row of values per (assignment, assigned student),
#   2. marks submissions as deleted for students who are no longer assigned
#      or no longer enrolled,
#   3. upserts the rows in batches of 1000, recording audit events for
#      auditable assignments when an executing user is known,
#   4. optionally recomputes student scores and applies the late policy.
#
# NOTE(review): `return if values.empty?` sits inside the shard block, so it
# returns from the whole method; in that case the score recompute and
# LatePolicyApplicator steps at the bottom are skipped — confirm this is intended.
def recompute
Rails.logger.debug "SUBMISSION LIFECYCLE MANAGER STARTS: #{Time.zone.now.to_i}"
Rails.logger.debug "SLM#recompute() - original caller: #{@original_caller}"
Rails.logger.debug "SLM#recompute() - current caller: #{caller(1..1).first}"
# on the correct shard (NOTE(review): no explicit transaction is opened here;
# perform_submission_upsert wraps its own statement in one):
@course.shard.activate do
# Each entry is [assignment_id, student_id, due_date SQL, grading_period_id,
# quoted anonymous_id, quiz_lti flag, root_account_id] — the same column order
# as the VALUES alias in perform_submission_upsert.
values = []
assignments_by_id = AbstractAssignment.find(@assignment_ids).index_by(&:id)
effective_due_dates.to_hash.each do |assignment_id, student_due_dates|
existing_anonymous_ids = existing_anonymous_ids_by_assignment_id[assignment_id]
create_moderation_selections_for_assignment(assignments_by_id[assignment_id], student_due_dates.keys, @user_ids)
quiz_lti = quiz_lti_assignments.include?(assignment_id)
student_due_dates.each_key do |student_id|
submission_info = student_due_dates[student_id]
# Due dates are truncated to whole seconds and rendered as a SQL literal.
due_date = submission_info[:due_at] ? "'#{ActiveRecord::Base.connection.quoted_date(submission_info[:due_at].change(usec: 0))}'::timestamptz" : "NULL"
grading_period_id = submission_info[:grading_period_id] || "NULL"
# A candidate anonymous id is generated for every student; the upsert only
# uses it when the submission does not already have one (COALESCE).
anonymous_id = Anonymity.generate_id(existing_ids: existing_anonymous_ids)
existing_anonymous_ids << anonymous_id
sql_ready_anonymous_id = Submission.connection.quote(anonymous_id)
values << [assignment_id, student_id, due_date, grading_period_id, sql_ready_anonymous_id, quiz_lti, @course.root_account_id]
end
end
assignments_to_delete_all_submissions_for = []
# Delete submissions for students who don't have visibility to this assignment anymore
@assignment_ids.each do |assignment_id|
assigned_student_ids = effective_due_dates.find_effective_due_dates_for_assignment(assignment_id).keys
# No user filter, nobody assigned and no prior students: every active
# submission for the assignment can be deleted (done in bulk below).
if @user_ids.blank? && assigned_student_ids.blank? && enrollment_counts.prior_student_ids.blank?
assignments_to_delete_all_submissions_for << assignment_id
else
# Delete the users we KNOW we need to delete in batches (it makes the database happier this way)
deletable_student_ids =
enrollment_counts.accepted_student_ids - assigned_student_ids - enrollment_counts.prior_student_ids
deletable_student_ids.each_slice(1000) do |deletable_student_ids_chunk|
# using this approach instead of using .in_batches because we want to limit the IDs in the IN clause to 1k
Submission.active.where(assignment_id:, user_id: deletable_student_ids_chunk)
.update_all(workflow_state: :deleted, updated_at: Time.zone.now)
end
User.clear_cache_keys(deletable_student_ids, :submissions)
end
end
# Up to 50 assignments per pass, 1000 rows per UPDATE, repeated until an
# UPDATE reports that it touched no rows.
assignments_to_delete_all_submissions_for.each_slice(50) do |assignment_slice|
subs = Submission.active.where(assignment_id: assignment_slice).limit(1_000)
while subs.update_all(workflow_state: :deleted, updated_at: Time.zone.now) > 0; end
end
nq_restore_pending_flag_enabled = Account.site_admin.feature_enabled?(:new_quiz_deleted_workflow_restore_pending_review_state)
# Get any stragglers that might have had their enrollment removed from the course
# 100 students at a time for 10 assignments each == slice of up to 1K submissions
enrollment_counts.deleted_student_ids.each_slice(100) do |student_slice|
@assignment_ids.each_slice(10) do |assignment_ids_slice|
Submission.active
.where(assignment_id: assignment_ids_slice, user_id: student_slice)
.update_all(workflow_state: :deleted, updated_at: Time.zone.now)
end
User.clear_cache_keys(student_slice, :submissions)
end
# Returns from #recompute itself, not just from the shard block (see the
# NOTE(review) above the method).
return if values.empty?
# Sort by assignment id so each batch covers as few assignments as possible.
values = values.sort_by(&:first)
values.each_slice(1000) do |batch|
auditable_entries = []
cached_due_dates_by_submission = {}
# Snapshot the current cached due dates before the upsert so that changes
# can be detected afterwards.
if record_due_date_changed_events?
auditable_entries = batch.select { |entry| @assignments_auditable_by_id.include?(entry.first) }
cached_due_dates_by_submission = current_cached_due_dates(auditable_entries)
end
if nq_restore_pending_flag_enabled
handle_lti_deleted_submissions(batch)
end
# prepare values for SQL interpolation
batch_values = batch.map { |entry| "(#{entry.join(",")})" }
perform_submission_upsert(batch_values)
next unless record_due_date_changed_events? && auditable_entries.present?
record_due_date_changes_for_auditable_assignments!(
entries: auditable_entries,
previous_cached_dates: cached_due_dates_by_submission
)
end
# Index 1 of each entry is the student id.
User.clear_cache_keys(values.pluck(1), :submissions)
end
if @update_grades
@course.recompute_student_scores_without_send_later(@user_ids)
end
if @assignment_ids.size == 1 && !@skip_late_policy_applicator
# Only changes to LatePolicy or (sometimes) AbstractAssignment records can result in a re-calculation
# of student scores. No changes to the Course record can trigger such re-calculations, so
# let's ensure this is triggered only when SubmissionLifecycleManager is called for
# Assignment-level changes and not for Course-level changes
assignment = @course.shard.activate { AbstractAssignment.find(@assignment_ids.first) }
LatePolicyApplicator.for_assignment(assignment)
end
end
private
# Student user ids of the course, split into three buckets by enrollment state.
EnrollmentCounts = Struct.new(:accepted_student_ids, :prior_student_ids, :deleted_student_ids)

# Memoized classification of the course's student users (limited to @user_ids
# when those are given), read from the secondary database:
#
#   deleted  - no enrollment outside the 'rejected'/'deleted' states
#   prior    - every non-rejected, non-deleted enrollment is 'completed'
#   accepted - everyone else
def enrollment_counts
  @enrollment_counts ||= EnrollmentCounts.new([], [], []).tap do |counts|
    GuardRail.activate(:secondary) do
      # The various workflow states below try to mimic similarly named scopes off of course
      per_user = Enrollment.select(
        :user_id,
        "count(nullif(workflow_state not in ('rejected', 'deleted'), false)) as accepted_count",
        "count(nullif(workflow_state in ('completed'), false)) as prior_count",
        "count(nullif(workflow_state in ('rejected', 'deleted'), false)) as deleted_count"
      )
      per_user = per_user.where(course_id: @course, type: ["StudentEnrollment", "StudentViewEnrollment"])
      per_user = per_user.group(:user_id)
      per_user = per_user.where(user_id: @user_ids) if @user_ids.present?

      per_user.find_each do |row|
        bucket =
          if !(row.accepted_count > 0)
            counts.deleted_student_ids
          elsif row.accepted_count == row.prior_count
            counts.prior_student_ids
          else
            counts.accepted_student_ids
          end
        bucket << row.user_id
      end
    end
  end
end
# Memoized EffectiveDueDates for the course and @assignment_ids, narrowed to
# @user_ids when any were given.
def effective_due_dates
  @effective_due_dates ||= EffectiveDueDates.for_course(@course, @assignment_ids).tap do |due_dates|
    due_dates.filter_students_to(@user_ids) if @user_ids.present?
  end
end
# Looks up the currently stored cached_due_date of the submissions matching
# the given entries (arrays starting with assignment_id, student_id).
#
# Returns a Hash of submission id => cached_due_date. Submissions whose
# cached_due_date is NULL are left out; empty input gives an empty Hash.
def current_cached_due_dates(entries)
  return {} if entries.empty?

  id_pairs = assignment_and_student_id_values(entries:).join(",")
  Submission.where("(assignment_id, user_id) IN (#{id_pairs})")
            .where.not(cached_due_date: nil)
            .pluck(:id, :cached_due_date)
            .to_h
end
# Inserts one "submission_updated" AnonymousOrModerationEvent for every
# submission among +entries+ whose cached_due_date now differs from the value
# recorded before the upsert.
#
# entries               - arrays starting with assignment_id, student_id
# previous_cached_dates - Hash of submission id => cached_due_date taken
#                         before the upsert; a missing id counts as nil
#
# Returns nil without querying when +entries+ is empty (an empty list would
# otherwise produce an invalid "IN ()" clause); otherwise returns whatever
# AnonymousOrModerationEvent.bulk_insert returns.
def record_due_date_changes_for_auditable_assignments!(entries:, previous_cached_dates:)
  return if entries.empty?

  entries_for_query = assignment_and_student_id_values(entries:)
  updated_submissions = Submission.where("(assignment_id, user_id) IN (#{entries_for_query.join(",")})")
                                  .pluck(:id, :assignment_id, :cached_due_date)

  # One shared timestamp so every event of this batch carries the same time.
  timestamp = Time.zone.now
  records_to_insert = updated_submissions.filter_map do |submission_id, assignment_id, new_due_date|
    old_due_date = previous_cached_dates.fetch(submission_id, nil)
    next if new_due_date == old_due_date

    payload = { due_at: [old_due_date&.iso8601, new_due_date&.iso8601] }
    {
      assignment_id:,
      submission_id:,
      user_id: @executing_user_id,
      event_type: "submission_updated",
      payload: payload.to_json,
      created_at: timestamp,
      updated_at: timestamp
    }
  end

  AnonymousOrModerationEvent.bulk_insert(records_to_insert)
end
# Turns entries (arrays starting with assignment_id, student_id) into
# "(assignment_id, student_id)" strings for use in a SQL row-value IN list.
# Any further elements of an entry are ignored.
def assignment_and_student_id_values(entries:)
  entries.map do |entry|
    assignment_id, student_id = entry
    "(#{assignment_id}, #{student_id})"
  end
end
# Whether due date changes should be written as audit events: only when we
# have an executing user and at least one auditable assignment.
#
# Memoized with defined? rather than ||= so that a false answer is cached too;
# with ||= a false result would be recomputed on every call.
def record_due_date_changed_events?
  return @record_due_date_changed_events if defined?(@record_due_date_changed_events)

  @record_due_date_changed_events = @executing_user_id.present? && @assignments_auditable_by_id.present?
end
# Memoized Set of the ids among @assignment_ids that have a non-deleted
# content tag pointing at a quiz LTI external tool.
def quiz_lti_assignments
  # We only care about quiz LTIs, so we'll only snag those. In fact,
  # we only care if the assignment *is* a quiz LTI, so we'll just
  # keep a set of those assignment ids.
  @quiz_lti_assignments ||= begin
    tool_join = "INNER JOIN #{ContextExternalTool.quoted_table_name} ON content_tags.content_type='ContextExternalTool' AND context_external_tools.id = content_tags.content_id"
    quiz_lti_tags = ContentTag.joins(tool_join)
                              .merge(ContextExternalTool.quiz_lti)
                              .where(context_type: "Assignment")
    # We're doing the following direct postgres any() rather than .where(context_id: @assignment_ids) on advice
    # from our DBAs that the any is considerably faster in the postgres planner than the "IN ()" statement that
    # AR would have generated.
    quiz_lti_tags = quiz_lti_tags.where("content_tags.context_id = any('{?}'::int8[])", @assignment_ids)
    quiz_lti_tags.where.not(workflow_state: "deleted").distinct.pluck(:context_id).to_set
  end
end
# Memoized Hash of assignment id => array of anonymous ids already used by
# that assignment's submissions. Looking up an assignment without any stores
# and returns a new empty array, so callers can append to the result directly.
def existing_anonymous_ids_by_assignment_id
  @existing_anonymous_ids_by_assignment_id ||= begin
    ids_by_assignment = Hash.new { |hash, assignment_id| hash[assignment_id] = [] }
    Submission.anonymized
              .for_assignment(effective_due_dates.to_hash.keys)
              .pluck(:assignment_id, :anonymous_id)
              .each { |assignment_id, anonymous_id| ids_by_assignment[assignment_id] << anonymous_id }
    ids_by_assignment
  end
end
# Runs one UPDATE followed by one INSERT for a batch of submission values.
#
# batch_values - array of SQL tuple strings "(assignment_id, student_id,
#                due_date, grading_period_id, anonymous_id, cached_quiz_lti,
#                root_account_id)", already quoted by the caller; they are
#                interpolated verbatim into both statements.
#
# The UPDATE touches only existing submissions for which at least one of
# cached_due_date, grading_period_id, workflow_state, anonymous_id or
# cached_quiz_lti would actually change. A 'deleted' submission gets its
# workflow_state re-inferred via infer_submission_workflow_state_sql, and an
# existing anonymous_id is never overwritten.
# The INSERT creates an 'unsubmitted' submission for every (assignment,
# student) pair in the batch that has no submission row yet.
#
# Raises Delayed::RetriableError on a unique-constraint violation or a
# deadlock, after reporting the original error at :warn level (presumably so
# that the surrounding job is retried — confirm against Delayed's handling).
def perform_submission_upsert(batch_values)
# Construct upsert statement to update existing Submissions or create them if needed.
query = <<~SQL.squish
UPDATE #{Submission.quoted_table_name}
SET
cached_due_date = vals.due_date::timestamptz,
grading_period_id = vals.grading_period_id::integer,
workflow_state = COALESCE(NULLIF(workflow_state, 'deleted'), (
#{self.class.infer_submission_workflow_state_sql}
)),
anonymous_id = COALESCE(submissions.anonymous_id, vals.anonymous_id),
cached_quiz_lti = vals.cached_quiz_lti,
updated_at = now() AT TIME ZONE 'UTC'
FROM (VALUES #{batch_values.join(",")})
AS vals(assignment_id, student_id, due_date, grading_period_id, anonymous_id, cached_quiz_lti, root_account_id)
WHERE submissions.user_id = vals.student_id AND
submissions.assignment_id = vals.assignment_id AND
(
(submissions.cached_due_date IS DISTINCT FROM vals.due_date::timestamptz) OR
(submissions.grading_period_id IS DISTINCT FROM vals.grading_period_id::integer) OR
(submissions.workflow_state <> COALESCE(NULLIF(submissions.workflow_state, 'deleted'),
(#{self.class.infer_submission_workflow_state_sql})
)) OR
(submissions.anonymous_id IS DISTINCT FROM COALESCE(submissions.anonymous_id, vals.anonymous_id)) OR
(submissions.cached_quiz_lti IS DISTINCT FROM vals.cached_quiz_lti)
);
INSERT INTO #{Submission.quoted_table_name}
(assignment_id, user_id, workflow_state, created_at, updated_at, course_id,
cached_due_date, grading_period_id, anonymous_id, cached_quiz_lti, root_account_id)
SELECT
assignments.id, vals.student_id, 'unsubmitted',
now() AT TIME ZONE 'UTC', now() AT TIME ZONE 'UTC',
assignments.context_id, vals.due_date::timestamptz, vals.grading_period_id::integer,
vals.anonymous_id,
vals.cached_quiz_lti,
vals.root_account_id
FROM (VALUES #{batch_values.join(",")})
AS vals(assignment_id, student_id, due_date, grading_period_id, anonymous_id, cached_quiz_lti, root_account_id)
INNER JOIN #{AbstractAssignment.quoted_table_name} assignments
ON assignments.id = vals.assignment_id
LEFT OUTER JOIN #{Submission.quoted_table_name} submissions
ON submissions.assignment_id = assignments.id
AND submissions.user_id = vals.student_id
WHERE submissions.id IS NULL;
SQL
begin
# Both statements are sent in a single execute call inside one transaction.
Submission.transaction do
Submission.connection.execute(query)
end
rescue ActiveRecord::RecordNotUnique => e
# Presumably another process inserted the same submission between our
# existence check and the INSERT — confirm.
Canvas::Errors.capture_exception(:submission_lifecycle_manager, e, :warn)
raise Delayed::RetriableError, "Unique record violation when creating new submissions"
rescue ActiveRecord::Deadlocked => e
Canvas::Errors.capture_exception(:submission_lifecycle_manager, e, :warn)
raise Delayed::RetriableError, "Deadlock when upserting submissions"
end
end
# For the quiz LTI entries of a batch, finds deleted submissions that have a
# stored version in the "pending_review" state and sets their workflow_state
# back to "pending_review". #recompute calls this just before the upsert, and
# only when the new_quiz_deleted_workflow_restore_pending_review_state feature
# flag is enabled.
#
# batch - entries as built in #recompute: [assignment_id, student_id,
#         due_date, grading_period_id, anonymous_id, quiz_lti, root_account_id]
def handle_lti_deleted_submissions(batch)
# Position of the quiz_lti flag within each entry (see the layout above).
quiz_lti_index = 5
assignments_and_users_query = batch.each_with_object([]) do |entry, memo|
next unless entry[quiz_lti_index]
memo << "(#{entry.first}, #{entry.second})"
end
return if assignments_and_users_query.empty?
submission_join_query = <<~SQL.squish
INNER JOIN (VALUES #{assignments_and_users_query.join(",")})
AS vals(assignment_id, student_id)
ON submissions.assignment_id = vals.assignment_id
AND submissions.user_id = vals.student_id
SQL
submission_query = Submission.deleted.joins(submission_join_query)
# NOTE(review): ActiveRecord's `distinct` takes a boolean-style flag rather
# than a column list, so this is presumably a plain SELECT DISTINCT over whole
# rows and not "one (latest) version per versionable_id". If so, a submission
# is restored when ANY of its versions was pending_review, not only the most
# recent one — confirm the intent.
submission_versions_to_check = Version
.where(versionable: submission_query)
.order(number: :desc)
.distinct(:versionable_id)
# `select` with a block filters the loaded versions in Ruby, so the `pluck`
# that follows presumably runs on an Array rather than issuing a query.
submissions_in_pending_review = submission_versions_to_check
.select { |version| version.model.workflow_state == "pending_review" }
.pluck(:versionable_id)
if submissions_in_pending_review.any?
# Only workflow_state is written here; updated_at is not part of this update.
Submission.where(id: submissions_in_pending_review).update_all(workflow_state: "pending_review")
end
end
end