# frozen_string_literal: true

#
# Copyright (C) 2021 - present Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#

# rubocop:disable Rails/SquishedSQLHeredocs
# Replaces the PL/pgSQL trigger function `delayed_jobs_after_delete_row_tr_fn`.
#
# `up` installs a version that takes a transaction-level advisory lock keyed on
# `'singleton:' || OLD.singleton` before it inspects other jobs sharing that
# singleton; `down` reinstalls the version without that lock. The two function
# bodies are otherwise identical, so both are produced by one private builder.
#
# NOTE(review): judging by the migration name, the extra lock is meant to
# serialize concurrent deletes of jobs that share a singleton -- confirm
# against the originating change.
class FixSingletonRaceConditionDelete < ActiveRecord::Migration[5.2]
  tag :predeploy

  # Install the trigger function variant that locks on the singleton.
  def up
    execute(delete_trigger_function_sql(lock_singleton: true))
  end

  # Restore the trigger function variant without the singleton lock.
  def down
    execute(delete_trigger_function_sql(lock_singleton: false))
  end

  private

  # Builds the `CREATE OR REPLACE FUNCTION` statement for the trigger function.
  #
  # What the generated function does with the OLD row:
  #
  # * When OLD.strand is set, it decides whether to take the strand advisory
  #   lock (always, unless the row is "imprecise", max_concurrent > 16 and the
  #   strand already has max_concurrent rows with next_in_strand set), then
  #   runs a dynamic UPDATE that flags the next job(s) of that strand as
  #   next_in_strand. In "precise" mode (OLD.id % (max_concurrent * 4) = 0) the
  #   LIMIT is max_concurrent minus the current running count, and the function
  #   returns early when the strand is already full; otherwise the LIMIT is 1,
  #   with SKIP LOCKED added when max_concurrent > 1.
  # * When OLD.singleton is set, it (optionally) takes the singleton advisory
  #   lock, then looks for an unlocked job with the same singleton on a
  #   different strand. If that job has a strand, the function tries to lock
  #   that strand and returns if it cannot; if there is no such job and
  #   OLD.strand is set, it returns. Otherwise it flags the unlocked job with
  #   this singleton as next_in_strand, provided its strand is NULL or has no
  #   other jobs.
  #
  # NOTE(review): the `(j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)`
  # predicate is always true. It looks deliberate (possibly to let the planner
  # use partial indexes on locked_by) -- confirm before simplifying it.
  #
  # @param lock_singleton [Boolean] whether the function body should include
  #   the `pg_advisory_xact_lock` call on the `singleton:`-prefixed key
  # @return [String] the SQL statement to pass to `execute`
  def delete_trigger_function_sql(lock_singleton:)
    # The trailing newline keeps the blank line that follows the lock statement
    # in the installed function source.
    singleton_lock =
      if lock_singleton
        "PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));\n"
      else
        ""
      end

    <<~SQL
      CREATE OR REPLACE FUNCTION #{connection.quote_table_name("delayed_jobs_after_delete_row_tr_fn")} () RETURNS trigger AS $$
      DECLARE
        next_strand varchar;
        running_count integer;
        should_lock boolean;
        should_be_precise boolean;
        update_query varchar;
        skip_locked varchar;
        transition boolean;
      BEGIN
        IF OLD.strand IS NOT NULL THEN
          should_lock := true;
          should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;

          IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
            running_count := (SELECT COUNT(*) FROM (
              SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
            ) subquery_for_count);
            should_lock := running_count < OLD.max_concurrent;
          END IF;

          IF should_lock THEN
            PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
          END IF;

          -- note that we don't really care if the row we're deleting has a singleton, or if it even
          -- matches the row(s) we're going to update. we just need to make sure that whatever
          -- singleton we grab isn't already running (which is a simple existence check, since
          -- the unique indexes ensure there is at most one singleton running, and one queued)
          update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
            SELECT id FROM delayed_jobs j2
              WHERE next_in_strand=false AND
                j2.strand=$1.strand AND
                (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
              ORDER BY j2.strand_order_override ASC, j2.id ASC
              LIMIT ';

          IF should_be_precise THEN
            running_count := (SELECT COUNT(*) FROM (
              SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
            ) s);
            IF running_count < OLD.max_concurrent THEN
              update_query := update_query || '($1.max_concurrent - $2)';
            ELSE
              -- we have too many running already; just bail
              RETURN OLD;
            END IF;
          ELSE
            update_query := update_query || '1';

            -- n-strands don't require precise ordering; we can make this query more performant
            IF OLD.max_concurrent > 1 THEN
              skip_locked := ' SKIP LOCKED';
            END IF;
          END IF;

          update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
          EXECUTE update_query USING OLD, running_count;
        END IF;

        IF OLD.singleton IS NOT NULL THEN
          #{singleton_lock}
          transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);

          IF transition THEN
            next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);

            IF next_strand IS NOT NULL THEN
              -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
              IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
                -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
                RETURN OLD;
              END IF;
            END IF;
          ELSIF OLD.strand IS NOT NULL THEN
            -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
            RETURN OLD;
          END IF;

          -- handles transitioning a singleton from stranded to not stranded --
          -- handles transitioning a singleton from unstranded to stranded --
          -- handles transitioning a singleton from strand A to strand B --
          -- these transitions are a relatively rare case, so we take a shortcut and --
          -- only start the next singleton if its strand does not currently have any running jobs --
          -- if it does, the next stranded job that finishes will start this singleton if it can --
          UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
            SELECT id FROM delayed_jobs j2
              WHERE next_in_strand=false AND
                j2.singleton=OLD.singleton AND
                j2.locked_by IS NULL AND
                (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
              FOR UPDATE
          );
        END IF;
        RETURN OLD;
      END;
      $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
    SQL
  end
end
# rubocop:enable Rails/SquishedSQLHeredocs