don't keep bouncing the same channels repeatedly

also store a fast flag to prevent many jobs from trying
to hit the same address right on the heels of each other.

Change-Id: I8dfbcbf7a5b0a3ff3445840bc031d1e4ea3cf79f
Reviewed-on: https://gerrit.instructure.com/c/canvas-lms/+/260575
Reviewed-by: Rob Orton <rob@instructure.com>
Tested-by: Service Cloud Jenkins <svc.cloudjenkins@instructure.com>
QA-Review: Ethan Vizitei <evizitei@instructure.com>
Product-Review: Ethan Vizitei <evizitei@instructure.com>
This commit is contained in:
Ethan Vizitei 2021-03-12 13:03:34 -06:00
parent e1a2f30ece
commit ef86b153f3
3 changed files with 72 additions and 12 deletions

View File

@ -93,13 +93,32 @@ class BounceNotificationProcessor
InstStatsd::Statsd.increment("bounce_notification_processor.processed.#{type}")
bouncy_addresses(bounce_notification).each do |address|
CommunicationChannel.bounce_for_path(
path: address,
timestamp: bounce_timestamp(bounce_notification),
details: bounce_notification,
permanent_bounce: is_permanent_bounce?(bounce_notification),
suppression_bounce: is_suppression_bounce?(bounce_notification)
)
cache_key = "bounce_notification_#{address}"
# repeatedly bouncing the same address at the same time is
# not helpful (not much additional informational value)
# and has negative consequences for the database
# (queries can pile up). This redis check is intended to be an inexpensive
# "best effort" at not-completely-necessary synchronization
# while expiring quickly enough to prevent bunches of new users
# with the same email address from getting overlooked by bounce methods.
# Redis keys get evicted at random, so it might not stick around,
# and it expires quickly, so if the process takes a few minutes
# another process might start bouncing this address again, but at least
# they shouldn't be trying to start within a minute of each other
# most of the time.
if Rails.cache.read(cache_key) == "running"
Rails.logger.info("[BOUNCE_NOTIFICATION] Not processing bounce for #{address}, another job is already processing right now")
else
Rails.cache.write(cache_key, "running", expires_in: 3.minutes)
CommunicationChannel.bounce_for_path(
path: address,
timestamp: bounce_timestamp(bounce_notification),
details: bounce_notification,
permanent_bounce: is_permanent_bounce?(bounce_notification),
suppression_bounce: is_suppression_bounce?(bounce_notification)
)
Rails.cache.delete(cache_key)
end
end
end

View File

@ -540,7 +540,16 @@ class CommunicationChannel < ActiveRecord::Base
return if !permanent_bounce && CommunicationChannel.associated_shards(path).count > Setting.get("comm_channel_shard_count_too_high", '50').to_i
Shard.with_each_shard(CommunicationChannel.associated_shards(path)) do
CommunicationChannel.unretired.email.by_path(path).where("bounce_count<?", RETIRE_THRESHOLD).find_in_batches do |batch|
cc_scope = CommunicationChannel.unretired.email.by_path(path).where("bounce_count<?", RETIRE_THRESHOLD)
# If allowed to do this naively, trying to capture bounces on the same
# email address over and over can lead to serious db churn. Here we
# try to capture only the newly created communication channels for this path,
# or the ones that have NOT been bounced in the last hour, to make sure
# we aren't doing un-helpful overwork.
debounce_window = Setting.get("comm_channel_bounce_debounce_window_in_min", "60").to_i
bounce_field = suppression_bounce ? "last_suppression_bounce_at" : (permanent_bounce ? "last_bounce_at" : "last_transient_bounce_at")
bouncable_scope = cc_scope.where("#{bounce_field} IS NULL OR updated_at < ?", debounce_window.minutes.ago)
bouncable_scope.find_in_batches do |batch|
update = if suppression_bounce
{ last_suppression_bounce_at: timestamp, updated_at: Time.zone.now }
elsif permanent_bounce

View File

@ -21,6 +21,10 @@
require File.expand_path(File.dirname(__FILE__) + '/../sharding_spec_helper')
describe CommunicationChannel do
# Run the message partitioner once for this example group so the
# messages tables it manages are in place before any example executes.
before(:once) { Messages::Partitioner.process }
before(:each) do
@pseudonym = double('Pseudonym')
allow(@pseudonym).to receive(:destroyed?).and_return(false)
@ -151,7 +155,8 @@ describe CommunicationChannel do
path: 'foo@bar.edu',
last_bounce_at: '2015-01-01T01:01:01.000Z',
last_suppression_bounce_at: '2015-03-03T03:03:03.000Z',
last_transient_bounce_at: '2015-04-04T04:04:04.000Z'
last_transient_bounce_at: '2015-04-04T04:04:04.000Z',
updated_at: '2015-04-04T04:04:04.000Z'
)
CommunicationChannel.bounce_for_path(
path: 'foo@bar.edu',
@ -465,7 +470,8 @@ describe CommunicationChannel do
path: 'foo@bar.edu',
last_bounce_at: '2015-01-01T01:01:01.000Z',
last_suppression_bounce_at: '2015-03-03T03:03:03.000Z',
last_transient_bounce_at: '2015-04-04T04:04:04.000Z'
last_transient_bounce_at: '2015-04-04T04:04:04.000Z',
updated_at: '2015-04-04T04:04:04.000Z'
)
CommunicationChannel.bounce_for_path(
path: 'foo@bar.edu',
@ -486,7 +492,8 @@ describe CommunicationChannel do
path: 'foo@bar.edu',
last_bounce_at: '2015-01-01T01:01:01.000Z',
last_suppression_bounce_at: '2015-03-03T03:03:03.000Z',
last_transient_bounce_at: '2015-04-04T04:04:04.000Z'
last_transient_bounce_at: '2015-04-04T04:04:04.000Z',
updated_at: '2015-04-04T04:04:04.000Z'
)
CommunicationChannel.bounce_for_path(
path: 'foo@bar.edu',
@ -507,7 +514,8 @@ describe CommunicationChannel do
path: 'foo@bar.edu',
last_bounce_at: '2015-01-01T01:01:01.000Z',
last_suppression_bounce_at: '2015-03-03T03:03:03.000Z',
last_transient_bounce_at: '2015-04-04T04:04:04.000Z'
last_transient_bounce_at: '2015-04-04T04:04:04.000Z',
updated_at: '2015-04-04T04:04:04.000Z'
)
CommunicationChannel.bounce_for_path(
path: 'foo@bar.edu',
@ -538,6 +546,30 @@ describe CommunicationChannel do
expect(cc.last_transient_bounce_details).to be_nil
end
# Exercises the per-path debounce behavior of bounce_for_path: the first
# transient bounce updates last_transient_bounce_at, an immediate repeat
# is ignored, and a bounce after the window has elapsed is processed again.
it "won't bounce twice in a row" do
  channel = communication_channel_model(path: 'foo@bar.edu')
  trigger_bounce = lambda do
    CommunicationChannel.bounce_for_path(
      path: channel.path,
      timestamp: Time.zone.now,
      details: { 'some' => 'details' },
      permanent_bounce: false,
      suppression_bounce: false
    )
  end
  # First bounce records a transient bounce timestamp.
  expect { trigger_bounce.call }.to change {
    channel.reload.last_transient_bounce_at
  }
  # A back-to-back second bounce lands inside the debounce window: no-op.
  expect { trigger_bounce.call }.to not_change {
    channel.reload.last_transient_bounce_at
  }
  # Well past the window, bouncing the same path works again.
  Timecop.travel(3.hours) do
    expect { trigger_bounce.call }.to change {
      channel.reload.last_transient_bounce_at
    }
  end
end
it 'accounts for current callbacks in bulk bouncer' do
# If you hit this spec failure, you changed the callbacks that have been
# checked in the communication_channel save. Make sure that it is not an