attachment gc improvements + support ContentMigration

fixes CORE-58

test plan:
- successfully deletes old ContentMigrations, including those tied to
old ContentExports

Change-Id: Ib9ca545da8a6929f495317bba08d0103acbaf1cc
Reviewed-on: https://gerrit.instructure.com/143330
Reviewed-by: Cody Cutrer <cody@instructure.com>
Tested-by: Jenkins
Product-Review: Simon Williams <simon@instructure.com>
QA-Review: Simon Williams <simon@instructure.com>
This commit is contained in:
Simon Williams 2018-02-27 13:41:04 -07:00
parent 848b1714ee
commit 07a3f31363
1 changed files with 55 additions and 7 deletions

View File

@ -30,8 +30,8 @@ class Attachments::GarbageCollector
to_delete_scope.where(root_attachment_id: nil).find_ids_in_batches(batch_size: 500) do |ids_batch|
non_type_children = Attachment.where(root_attachment_id: ids_batch).
not_deleted.
where.not(root_attachment_id: nil). # postgres is being weird
where.not(context_type: context_type).
where.not(root_attachment_id: nil). # postgres is being weird
order([:root_attachment_id, :id]).
select("distinct on (attachments.root_attachment_id) attachments.*").
group_by(&:root_attachment_id)
@ -53,22 +53,30 @@ class Attachments::GarbageCollector
end
if non_type_children[att.id].present?
if context_type == 'ContentExport' &&
non_type_children[att.id].detect{ |x| x.context_type == 'ContentMigration' }.present?
stats[:cm_skipped] += 1
next
end
stats[:reparent] += 1
# make_childless separates this object and copies the content to
# a new root attachment, so we still want to delete the content here.
att.make_childless(non_type_children[att.id].first) unless dry_run
att.destroy_content unless dry_run
destroy_att_with_retries(att)
elsif att.filename.present?
stats[:destroyed] += 1
att.destroy_content unless dry_run
destroy_att_with_retries(att)
end
to_delete_ids.concat([att.id, same_type_children_ids].flatten)
end
stats[:marked_deleted] += to_delete_ids.count
updates = { workflow_state: 'deleted', file_state: 'deleted', deleted_at: Time.now.utc }
Attachment.where(id: to_delete_ids).update_all(updates) unless dry_run
if to_delete_ids.present?
stats[:marked_deleted] += to_delete_ids.count
updates = { workflow_state: 'deleted', file_state: 'deleted', deleted_at: Time.now.utc }
Attachment.where(id: to_delete_ids).update_all(updates) unless dry_run
end
end
end
@ -124,6 +132,13 @@ class Attachments::GarbageCollector
return false unless children_max_created_at
children_max_created_at >= older_than
end
# Destroys the stored content of +att+, trying again when S3 raises
# Aws::S3::Errors::InternalError. Nothing is destroyed when +dry_run+ is set.
#
# @param att [#destroy_content] attachment whose content should be removed
# @param tries [Integer] maximum number of attempts (defaults to 3)
# @return the result of +att.destroy_content+, or nil in dry_run mode
# @raise [Aws::S3::Errors::InternalError] once every attempt has failed
def destroy_att_with_retries(att, tries = 3)
  att.destroy_content unless dry_run
rescue Aws::S3::Errors::InternalError
  tries -= 1
  # Compare with `<= 0` rather than `zero?`: a non-positive +tries+ argument
  # would otherwise step past zero and keep retrying without bound.
  raise if tries <= 0

  # Fixed 10 second pause before re-running the method body.
  sleep(10)
  retry
end
end
# context_type: 'Folder' is no longer generated by the code.
@ -145,7 +160,7 @@ class Attachments::GarbageCollector
# - context_type='User' (in the case of user data exports)
# which is why we use the join conditions below
class ContentExportContextType < ByContextType
def initialize(older_than:, dry_run: false)
def initialize(older_than: ContentExport.expire_days.days.ago, dry_run: false)
super(context_type: 'ContentExport', older_than: older_than, dry_run: dry_run)
end
@ -161,4 +176,37 @@ SQL
super
end
end
# We do lump exports and migrations together here because they are often
# intertwined.
#
# NOTE: content_migration.attachment is always either
# - context_type='ContentMigration', or
# - context_type='ContentExport'
# Collector covering attachments whose context_type is either 'ContentExport'
# or 'ContentMigration'. When no cutoff is given, +older_than+ defaults to
# ContentMigration.expire_days days ago.
class ContentExportAndMigrationContextType < ByContextType
  def initialize(older_than: ContentMigration.expire_days.days.ago, dry_run: false)
    super(context_type: %w[ContentExport ContentMigration], older_than: older_than, dry_run: dry_run)
  end

  # Clears attachment_id on content exports and content migrations that point
  # at attachments already marked deleted, then hands off to the parent's
  # delete_rows.
  # NOTE(review): presumably the references are nulled so the parent can
  # remove the attachment rows afterwards -- confirm against ByContextType.
  #
  # Raises RuntimeError when the collector is in dry_run mode.
  def delete_rows
    raise "Cannot delete rows in dry_run mode" if dry_run

    # Exports: only attachments owned by a ContentExport are considered.
    exports_to_clear = ContentExport.joins(<<-SQL).where(attachments: { workflow_state: 'deleted', file_state: 'deleted' })
INNER JOIN #{Attachment.quoted_table_name}
ON attachments.context_type = 'ContentExport'
AND content_exports.attachment_id = attachments.id
SQL
    until exports_to_clear.limit(1000).update_all(attachment_id: nil) <= 0
      # each pass nulls up to 1000 rows; stop once a pass updates nothing
    end

    # Migrations: the attachment may be owned by a migration or by an export.
    migrations_to_clear = ContentMigration.joins(<<-SQL).where(attachments: { workflow_state: 'deleted', file_state: 'deleted' })
INNER JOIN #{Attachment.quoted_table_name}
ON attachments.context_type IN ('ContentMigration', 'ContentExport')
AND content_migrations.attachment_id = attachments.id
SQL
    until migrations_to_clear.limit(1000).update_all(attachment_id: nil) <= 0
      # each pass nulls up to 1000 rows; stop once a pass updates nothing
    end

    super
  end
end
end