MSFT Sync steps misc details

* Set last_synced_at after completion * Rename cleanup_after_failure to after_failure for consistency with new after_complete hook * Don't enqueue if microsoft_sync_enabled is not set on the account closes INTEROP-6726 Test plan: - Set microsoft_sync_tenant on the course's root account but have microsoft_sync_enabled as false - Run sync (e.g. group.syncer_job.run_synchronously). Sync should quit immediately before doing anything. - Set microsoft_sync_enabled to true. Run sync. It should successfully sync the course enrollments. - Check that last_synced_at has been updated Change-Id: I4c01104f2c1e74791553278f4db4bb33682c6f4d Reviewed-on: https://gerrit.instructure.com/c/canvas-lms/+/262917 Reviewed-by: Wagner Goncalves <wagner.goncalves@instructure.com> QA-Review: Wagner Goncalves <wagner.goncalves@instructure.com> Product-Review: Evan Battaglia <ebattaglia@instructure.com> Tested-by: Service Cloud Jenkins <svc.cloudjenkins@instructure.com>
2021-04-14 12:48:04 -04:00 · 2021-04-14 12:48:04 -04:00 · 1edac3f32f
parent bd051859cd
commit 1edac3f32f
4 changed files with 71 additions and 16 deletions
--- a/lib/microsoft_sync/state_machine_job.rb
+++ b/lib/microsoft_sync/state_machine_job.rb
@ -38,7 +38,7 @@
 #   job_state_record.job_state is information about the step that failed,
 #       and data it may need to continue where it left off. There may be other
 #       temporary data in the DB with retry info (e.g. saved with a stash block
-#       and cleaned up in cleanup_after_failure())
+#       and cleaned up in after_failure())
 # After a job has hit a final error (error raised, or max `Retry`s surpassed):
 #   job_state_record.workflow_state is "errored"
 #   job_state_record.last_error will have a user-friendly/safe error message
@ -73,8 +73,12 @@ module MicrosoftSync
    #     take 0 or 1 arguments). They should return a NextStep or Retry object
    #     or COMPLETE (see below), or you can raise an error to signal a
    #     unretriable error.
-    #   cleanup_after_failure() -- called when a job fails (unretriable error
-    #   raised, or a Retry happens past max_retries).
+    #   after_failure() -- called when a job fails (unretriable error
+    #     raised, or a Retry happens past max_retries), or when a stale
+    #     job is restarted. Can be used to clean up state, e.g. state stored in
+    #     stash_block
+    #   after_complete() -- called when a step returns COMPLETE. Note: this is
+    #     run even if the state record has been deleted since we last checked.
    attr_reader :steps_object

    def initialize(job_state_record, steps_object)
@ -105,7 +109,7 @@ module MicrosoftSync
    # If the job has already surpassed the max number of retries, `error` will be
    # raised and handled like any non-retriable error (the job will fail, the
    # job_state_record's workflow_state will be set to error and error will be
-    # put in last_error, and we will call cleanup_after_failure()).
+    # put in last_error, and we will call after_failure()).
    #
    # Otherwise, a retry job will be enqueued; job_state_data will be written
    # into the job_state field of the job and passed into the same step when the
@ -166,7 +170,7 @@ module MicrosoftSync
      updated_at = job_state&.dig(:updated_at)
      if updated_at && updated_at < steps_object.restart_job_after_inactivity.ago
        # Trying to run a new job, old job has possibly stalled. Clean up and start over.
-        steps_object.cleanup_after_failure
+        steps_object.after_failure
        job_state_record.update!(job_state: nil)
        run_main_loop(nil, nil, synchronous)
      end
@ -201,6 +205,7 @@ module MicrosoftSync
          job_state_record&.update_workflow_state_unless_deleted(
            :completed, job_state: nil, last_error: nil
          )
+          steps_object.after_complete
          return
        when NextStep
          current_step, memory_state = result.next_step, result.memory_state
@ -239,7 +244,7 @@ module MicrosoftSync
      job_state_record&.update_workflow_state_unless_deleted(
        :errored, last_error: error_msg, job_state: nil
      )
-      steps_object.cleanup_after_failure
+      steps_object.after_failure
    end

    # Returns false if workflow_state has since been set to deleted (so we
--- a/lib/microsoft_sync/syncer_steps.rb
+++ b/lib/microsoft_sync/syncer_steps.rb
@ -54,11 +54,15 @@ module MicrosoftSync
      6.hours
    end

-    def cleanup_after_failure
+    def after_failure
      # We can clean up here e.g. (MicrosoftSync::GroupMember.delete_all)
      # when we have retry in getting owners & executing diff
    end

+    def after_complete
+      group.update!(last_synced_at: Time.zone.now)
+    end
+
    # This is semi-expected (user disables sync on account-level when jobs are
    # running), so we raise this which will cleanup_after_failure but not
    # produce a failed job.
@ -183,8 +187,15 @@ module MicrosoftSync
    end

    def tenant
-      @tenant ||= group.root_account.settings[:microsoft_sync_tenant] or
-        raise TenantMissingOrSyncDisabled
+      @tenant ||=
+        begin
+          settings = group.root_account.settings
+          enabled = settings[:microsoft_sync_enabled]
+          tenant = settings[:microsoft_sync_tenant]
+          raise TenantMissingOrSyncDisabled unless enabled && tenant
+
+          tenant
+        end
    end

    def graph_service_helpers
--- a/spec/lib/microsoft_sync/state_machine_job_spec.rb
+++ b/spec/lib/microsoft_sync/state_machine_job_spec.rb
@ -41,7 +41,13 @@ module MicrosoftSync
      6.hours
    end

-    def cleanup_after_failure; end
+    def after_failure
+      steps_run << [:after_failure]
+    end
+
+    def after_complete
+      steps_run << [:after_complete]
+    end
  end

  # Sample steps object to test functionality of StateMachineJob framework by
@ -122,6 +128,7 @@ module MicrosoftSync
          [:sleep, 2.seconds],
          [:step_first, nil, 'retry2'],
          [:step_second, 'first_data', nil],
+          [:after_complete],
        ])
      end
    end
@ -156,6 +163,7 @@ module MicrosoftSync
        expect(steps_object.steps_run).to eq([
          [:step_first, nil, 'retry2'],
          [:step_second, 'first_data', nil],
+          [:after_complete],
        ])
      end

@ -216,7 +224,7 @@ module MicrosoftSync
      end

      context 'when an unhandled error occurs' do
-        it 'bubbles up the error and sets the record state to errored' do
+        it 'bubbles up the error, sets the record state to errored, and calls after_failure' do
          subject.send(:run, nil)
          subject.send(:run, :step_first)

@ -227,6 +235,7 @@ module MicrosoftSync
          expect(state_record.reload.job_state).to eq(nil)
          expect(state_record.workflow_state).to eq('errored')
          expect(state_record.last_error).to eq(Errors.user_facing_message(error))
+          expect(steps_object.steps_run.last).to eq([:after_failure])
        end

        context 'when the error includes GracefulCancelErrorMixin' do
@ -234,11 +243,12 @@ module MicrosoftSync
            include MicrosoftSync::StateMachineJob::GracefulCancelErrorMixin
          end

-          it 'sets the record state and stops processing but does not bubble up the error' do
+          it 'sets the record state, calls after_failure, and stops processing but does not bubble up the error' do
            error = GracefulCancelTestError.new
            expect(steps_object).to receive(:step_first).and_raise(error)
            subject.send(:run, nil)
-            expect(steps_object.steps_run).to eq([]) # nothing enqueued
+            # nothing enqueued
+            expect(steps_object.steps_run).to eq([[:after_failure]])

            expect(state_record.reload.job_state).to eq(nil)
            expect(state_record.workflow_state).to eq('errored')
--- a/spec/lib/microsoft_sync/syncer_steps_spec.rb
+++ b/spec/lib/microsoft_sync/syncer_steps_spec.rb
@ -28,6 +28,7 @@ describe MicrosoftSync::SyncerSteps do
  let(:graph_service) { double('GraphService') }
  let(:graph_service_helpers) { double('GraphServiceHelpers', graph_service: graph_service) }
  let(:tenant) { 'mytenant123' }
+  let(:sync_enabled) { true }

  def expect_next_step(result, next_step, memory_state=nil)
    expect(result).to be_a(MicrosoftSync::StateMachineJob::NextStep)
@ -54,6 +55,7 @@ describe MicrosoftSync::SyncerSteps do

  before do
    ra = course.root_account
+    ra.settings[:microsoft_sync_enabled] = sync_enabled
    ra.settings[:microsoft_sync_tenant] = tenant
    ra.save!

@ -76,6 +78,23 @@ describe MicrosoftSync::SyncerSteps do
    it { is_expected.to eq(4) }
  end

+  describe '#after_failure' do
+    it 'is defined' do
+      # expand once we start using this (will be soon, when we stash
+      # in-progress paginated results when getting group members)
+      expect(syncer_steps.after_failure).to eq(nil)
+    end
+  end
+
+  describe '#after_complete' do
+    it 'sets last_synced_at on the group' do
+      Timecop.freeze do
+        expect { syncer_steps.after_complete }.to \
+          change { group.reload.last_synced_at }.from(nil).to(Time.zone.now)
+      end
+    end
+  end
+
  describe '#step_ensure_class_group_exists' do
    subject { syncer_steps.step_ensure_class_group_exists(nil, nil) }

@ -119,9 +138,7 @@ describe MicrosoftSync::SyncerSteps do
        end
      end

-      context 'when the tenant is not set in the account settings' do
-        let(:tenant) { nil }
-
+      shared_examples_for 'missing the correct account settings' do
        it 'raises a graceful cleanup error with a end-user-friendly name' do
          expect(MicrosoftSync::GraphServiceHelpers).to_not receive(:new)
          expect(syncer_steps).to_not receive(:ensure_class_group_exists)
@ -133,6 +150,18 @@ describe MicrosoftSync::SyncerSteps do
          expect(e).to be_a(MicrosoftSync::StateMachineJob::GracefulCancelErrorMixin)
        end
      end
+
+      context 'when the tenant is not set in the account settings' do
+        let(:tenant) { nil }
+
+        it_behaves_like 'missing the correct account settings'
+      end
+
+      context 'when the microsoft_sync_enabled is false in the account settings' do
+        let(:sync_enabled) { false }
+
+        it_behaves_like 'missing the correct account settings'
+      end
    end

    context "when we don't have a ms_group_id" do