SIS CSV diffing

This opt-in preprocessing pass does a diff on the current SIS Import against
the last successful import tagged with the same "data set identifier",
and generates a new zip of CSVs containing only the diff between the
two. It then continues with the import as normal, applying that diff'd
zip, greatly speeding up large imports where most data doesn't change.

closes CNVS-18432

test plan:

Exercise the new diffing options using the diffing_data_set_identifier
and diffing_remaster_data_set api options. See the new API
documentation for details and expected limitations. Edge cases include:
- zips that contain multiple CSVs for the same data type (won't diff)
- failed SIS imports (shouldn't diff against these)
- remasters should skip doing the diff, but should be diffed against on
  the next import

We don't expose any functionality for downloading SIS data that has been
uploaded, but you can grab it from the rails console:

> SisBatch.find(<batch id>).attachment.authenticated_s3_url
and
> SisBatch.find(<batch id>).generated_diff.authenticated_s3_url

Change-Id: I40d5e5ca8c376cecd685f4d06012f11bac021599
Reviewed-on: https://gerrit.instructure.com/48428
Tested-by: Jenkins
Reviewed-by: Jacob Fugal <jacob@instructure.com>
QA-Review: Jeremy Putnam <jeremyp@instructure.com>
Product-Review: Tyler Pickett <tpickett+gerrit@instructure.com>
Product-Review: Linda Feng <lfeng@instructure.com>
This commit is contained in:
Brian Palmer 2015-02-07 17:30:03 -07:00 committed by Tyler Pickett
parent 17fbe349ab
commit c212a473fd
19 changed files with 524 additions and 14 deletions

View File

@ -183,6 +183,16 @@
# "description": "Whether stickiness was cleared.",
# "example": "false",
# "type": "boolean"
# },
# "diffing_data_set_identifier": {
# "description": "The identifier of the data set that this SIS batch diffs against",
# "example": "account-5-enrollments",
# "type": "string"
# },
# "diffed_against_import_id": {
# "description": "The ID of the SIS Import that this import was diffed against",
# "example": 1,
# "type": "integer"
# }
# }
# }
@ -298,6 +308,17 @@ class SisImportsApiController < ApplicationController
# If 'add_sis_stickiness' is also provided, 'clear_sis_stickiness' will
# overrule the behavior of 'add_sis_stickiness'
#
# @argument diffing_data_set_identifier [String]
# If set on a CSV import, Canvas will attempt to optimize the SIS import by
# comparing this set of CSVs to the previous set that has the same data set
# identifier, and only appliying the difference between the two. See the
# SIS CSV Format documentation for more details.
#
# @argument diffing_remaster_data_set [Boolean]
# If true, and diffing_data_set_identifier is sent, this SIS import will be
# part of the data set, but diffing will not be performed. See the SIS CSV
# Format documentation for details.
#
# @returns SisImport
def create
if authorized_action(@account, @current_user, :manage_sis)
@ -356,6 +377,9 @@ class SisImportsApiController < ApplicationController
if batch_mode_term
batch.batch_mode = true
batch.batch_mode_term = batch_mode_term
elsif params[:diffing_data_set_identifier].present?
batch.enable_diffing(params[:diffing_data_set_identifier],
remaster: value_to_boolean(params[:diffing_remaster_data_set]))
end
batch.options ||= {}

View File

@ -24,7 +24,8 @@ class SisBatch < ActiveRecord::Base
serialize :processing_errors, Array
serialize :processing_warnings, Array
belongs_to :attachment
belongs_to :batch_mode_term, :class_name => 'EnrollmentTerm'
belongs_to :generated_diff, class_name: 'Attachment'
belongs_to :batch_mode_term, class_name: 'EnrollmentTerm'
belongs_to :user
EXPORTABLE_ATTRIBUTES = [
@ -37,6 +38,7 @@ class SisBatch < ActiveRecord::Base
before_save :limit_size_of_messages
validates_presence_of :account_id, :workflow_state
validates_length_of :diffing_data_set_identifier, maximum: 128
attr_accessor :zip_path
attr_accessible :batch_mode, :batch_mode_term
@ -67,13 +69,7 @@ class SisBatch < ActiveRecord::Base
batch.user = user
batch.save
Attachment.skip_3rd_party_submits(true)
att = Attachment.new
att.context = batch
att.uploaded_data = attachment
att.display_name = t :upload_filename, "sis_upload_%{id}.zip", :id => batch.id
att.save!
Attachment.skip_3rd_party_submits(false)
att = create_data_attachment(batch, attachment, t(:upload_filename, "sis_upload_%{id}.zip", :id => batch.id))
batch.attachment = att
yield batch if block_given?
@ -83,6 +79,18 @@ class SisBatch < ActiveRecord::Base
batch
end
def self.create_data_attachment(batch, data, display_name)
Attachment.new.tap do |att|
Attachment.skip_3rd_party_submits(true)
att.context = batch
att.uploaded_data = data
att.display_name = display_name
att.save!
end
ensure
Attachment.skip_3rd_party_submits(false)
end
workflow do
state :initializing
state :created
@ -97,6 +105,23 @@ class SisBatch < ActiveRecord::Base
self.class.queue_job_for_account(self.account)
end
def enable_diffing(data_set_id, opts = {})
if data[:import_type] == "instructure_csv"
self.diffing_data_set_identifier = data_set_id
if opts[:remaster]
self.diffing_remaster = true
end
end
end
def add_errors(messages)
self.processing_errors = (self.processing_errors || []) + messages
end
def add_warnings(messages)
self.processing_warnings = (self.processing_warnings || []) + messages
end
def self.queue_job_for_account(account)
process_delay = Setting.get('sis_batch_process_start_delay', '0').to_f
job_args = { :singleton => "sis_batch:account:#{Shard.birth.activate { account.id }}",
@ -148,6 +173,7 @@ class SisBatch < ActiveRecord::Base
scope :needs_processing, -> { where(:workflow_state => 'created').order(:created_at) }
scope :importing, -> { where(:workflow_state => 'importing') }
scope :succeeded, -> { where(:workflow_state => %w[imported imported_with_messages]) }
def self.process_all_for_account(account)
start_time = Time.now
@ -177,10 +203,36 @@ class SisBatch < ActiveRecord::Base
def process_instructure_csv_zip
require 'sis'
download_zip
generate_diff
importer = SIS::CSV::Import.process(self.account, :files => [ @data_file.path ], :batch => self, :override_sis_stickiness => options[:override_sis_stickiness], :add_sis_stickiness => options[:add_sis_stickiness], :clear_sis_stickiness => options[:clear_sis_stickiness])
finish importer.finished
end
def generate_diff
return if self.diffing_remaster # joined the chain, but don't actually want to diff this one
return unless self.diffing_data_set_identifier
previous_batch = self.account.sis_batches.
succeeded.where(diffing_data_set_identifier: self.diffing_data_set_identifier).order(:created_at).last
previous_zip = previous_batch.try(:download_zip)
return unless previous_zip
diffed_data_file = SIS::CSV::DiffGenerator.new(self.account, self).generate(previous_zip.path, @data_file.path)
return unless diffed_data_file
self.data[:diffed_against_sis_batch_id] = previous_batch.id
self.generated_diff = SisBatch.create_data_attachment(
self,
Rack::Test::UploadedFile.new(diffed_data_file.path, 'application/zip'),
t(:diff_filename, "sis_upload_diffed_%{id}.zip", :id => self.id)
)
self.save!
# Success, swap out the original update for this new diff and continue.
@data_file.try(:close)
@data_file = diffed_data_file
end
def download_zip
if self.data[:file_path]
@data_file = File.open(self.data[:file_path], 'rb')
@ -263,6 +315,8 @@ class SisBatch < ActiveRecord::Base
"override_sis_stickiness" => self.options[:override_sis_stickiness],
"add_sis_stickiness" => self.options[:add_sis_stickiness],
"clear_sis_stickiness" => self.options[:clear_sis_stickiness],
"diffing_data_set_identifier" => self.diffing_data_set_identifier,
"diffed_against_import_id" => self.options[:diffed_against_sis_batch_id],
}
data["processing_errors"] = self.processing_errors if self.processing_errors.present?
data["processing_warnings"] = self.processing_warnings if self.processing_warnings.present?

View File

@ -96,6 +96,12 @@
<div class="last_batch" style="margin-top: 20px;">
<h2><%= t(:last_batch_title, "Last Batch") %></h3>
<p><%= t(:started_at_message, "Started: %{started_at}", :started_at => datetime_string(@last_batch.created_at)) %><br/>
<% if @last_batch.diffing_data_set_identifier %>
<p><%= t("Data Set Identifier: %{data_set_id}", data_set_id: @last_batch.diffing_data_set_identifier) %></p>
<% if @last_batch.data[:diffed_against_sis_batch_id] %>
<p><%= t("Incremental update successfully generated against a previous SIS Import.") %></p>
<% end %>
<% end %>
<% if @last_batch.workflow_state == 'imported' %>
<%= t(:imported_message, "All SIS data was successfully imported.") %>
<%= print_counts @last_batch %>

View File

@ -0,0 +1,19 @@
class AddSisBatchDiffing < ActiveRecord::Migration
tag :predeploy
disable_ddl_transaction!
def up
add_column :sis_batches, :diffing_data_set_identifier, :string
add_column :sis_batches, :diffing_remaster, :boolean
add_column :sis_batches, :generated_diff_id, :integer, :limit => 8
add_index :sis_batches, [:account_id, :diffing_data_set_identifier, :created_at],
name: 'index_sis_batches_diffing',
algorithm: :concurrently
end
def down
remove_column :sis_batches, :generated_diff_id
remove_column :sis_batches, :diffing_remaster
remove_column :sis_batches, :diffing_data_set_identifier
end
end

View File

@ -36,6 +36,60 @@ sections and enrollments.
This option will only affect data created via previous SIS imports. Manually created courses, for
example, won't be deleted even if they don't appear in the new SIS import.
Diffing Mode
------------
If your account has a SIS integration that is sending its entire data set on
each import, rather than just sending what has changed, you can speed up
the import process by enabling diffing mode. In diffing mode, a
preprocessing step in Canvas will compare the current SIS import against
the last successful SIS import with the same *data set identifier*, and
only apply the difference between the two imports. For instance, if user
A is created by import 1, and the exact same information is specified
for user A in import 2, Canvas will mark that nothing has changed for
that CSV row and skip looking up user A entirely. This can greatly speed
up SIS imports with thousands of rows that change rarely.
It is important to note that if any SIS data was changed outside of that
previous CSV import, the changes will not be noticed by the diffing
code. For example:
1. Import 1 sets user A state to "active".
2. An admin sets user A state to "deleted" either through the Canvas
UI, or a non-diff SIS import.
3. Import 2 sets user A state to "active" again, and is configured to
diff against Import 1.
4. Because only the difference between Import 1 and Import 2 is
applied, and the user's state is "active" in both CSVs, the user
remains deleted.
Diffing mode is enabled by passing the `diffing_data_set_identifier`
option in the "Import SIS Data" API call. This is a unique, non-changing
string identifier for the series of SIS imports that will be diffed
against one another. The string can contain any valid UTF-8, and be up
to 128 bytes in length. If an account has multiple SIS integrations that
want to take advantage of diffing, each integration can select a unique
data set identifier to avoid interfering with each other.
When choosing a data set identifier, it's important to include any
relevant details to differentiate this data set from other import data
sets that may come concurrently or later. This might include things such
as source system, data type, and term id. Some examples of good identifiers:
* users:fall-2015
* source-system-1:all-data:spring-2016
If changes are made to SIS-managed objects outside of the normal import
process, as in the example given above, it may be necessary to process a SIS
import with the same data set identifier, but apply the entire import
rather than applying just the diff. To enable this mode, set the
`diffing_remaster_data_set=true` option when creating the import, and it
will be applied without diffing. The next import for the same data
set will still diff against that import.
CSV Data Formats
================
users.csv
---------

View File

@ -24,6 +24,10 @@ module SIS
row.include?('account_id') && row.include?('parent_account_id')
end
def self.identifying_fields
%w[account_id].freeze
end
# expected columns
# account_id,parent_account_id
def process(csv)

View File

@ -24,6 +24,10 @@ module SIS
row.include?('course_id') && row.include?('short_name')
end
def self.identifying_fields
%w[course_id].freeze
end
# expected columns
# course_id,short_name,long_name,account_id,term_id,status
def process(csv)

View File

@ -0,0 +1,102 @@
#
# Copyright (C) 2015 Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
require 'tempfile'
require 'zip'
module SIS
module CSV
class DiffGenerator
def initialize(root_account, batch)
@root_account = root_account
@batch = batch
end
def generate(previous_data_path, current_data_path)
previous_import = SIS::CSV::Import.new(@root_account, files: [previous_data_path])
previous_csvs = previous_import.prepare
current_import = SIS::CSV::Import.new(@root_account, files: [current_data_path])
current_csvs = current_import.prepare
@batch.add_warnings(current_import.warnings)
@batch.add_errors(current_import.errors)
output_csvs = generate_csvs(previous_csvs, current_csvs)
output_file = Tempfile.new(["sis_csv_diff_generator", ".zip"])
output_path = output_file.path
output_file.close!
Zip::File.open(output_path, Zip::File::CREATE) do |zip|
output_csvs.each do |csv|
zip.add(csv[:file], csv[:fullpath])
end
end
File.open(output_path, 'rb')
end
def generate_csvs(previous_csvs, current_csvs)
generated = []
current_csvs.each do |(import_type, csvs)|
current_csv = csvs.first
previous_csv = previous_csvs[import_type].try(:first)
if current_csv.nil? || previous_csv.nil?
generated.concat(csvs)
next
end
if csvs.size > 1 || previous_csvs[import_type].size > 1
add_warning(current_csv,
I18n.t("Can't perform diffing against more than one file of the same type"))
generated.concat(csvs)
next
end
begin
io = generate_diff(class_for_importer(import_type), previous_csv[:fullpath], current_csv[:fullpath])
generated << {
file: current_csv[:file],
fullpath: io.path,
tmpfile: io # returning the Tempfile alongside its path, to keep it in scope
}
rescue CsvDiff::Failure => e
add_warning(current_csv, I18n.t("Couldn't generate diff: %{message}", message: e.message))
generated.concat(csvs)
end
end
generated
end
protected
def class_for_importer(import_type)
SIS::CSV.const_get(import_type.to_s.camelcase + 'Importer')
end
def generate_diff(importer, previous_input, current_input)
previous_csv = ::CSV.open(previous_input, CSVBaseImporter::PARSE_ARGS)
current_csv = ::CSV.open(current_input, CSVBaseImporter::PARSE_ARGS)
diff = CsvDiff::Diff.new(importer.identifying_fields)
diff.generate(previous_csv, current_csv, deletes: ->(row) { row['status'] = 'deleted' })
end
def add_warning(csv, message)
@batch.add_warnings([[csv ? csv[:file] : "", message]])
end
end
end
end

View File

@ -24,6 +24,10 @@ module SIS
(row.include?('section_id') || row.include?('course_id')) && row.include?('user_id')
end
def self.identifying_fields
%w[course_id section_id user_id role associated_user_id].freeze
end
# expected columns
# course_id,user_id,role,section_id,status
def process(csv)

View File

@ -24,6 +24,10 @@ module SIS
row.include?('group_id') && row.include?('name')
end
def self.identifying_fields
%w[group_id].freeze
end
# expected columns
# group_id,account_id,name,status
def process(csv)

View File

@ -23,6 +23,10 @@ module SIS
row.include?('group_id') && row.include?('user_id')
end
def self.identifying_fields
%w[group_id user_id].freeze
end
# expected columns
# group_id,user_id,status
def process(csv)

View File

@ -88,7 +88,7 @@ module SIS
importer
end
def process
def prepare
@tmp_dirs = []
@files.each do |file|
if File.file?(file)
@ -122,6 +122,13 @@ module SIS
end
end
end
@csvs
end
def process
prepare
@parallelism = 1 if @total_rows <= @minimum_rows_for_parallel
# calculate how often we should update progress to get 1% resolution

View File

@ -25,6 +25,10 @@ module SIS
row.include?('section_id') && row.include?('name')
end
def self.identifying_fields
%w[section_id].freeze
end
# expected columns
# section_id,course_id,name,status,start_date,end_date
def process(csv)

View File

@ -24,6 +24,10 @@ module SIS
#This matcher works because a course has long_name/short_name
row.include?('term_id') && row.include?('name')
end
def self.identifying_fields
%w[term_id].freeze
end
# expected columns
# account_id,parent_account_id,name,status

View File

@ -24,6 +24,10 @@ module SIS
row.include?('user_id') && row.include?('login_id')
end
def self.identifying_fields
%w[user_id].freeze
end
# expected columns:
# user_id,login_id,first_name,last_name,email,status
def process(csv)

View File

@ -23,6 +23,10 @@ module SIS
def self.is_xlist_csv?(row)
row.include?('xlist_course_id') && row.include?('section_id')
end
def self.identifying_fields
%w[section_id].freeze
end
# possible columns:
# xlist_course_id, section_id, status

View File

@ -67,7 +67,10 @@ describe SisImportsApiController, type: :request do
"batch_mode" => opts[:batch_mode] ? true : nil,
"override_sis_stickiness" => opts[:override_sis_stickiness] ? true : nil,
"add_sis_stickiness" => opts[:add_sis_stickiness] ? true : nil,
"clear_sis_stickiness" => opts[:clear_sis_stickiness] ? true : nil})
"clear_sis_stickiness" => opts[:clear_sis_stickiness] ? true : nil,
"diffing_data_set_identifier" => nil,
"diffed_against_import_id" => nil,
})
batch.process_without_send_later
return batch
end
@ -101,7 +104,10 @@ describe SisImportsApiController, type: :request do
"batch_mode_term_id" => nil,
"override_sis_stickiness" => nil,
"add_sis_stickiness" => nil,
"clear_sis_stickiness" => nil })
"clear_sis_stickiness" => nil,
"diffing_data_set_identifier" => nil,
"diffed_against_import_id" => nil,
})
expect(SisBatch.count).to eq @batch_count + 1
expect(batch.batch_mode).to be_falsey
@ -142,7 +148,10 @@ describe SisImportsApiController, type: :request do
"batch_mode_term_id" => nil,
"override_sis_stickiness" => nil,
"add_sis_stickiness" => nil,
"clear_sis_stickiness" => nil })
"clear_sis_stickiness" => nil,
"diffing_data_set_identifier" => nil,
"diffed_against_import_id" => nil,
})
end
it "should skip the job for skip_sis_jobs_account_ids" do
@ -189,6 +198,20 @@ describe SisImportsApiController, type: :request do
expect(batch.batch_mode_term).to eq @account.default_enrollment_term
end
it "should enable diffing mode" do
json = api_call(:post,
"/api/v1/accounts/#{@account.id}/sis_imports.json",
{ controller: 'sis_imports_api', action: 'create',
format: 'json', account_id: @account.id.to_s },
{ import_type: 'instructure_csv',
attachment: fixture_file_upload("files/sis/test_user_1.csv", 'text/csv'),
diffing_data_set_identifier: 'my-users-data',
})
batch = SisBatch.find(json["id"])
expect(batch.batch_mode).to be_falsey
expect(batch.diffing_data_set_identifier).to eq 'my-users-data'
end
it "should error if batch mode and the term can't be found" do
expect {
json = api_call(:post,
@ -528,7 +551,10 @@ describe SisImportsApiController, type: :request do
"batch_mode_term_id" => nil,
"override_sis_stickiness" => nil,
"add_sis_stickiness" => nil,
"clear_sis_stickiness" => nil }]
"clear_sis_stickiness" => nil,
"diffing_data_set_identifier" => nil,
"diffed_against_import_id" => nil,
}]
})
end

View File

@ -0,0 +1,115 @@
#
# Copyright (C) 2015 Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper.rb')
describe SIS::CSV::DiffGenerator do
before :once do
account_model
@batch = @account.sis_batches.build
end
subject { described_class.new(@account, @batch) }
def csv(name, data)
@files ||= []
tf = Tempfile.new("spec")
@files << tf
tf.write(data)
tf.flush
{ file: "#{name}.csv", fullpath: tf.path }
end
describe '#generate_csvs' do
it 'should skip diffing if previous is empty' do
previous = {
}
current = {
course: [{ file: 'courses2.csv' }],
}
expect(subject.generate_csvs(previous, current)).to match_array([
{ file: 'courses2.csv' },
])
expect(@batch.processing_warnings).to be_empty
end
it 'should skip diffing if previous has more than one file of type' do
previous = {
user: [{ file: 'users1.csv' }, { file: 'users2.csv' }],
}
current = {
user: [{ file: 'users.csv' }],
}
expect(subject.generate_csvs(previous, current)).to match_array([
{ file: 'users.csv' },
])
warning = @batch.processing_warnings.first
expect(warning.first).to eq "users.csv"
expect(warning.last).to match(%r{diffing against more than one})
end
it 'should skip diffing if current has more than one file of type' do
previous = {
user: [{ file: 'users.csv' }],
}
current = {
user: [{ file: 'users1.csv' }, { file: 'users2.csv' }],
}
expect(subject.generate_csvs(previous, current)).to match_array([
{ file: 'users1.csv' }, { file: 'users2.csv' },
])
warning = @batch.processing_warnings.first
expect(warning.first).to eq "users1.csv"
expect(warning.last).to match(%r{diffing against more than one})
end
it 'should generate multiple diffs for different file types' do
previous = {
user: [{ file: 'users1.csv' }, { file: 'users2.csv' }],
course: [ csv("courses", "course_id,short_name,status\ncourse_1,test1,active\n") ],
account: [ csv("accounts", "account_id,status\naccount_1,active\n") ],
group: [ csv("groups", "group_id,status\ngroup_1,deleted\n") ],
}
current = {
user: [{ file: 'users.csv' }],
course: [ csv("courses", "course_id,short_name,status\ncourse_2,test2,active\n") ],
enrollment: [{ file: 'enrollments.csv' }],
account: [ csv("accounts", "account_id,status\naccount_1,active\n") ],
group: [ csv("groups", "group_id,status\ngroup_1,active\ngroup_2,active\n") ],
}
csvs = subject.generate_csvs(previous, current)
expect(csvs.size).to eq 5
expect(csvs.find { |f| f[:file] == 'users.csv' }).to eq({ file: 'users.csv' })
courses = csvs.find { |f| f[:file] == 'courses.csv' }
expect(File.read(courses[:fullpath])).to eq("course_id,short_name,status\ncourse_2,test2,active\ncourse_1,test1,deleted\n")
expect(csvs.find { |f| f[:file] == 'enrollments.csv' }).to eq({ file: 'enrollments.csv' })
accounts = csvs.find { |f| f[:file] == 'accounts.csv' }
expect(File.read(accounts[:fullpath])).to eq("account_id,status\n")
groups = csvs.find { |f| f[:file] == 'groups.csv' }
expect(File.read(groups[:fullpath])).to eq("group_id,status\ngroup_1,active\ngroup_2,active\n")
end
end
end

View File

@ -50,7 +50,7 @@ describe SisBatch do
def process_csv_data(data, opts = {})
create_csv_data(data) do |batch|
batch.update_attributes(opts) if opts.present?
batch.update_attributes(opts, without_protection: true) if opts.present?
batch.process_without_send_later
batch
end
@ -424,4 +424,71 @@ s2,test_1,section2,active},
expect(batch.processing_errors.size).to eq 3
expect(batch.processing_errors.last).to eq ['', 'There were 3 more errors']
end
context "csv diffing" do
it "should skip diffing if previous diff not available" do
SIS::CSV::DiffGenerator.any_instance.expects(:generate).never
batch = process_csv_data([
%{course_id,short_name,long_name,account_id,term_id,status
test_1,TC 101,Test Course 101,,term1,active
}], diffing_data_set_identifier: 'default')
# but still starts the chain
expect(batch.diffing_data_set_identifier).to eq 'default'
end
it "joins the chain but doesn't apply the diff when baseline is set" do
b1 = process_csv_data([
%{course_id,short_name,long_name,account_id,term_id,status
test_1,TC 101,Test Course 101,,term1,active
}], diffing_data_set_identifier: 'default')
batch = process_csv_data([
%{course_id,short_name,long_name,account_id,term_id,status
test_1,TC 101,Test Course 101,,term1,active
test_4,TC 104,Test Course 104,,term1,active
}], diffing_data_set_identifier: 'default', diffing_remaster: true)
expect(batch.diffing_data_set_identifier).to eq 'default'
expect(batch.data[:diffed_against_sis_batch_id]).to eq nil
expect(batch.generated_diff).to eq nil
end
it "should diff against the most previous successful batch in the same chain" do
b1 = process_csv_data([
%{course_id,short_name,long_name,account_id,term_id,status
test_1,TC 101,Test Course 101,,term1,active
}], diffing_data_set_identifier: 'default')
b2 = process_csv_data([
%{course_id,short_name,long_name,account_id,term_id,status
test_2,TC 102,Test Course 102,,term1,active
}], diffing_data_set_identifier: 'other')
# doesn't diff against failed imports on the chain
b3 = process_csv_data([
%{short_name,long_name,account_id,term_id,status
TC 103,Test Course 103,,term1,active
}], diffing_data_set_identifier: 'default')
expect(b3.workflow_state).to eq 'failed_with_messages'
batch = process_csv_data([
%{course_id,short_name,long_name,account_id,term_id,status
test_1,TC 101,Test Course 101,,term1,active
test_4,TC 104,Test Course 104,,term1,active
}], diffing_data_set_identifier: 'default')
expect(batch.data[:diffed_against_sis_batch_id]).to eq b1.id
# test_1 should not have been toched by this last batch, since it was diff'd out
expect(@account.courses.find_by_sis_source_id('test_1').sis_batch_id).to eq b1.id
expect(@account.courses.find_by_sis_source_id('test_4').sis_batch_id).to eq batch.id
# check the generated csv file, inside the new attached zip
zip = Zip::File.open(batch.generated_diff.open.path)
csvs = zip.glob('*.csv')
expect(csvs.size).to eq 1
expect(csvs.first.get_input_stream.read).to eq(
%{course_id,short_name,long_name,account_id,term_id,status
test_4,TC 104,Test Course 104,,term1,active
})
end
end
end