SIS CSV diffing

This opt-in preprocessing pass diffs the current SIS import against the last
successful import tagged with the same "data set identifier" and generates a
new zip of CSVs containing only the difference between the two. The import then
continues as normal, applying that diffed zip, which greatly speeds up large
imports where most of the data doesn't change.

closes CNVS-18432

test plan:
Exercise the new diffing options using the diffing_data_set_identifier and
diffing_remaster_data_set API options. See the new API documentation for
details and expected limitations. Edge cases include:
- zips that contain multiple CSVs for the same data type (won't diff)
- failed SIS imports (shouldn't diff against these)
- remasters should skip doing the diff, but should be diffed against on the
  next import

We don't expose any functionality for downloading SIS data that has been
uploaded, but you can grab it from the rails console:
> SisBatch.find(<batch id>).attachment.authenticated_s3_url
and
> SisBatch.find(<batch id>).generated_diff.authenticated_s3_url

Change-Id: I40d5e5ca8c376cecd685f4d06012f11bac021599
Reviewed-on: https://gerrit.instructure.com/48428
Tested-by: Jenkins
Reviewed-by: Jacob Fugal <jacob@instructure.com>
QA-Review: Jeremy Putnam <jeremyp@instructure.com>
Product-Review: Tyler Pickett <tpickett+gerrit@instructure.com>
Product-Review: Linda Feng <lfeng@instructure.com>
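As a companion to the console pointers above, a minimal, illustrative console
sketch for inspecting a batch's diffing state (the batch id is a placeholder;
the attributes are the ones this commit introduces):

  batch = SisBatch.find(42)                    # placeholder id
  batch.diffing_data_set_identifier            # the diffing chain this import belongs to
  batch.diffing_remaster                       # true if this import re-baselined the chain
  batch.data[:diffed_against_sis_batch_id]     # id of the import it was diffed against, if any
  batch.attachment.authenticated_s3_url        # the original upload
  batch.generated_diff.authenticated_s3_url    # the generated diff zip, when diffing ran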
parent 17fbe349ab
commit c212a473fd
@@ -183,6 +183,16 @@
 #           "description": "Whether stickiness was cleared.",
 #           "example": "false",
 #           "type": "boolean"
+#         },
+#         "diffing_data_set_identifier": {
+#           "description": "The identifier of the data set that this SIS batch diffs against",
+#           "example": "account-5-enrollments",
+#           "type": "string"
+#         },
+#         "diffed_against_import_id": {
+#           "description": "The ID of the SIS Import that this import was diffed against",
+#           "example": 1,
+#           "type": "integer"
 #         }
 #       }
 #     }
@@ -298,6 +308,17 @@ class SisImportsApiController < ApplicationController
 #   If 'add_sis_stickiness' is also provided, 'clear_sis_stickiness' will
 #   overrule the behavior of 'add_sis_stickiness'
 #
+# @argument diffing_data_set_identifier [String]
+#   If set on a CSV import, Canvas will attempt to optimize the SIS import by
+#   comparing this set of CSVs to the previous set that has the same data set
+#   identifier, and only applying the difference between the two. See the
+#   SIS CSV Format documentation for more details.
+#
+# @argument diffing_remaster_data_set [Boolean]
+#   If true, and diffing_data_set_identifier is sent, this SIS import will be
+#   part of the data set, but diffing will not be performed. See the SIS CSV
+#   Format documentation for details.
+#
 # @returns SisImport
 def create
   if authorized_action(@account, @current_user, :manage_sis)
@@ -356,6 +377,9 @@ class SisImportsApiController < ApplicationController
       if batch_mode_term
         batch.batch_mode = true
         batch.batch_mode_term = batch_mode_term
+      elsif params[:diffing_data_set_identifier].present?
+        batch.enable_diffing(params[:diffing_data_set_identifier],
+                             remaster: value_to_boolean(params[:diffing_remaster_data_set]))
       end
 
       batch.options ||= {}
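Taken together with the API documentation above, kicking off a diffing import
is just the normal SIS import POST with one extra parameter. A client-side
sketch follows; the rest-client gem, host, account id, token, and file name
are assumptions for illustration, not part of this commit:

  require 'rest-client' # assumed HTTP client; any multipart-capable client works

  # Hypothetical host, account, token, and file; the endpoint and parameters
  # are the ones documented in the controller above.
  RestClient.post(
    "https://canvas.example.com/api/v1/accounts/5/sis_imports.json",
    {
      import_type: 'instructure_csv',
      attachment: File.new('enrollments.zip', 'rb'),
      diffing_data_set_identifier: 'account-5-enrollments',
      # diffing_remaster_data_set: '1', # uncomment to re-baseline the data set
    },
    { 'Authorization' => "Bearer #{ENV['CANVAS_TOKEN']}" }
  )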
@@ -24,7 +24,8 @@ class SisBatch < ActiveRecord::Base
   serialize :processing_errors, Array
   serialize :processing_warnings, Array
   belongs_to :attachment
-  belongs_to :batch_mode_term, :class_name => 'EnrollmentTerm'
+  belongs_to :generated_diff, class_name: 'Attachment'
+  belongs_to :batch_mode_term, class_name: 'EnrollmentTerm'
   belongs_to :user
 
   EXPORTABLE_ATTRIBUTES = [
@@ -37,6 +38,7 @@ class SisBatch < ActiveRecord::Base
   before_save :limit_size_of_messages
 
   validates_presence_of :account_id, :workflow_state
+  validates_length_of :diffing_data_set_identifier, maximum: 128
 
   attr_accessor :zip_path
   attr_accessible :batch_mode, :batch_mode_term
@@ -67,13 +69,7 @@ class SisBatch < ActiveRecord::Base
     batch.user = user
     batch.save
 
-    Attachment.skip_3rd_party_submits(true)
-    att = Attachment.new
-    att.context = batch
-    att.uploaded_data = attachment
-    att.display_name = t :upload_filename, "sis_upload_%{id}.zip", :id => batch.id
-    att.save!
-    Attachment.skip_3rd_party_submits(false)
+    att = create_data_attachment(batch, attachment, t(:upload_filename, "sis_upload_%{id}.zip", :id => batch.id))
     batch.attachment = att
 
     yield batch if block_given?
@@ -83,6 +79,18 @@ class SisBatch < ActiveRecord::Base
     batch
   end
 
+  def self.create_data_attachment(batch, data, display_name)
+    Attachment.new.tap do |att|
+      Attachment.skip_3rd_party_submits(true)
+      att.context = batch
+      att.uploaded_data = data
+      att.display_name = display_name
+      att.save!
+    end
+  ensure
+    Attachment.skip_3rd_party_submits(false)
+  end
+
   workflow do
     state :initializing
     state :created
@@ -97,6 +105,23 @@ class SisBatch < ActiveRecord::Base
     self.class.queue_job_for_account(self.account)
   end
 
+  def enable_diffing(data_set_id, opts = {})
+    if data[:import_type] == "instructure_csv"
+      self.diffing_data_set_identifier = data_set_id
+      if opts[:remaster]
+        self.diffing_remaster = true
+      end
+    end
+  end
+
+  def add_errors(messages)
+    self.processing_errors = (self.processing_errors || []) + messages
+  end
+
+  def add_warnings(messages)
+    self.processing_warnings = (self.processing_warnings || []) + messages
+  end
+
   def self.queue_job_for_account(account)
     process_delay = Setting.get('sis_batch_process_start_delay', '0').to_f
     job_args = { :singleton => "sis_batch:account:#{Shard.birth.activate { account.id }}",
@@ -148,6 +173,7 @@ class SisBatch < ActiveRecord::Base
 
   scope :needs_processing, -> { where(:workflow_state => 'created').order(:created_at) }
   scope :importing, -> { where(:workflow_state => 'importing') }
+  scope :succeeded, -> { where(:workflow_state => %w[imported imported_with_messages]) }
 
   def self.process_all_for_account(account)
     start_time = Time.now
@@ -177,10 +203,36 @@ class SisBatch < ActiveRecord::Base
   def process_instructure_csv_zip
     require 'sis'
     download_zip
+    generate_diff
+
     importer = SIS::CSV::Import.process(self.account, :files => [ @data_file.path ], :batch => self, :override_sis_stickiness => options[:override_sis_stickiness], :add_sis_stickiness => options[:add_sis_stickiness], :clear_sis_stickiness => options[:clear_sis_stickiness])
     finish importer.finished
   end
+
+  def generate_diff
+    return if self.diffing_remaster # joined the chain, but don't actually want to diff this one
+    return unless self.diffing_data_set_identifier
+    previous_batch = self.account.sis_batches.
+      succeeded.where(diffing_data_set_identifier: self.diffing_data_set_identifier).order(:created_at).last
+    previous_zip = previous_batch.try(:download_zip)
+    return unless previous_zip
+
+    diffed_data_file = SIS::CSV::DiffGenerator.new(self.account, self).generate(previous_zip.path, @data_file.path)
+    return unless diffed_data_file
+
+    self.data[:diffed_against_sis_batch_id] = previous_batch.id
+
+    self.generated_diff = SisBatch.create_data_attachment(
+      self,
+      Rack::Test::UploadedFile.new(diffed_data_file.path, 'application/zip'),
+      t(:diff_filename, "sis_upload_diffed_%{id}.zip", :id => self.id)
+    )
+    self.save!
+    # Success, swap out the original update for this new diff and continue.
+    @data_file.try(:close)
+    @data_file = diffed_data_file
+  end
 
   def download_zip
     if self.data[:file_path]
       @data_file = File.open(self.data[:file_path], 'rb')
@@ -263,6 +315,8 @@ class SisBatch < ActiveRecord::Base
       "override_sis_stickiness" => self.options[:override_sis_stickiness],
       "add_sis_stickiness" => self.options[:add_sis_stickiness],
       "clear_sis_stickiness" => self.options[:clear_sis_stickiness],
+      "diffing_data_set_identifier" => self.diffing_data_set_identifier,
+      "diffed_against_import_id" => self.options[:diffed_against_sis_batch_id],
     }
     data["processing_errors"] = self.processing_errors if self.processing_errors.present?
     data["processing_warnings"] = self.processing_warnings if self.processing_warnings.present?
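For reference, the api_json slice these two keys add looks roughly like this
for a diffed import (values are the schema examples earlier in this commit,
not real data):

  {
    "diffing_data_set_identifier" => "account-5-enrollments",
    "diffed_against_import_id" => 1,
  }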
@@ -96,6 +96,12 @@
 <div class="last_batch" style="margin-top: 20px;">
   <h2><%= t(:last_batch_title, "Last Batch") %></h3>
   <p><%= t(:started_at_message, "Started: %{started_at}", :started_at => datetime_string(@last_batch.created_at)) %><br/>
+  <% if @last_batch.diffing_data_set_identifier %>
+    <p><%= t("Data Set Identifier: %{data_set_id}", data_set_id: @last_batch.diffing_data_set_identifier) %></p>
+    <% if @last_batch.data[:diffed_against_sis_batch_id] %>
+      <p><%= t("Incremental update successfully generated against a previous SIS Import.") %></p>
+    <% end %>
+  <% end %>
   <% if @last_batch.workflow_state == 'imported' %>
     <%= t(:imported_message, "All SIS data was successfully imported.") %>
     <%= print_counts @last_batch %>
@@ -0,0 +1,19 @@
+class AddSisBatchDiffing < ActiveRecord::Migration
+  tag :predeploy
+  disable_ddl_transaction!
+
+  def up
+    add_column :sis_batches, :diffing_data_set_identifier, :string
+    add_column :sis_batches, :diffing_remaster, :boolean
+    add_column :sis_batches, :generated_diff_id, :integer, :limit => 8
+    add_index :sis_batches, [:account_id, :diffing_data_set_identifier, :created_at],
+      name: 'index_sis_batches_diffing',
+      algorithm: :concurrently
+  end
+
+  def down
+    remove_column :sis_batches, :generated_diff_id
+    remove_column :sis_batches, :diffing_remaster
+    remove_column :sis_batches, :diffing_data_set_identifier
+  end
+end
@@ -36,6 +36,60 @@ sections and enrollments.
 This option will only affect data created via previous SIS imports. Manually created courses, for
 example, won't be deleted even if they don't appear in the new SIS import.
 
+Diffing Mode
+------------
+
+If your account has a SIS integration that is sending its entire data set on
+each import, rather than just sending what has changed, you can speed up
+the import process by enabling diffing mode. In diffing mode, a
+preprocessing step in Canvas will compare the current SIS import against
+the last successful SIS import with the same *data set identifier*, and
+only apply the difference between the two imports. For instance, if user
+A is created by import 1, and the exact same information is specified
+for user A in import 2, Canvas will mark that nothing has changed for
+that CSV row and skip looking up user A entirely. This can greatly speed
+up SIS imports with thousands of rows that change rarely.
+
+It is important to note that if any SIS data was changed outside of that
+previous CSV import, the changes will not be noticed by the diffing
+code. For example:
+
+1. Import 1 sets user A state to "active".
+2. An admin sets user A state to "deleted" either through the Canvas
+   UI, or a non-diff SIS import.
+3. Import 2 sets user A state to "active" again, and is configured to
+   diff against Import 1.
+4. Because only the difference between Import 1 and Import 2 is
+   applied, and the user's state is "active" in both CSVs, the user
+   remains deleted.
+
+Diffing mode is enabled by passing the `diffing_data_set_identifier`
+option in the "Import SIS Data" API call. This is a unique, non-changing
+string identifier for the series of SIS imports that will be diffed
+against one another. The string can contain any valid UTF-8, and be up
+to 128 bytes in length. If an account has multiple SIS integrations that
+want to take advantage of diffing, each integration can select a unique
+data set identifier to avoid interfering with each other.
+
+When choosing a data set identifier, it's important to include any
+relevant details to differentiate this data set from other import data
+sets that may come concurrently or later. This might include things such
+as source system, data type, and term id. Some examples of good identifiers:
+
+* users:fall-2015
+* source-system-1:all-data:spring-2016
+
+If changes are made to SIS-managed objects outside of the normal import
+process, as in the example given above, it may be necessary to process a SIS
+import with the same data set identifier, but apply the entire import
+rather than applying just the diff. To enable this mode, set the
+`diffing_remaster_data_set=true` option when creating the import, and it
+will be applied without diffing. The next import for the same data
+set will still diff against that import.
+
+CSV Data Formats
+================
+
 users.csv
 ---------
 
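To make the diff semantics concrete, here is a small self-contained sketch of
the csv_diff behavior the importer builds on. The CsvDiff::Diff call and the
status=deleted convention mirror the DiffGenerator code later in this commit;
the require name, the headers option, and the sample rows are assumptions for
illustration only:

  require 'csv'
  require 'csv_diff' # assumed require name for the csv_diff gem

  previous = ::CSV.new("user_id,login_id,status\nu1,u1login,active\nu2,u2login,active\n", headers: true)
  current  = ::CSV.new("user_id,login_id,status\nu1,u1login,active\nu3,u3login,active\n", headers: true)

  # Rows are keyed on the importer's identifying_fields (user_id here).
  # Unchanged rows are dropped; rows missing from the current set are
  # re-emitted with status=deleted via the deletes callback.
  diff = CsvDiff::Diff.new(%w[user_id])
  output = diff.generate(previous, current, deletes: ->(row) { row['status'] = 'deleted' })
  # Assuming, as SisBatch#generate_diff suggests, that generate returns an open
  # file handle on the diff: expect only u3 (new) and u2 (marked deleted) in it.
  puts output.read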
@@ -24,6 +24,10 @@ module SIS
       row.include?('account_id') && row.include?('parent_account_id')
     end
 
+    def self.identifying_fields
+      %w[account_id].freeze
+    end
+
     # expected columns
     # account_id,parent_account_id
     def process(csv)
@@ -24,6 +24,10 @@ module SIS
       row.include?('course_id') && row.include?('short_name')
     end
 
+    def self.identifying_fields
+      %w[course_id].freeze
+    end
+
     # expected columns
     # course_id,short_name,long_name,account_id,term_id,status
     def process(csv)
@@ -0,0 +1,102 @@
+#
+# Copyright (C) 2015 Instructure, Inc.
+#
+# This file is part of Canvas.
+#
+# Canvas is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation, version 3 of the License.
+#
+# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+require 'tempfile'
+require 'zip'
+
+module SIS
+  module CSV
+    class DiffGenerator
+      def initialize(root_account, batch)
+        @root_account = root_account
+        @batch = batch
+      end
+
+      def generate(previous_data_path, current_data_path)
+        previous_import = SIS::CSV::Import.new(@root_account, files: [previous_data_path])
+        previous_csvs = previous_import.prepare
+        current_import = SIS::CSV::Import.new(@root_account, files: [current_data_path])
+        current_csvs = current_import.prepare
+        @batch.add_warnings(current_import.warnings)
+        @batch.add_errors(current_import.errors)
+
+        output_csvs = generate_csvs(previous_csvs, current_csvs)
+        output_file = Tempfile.new(["sis_csv_diff_generator", ".zip"])
+        output_path = output_file.path
+        output_file.close!
+        Zip::File.open(output_path, Zip::File::CREATE) do |zip|
+          output_csvs.each do |csv|
+            zip.add(csv[:file], csv[:fullpath])
+          end
+        end
+        File.open(output_path, 'rb')
+      end
+
+      def generate_csvs(previous_csvs, current_csvs)
+        generated = []
+        current_csvs.each do |(import_type, csvs)|
+          current_csv = csvs.first
+          previous_csv = previous_csvs[import_type].try(:first)
+
+          if current_csv.nil? || previous_csv.nil?
+            generated.concat(csvs)
+            next
+          end
+
+          if csvs.size > 1 || previous_csvs[import_type].size > 1
+            add_warning(current_csv,
+              I18n.t("Can't perform diffing against more than one file of the same type"))
+            generated.concat(csvs)
+            next
+          end
+
+          begin
+            io = generate_diff(class_for_importer(import_type), previous_csv[:fullpath], current_csv[:fullpath])
+            generated << {
+              file: current_csv[:file],
+              fullpath: io.path,
+              tmpfile: io # returning the Tempfile alongside its path, to keep it in scope
+            }
+          rescue CsvDiff::Failure => e
+            add_warning(current_csv, I18n.t("Couldn't generate diff: %{message}", message: e.message))
+            generated.concat(csvs)
+          end
+        end
+        generated
+      end
+
+      protected
+
+      def class_for_importer(import_type)
+        SIS::CSV.const_get(import_type.to_s.camelcase + 'Importer')
+      end
+
+      def generate_diff(importer, previous_input, current_input)
+        previous_csv = ::CSV.open(previous_input, CSVBaseImporter::PARSE_ARGS)
+        current_csv = ::CSV.open(current_input, CSVBaseImporter::PARSE_ARGS)
+        diff = CsvDiff::Diff.new(importer.identifying_fields)
+        diff.generate(previous_csv, current_csv, deletes: ->(row) { row['status'] = 'deleted' })
+      end
+
+      def add_warning(csv, message)
+        @batch.add_warnings([[csv ? csv[:file] : "", message]])
+      end
+    end
+  end
+end
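A rough usage sketch, mirroring how SisBatch#generate_diff drives this class
(the account, batch, and zip paths are placeholders):

  # Sketch only: the paths stand in for the previously imported zip and the
  # current upload that SisBatch#generate_diff passes in.
  generator = SIS::CSV::DiffGenerator.new(account, batch)
  diffed_zip = generator.generate('/tmp/previous_sis.zip', '/tmp/current_sis.zip')
  # => an open File on a new zip containing one diffed CSV per importable type;
  #    the batch swaps this in for the original upload and imports it as usual.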
@@ -24,6 +24,10 @@ module SIS
       (row.include?('section_id') || row.include?('course_id')) && row.include?('user_id')
     end
 
+    def self.identifying_fields
+      %w[course_id section_id user_id role associated_user_id].freeze
+    end
+
     # expected columns
     # course_id,user_id,role,section_id,status
     def process(csv)
@@ -24,6 +24,10 @@ module SIS
       row.include?('group_id') && row.include?('name')
     end
 
+    def self.identifying_fields
+      %w[group_id].freeze
+    end
+
     # expected columns
     # group_id,account_id,name,status
     def process(csv)
@@ -23,6 +23,10 @@ module SIS
       row.include?('group_id') && row.include?('user_id')
     end
 
+    def self.identifying_fields
+      %w[group_id user_id].freeze
+    end
+
     # expected columns
     # group_id,user_id,status
     def process(csv)
@@ -88,7 +88,7 @@ module SIS
         importer
       end
 
-      def process
+      def prepare
         @tmp_dirs = []
         @files.each do |file|
           if File.file?(file)
@@ -122,6 +122,13 @@ module SIS
             end
           end
         end
+
+        @csvs
+      end
+
+      def process
+        prepare
+
         @parallelism = 1 if @total_rows <= @minimum_rows_for_parallel
 
         # calculate how often we should update progress to get 1% resolution
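Splitting prepare out of process is what makes diffing possible: prepare only
scans the zip and returns the per-type CSV descriptors that DiffGenerator
consumes, without importing anything. A hypothetical illustration of the
returned shape (keys and paths are examples drawn from the specs below):

  # Hypothetical return value of SIS::CSV::Import#prepare, as consumed by
  # DiffGenerator#generate_csvs; real fullpaths point at extracted temp files.
  {
    user:   [{ file: 'users.csv',   fullpath: '/tmp/sis_upload/users.csv' }],
    course: [{ file: 'courses.csv', fullpath: '/tmp/sis_upload/courses.csv' }],
  }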
@@ -25,6 +25,10 @@ module SIS
       row.include?('section_id') && row.include?('name')
     end
 
+    def self.identifying_fields
+      %w[section_id].freeze
+    end
+
     # expected columns
     # section_id,course_id,name,status,start_date,end_date
     def process(csv)
@@ -24,6 +24,10 @@ module SIS
       #This matcher works because a course has long_name/short_name
       row.include?('term_id') && row.include?('name')
     end
 
+    def self.identifying_fields
+      %w[term_id].freeze
+    end
+
     # expected columns
     # account_id,parent_account_id,name,status
@@ -24,6 +24,10 @@ module SIS
       row.include?('user_id') && row.include?('login_id')
     end
 
+    def self.identifying_fields
+      %w[user_id].freeze
+    end
+
     # expected columns:
     # user_id,login_id,first_name,last_name,email,status
     def process(csv)
@@ -23,6 +23,10 @@ module SIS
     def self.is_xlist_csv?(row)
       row.include?('xlist_course_id') && row.include?('section_id')
     end
 
+    def self.identifying_fields
+      %w[section_id].freeze
+    end
+
     # possible columns:
     # xlist_course_id, section_id, status
@@ -67,7 +67,10 @@ describe SisImportsApiController, type: :request do
       "batch_mode" => opts[:batch_mode] ? true : nil,
       "override_sis_stickiness" => opts[:override_sis_stickiness] ? true : nil,
       "add_sis_stickiness" => opts[:add_sis_stickiness] ? true : nil,
-      "clear_sis_stickiness" => opts[:clear_sis_stickiness] ? true : nil})
+      "clear_sis_stickiness" => opts[:clear_sis_stickiness] ? true : nil,
+      "diffing_data_set_identifier" => nil,
+      "diffed_against_import_id" => nil,
+    })
     batch.process_without_send_later
     return batch
   end
@@ -101,7 +104,10 @@ describe SisImportsApiController, type: :request do
       "batch_mode_term_id" => nil,
       "override_sis_stickiness" => nil,
       "add_sis_stickiness" => nil,
-      "clear_sis_stickiness" => nil })
+      "clear_sis_stickiness" => nil,
+      "diffing_data_set_identifier" => nil,
+      "diffed_against_import_id" => nil,
+    })
 
     expect(SisBatch.count).to eq @batch_count + 1
     expect(batch.batch_mode).to be_falsey
@@ -142,7 +148,10 @@ describe SisImportsApiController, type: :request do
       "batch_mode_term_id" => nil,
       "override_sis_stickiness" => nil,
       "add_sis_stickiness" => nil,
-      "clear_sis_stickiness" => nil })
+      "clear_sis_stickiness" => nil,
+      "diffing_data_set_identifier" => nil,
+      "diffed_against_import_id" => nil,
+    })
   end
 
   it "should skip the job for skip_sis_jobs_account_ids" do
@@ -189,6 +198,20 @@ describe SisImportsApiController, type: :request do
     expect(batch.batch_mode_term).to eq @account.default_enrollment_term
   end
 
+  it "should enable diffing mode" do
+    json = api_call(:post,
+      "/api/v1/accounts/#{@account.id}/sis_imports.json",
+      { controller: 'sis_imports_api', action: 'create',
+        format: 'json', account_id: @account.id.to_s },
+      { import_type: 'instructure_csv',
+        attachment: fixture_file_upload("files/sis/test_user_1.csv", 'text/csv'),
+        diffing_data_set_identifier: 'my-users-data',
+      })
+    batch = SisBatch.find(json["id"])
+    expect(batch.batch_mode).to be_falsey
+    expect(batch.diffing_data_set_identifier).to eq 'my-users-data'
+  end
+
   it "should error if batch mode and the term can't be found" do
     expect {
       json = api_call(:post,
@@ -528,7 +551,10 @@ describe SisImportsApiController, type: :request do
       "batch_mode_term_id" => nil,
       "override_sis_stickiness" => nil,
       "add_sis_stickiness" => nil,
-      "clear_sis_stickiness" => nil }]
+      "clear_sis_stickiness" => nil,
+      "diffing_data_set_identifier" => nil,
+      "diffed_against_import_id" => nil,
+    }]
     })
   end
 
@@ -0,0 +1,115 @@
+#
+# Copyright (C) 2015 Instructure, Inc.
+#
+# This file is part of Canvas.
+#
+# Canvas is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation, version 3 of the License.
+#
+# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+require File.expand_path(File.dirname(__FILE__) + '/../../../spec_helper.rb')
+
+describe SIS::CSV::DiffGenerator do
+  before :once do
+    account_model
+    @batch = @account.sis_batches.build
+  end
+
+  subject { described_class.new(@account, @batch) }
+
+  def csv(name, data)
+    @files ||= []
+    tf = Tempfile.new("spec")
+    @files << tf
+    tf.write(data)
+    tf.flush
+    { file: "#{name}.csv", fullpath: tf.path }
+  end
+
+  describe '#generate_csvs' do
+    it 'should skip diffing if previous is empty' do
+      previous = {
+      }
+
+      current = {
+        course: [{ file: 'courses2.csv' }],
+      }
+
+      expect(subject.generate_csvs(previous, current)).to match_array([
+        { file: 'courses2.csv' },
+      ])
+
+      expect(@batch.processing_warnings).to be_empty
+    end
+
+    it 'should skip diffing if previous has more than one file of type' do
+      previous = {
+        user: [{ file: 'users1.csv' }, { file: 'users2.csv' }],
+      }
+
+      current = {
+        user: [{ file: 'users.csv' }],
+      }
+
+      expect(subject.generate_csvs(previous, current)).to match_array([
+        { file: 'users.csv' },
+      ])
+      warning = @batch.processing_warnings.first
+      expect(warning.first).to eq "users.csv"
+      expect(warning.last).to match(%r{diffing against more than one})
+    end
+
+    it 'should skip diffing if current has more than one file of type' do
+      previous = {
+        user: [{ file: 'users.csv' }],
+      }
+
+      current = {
+        user: [{ file: 'users1.csv' }, { file: 'users2.csv' }],
+      }
+
+      expect(subject.generate_csvs(previous, current)).to match_array([
+        { file: 'users1.csv' }, { file: 'users2.csv' },
+      ])
+      warning = @batch.processing_warnings.first
+      expect(warning.first).to eq "users1.csv"
+      expect(warning.last).to match(%r{diffing against more than one})
+    end
+
+    it 'should generate multiple diffs for different file types' do
+      previous = {
+        user: [{ file: 'users1.csv' }, { file: 'users2.csv' }],
+        course: [ csv("courses", "course_id,short_name,status\ncourse_1,test1,active\n") ],
+        account: [ csv("accounts", "account_id,status\naccount_1,active\n") ],
+        group: [ csv("groups", "group_id,status\ngroup_1,deleted\n") ],
+      }
+      current = {
+        user: [{ file: 'users.csv' }],
+        course: [ csv("courses", "course_id,short_name,status\ncourse_2,test2,active\n") ],
+        enrollment: [{ file: 'enrollments.csv' }],
+        account: [ csv("accounts", "account_id,status\naccount_1,active\n") ],
+        group: [ csv("groups", "group_id,status\ngroup_1,active\ngroup_2,active\n") ],
+      }
+      csvs = subject.generate_csvs(previous, current)
+      expect(csvs.size).to eq 5
+      expect(csvs.find { |f| f[:file] == 'users.csv' }).to eq({ file: 'users.csv' })
+      courses = csvs.find { |f| f[:file] == 'courses.csv' }
+      expect(File.read(courses[:fullpath])).to eq("course_id,short_name,status\ncourse_2,test2,active\ncourse_1,test1,deleted\n")
+      expect(csvs.find { |f| f[:file] == 'enrollments.csv' }).to eq({ file: 'enrollments.csv' })
+      accounts = csvs.find { |f| f[:file] == 'accounts.csv' }
+      expect(File.read(accounts[:fullpath])).to eq("account_id,status\n")
+      groups = csvs.find { |f| f[:file] == 'groups.csv' }
+      expect(File.read(groups[:fullpath])).to eq("group_id,status\ngroup_1,active\ngroup_2,active\n")
+    end
+  end
+end
@@ -50,7 +50,7 @@ describe SisBatch do
 
   def process_csv_data(data, opts = {})
     create_csv_data(data) do |batch|
-      batch.update_attributes(opts) if opts.present?
+      batch.update_attributes(opts, without_protection: true) if opts.present?
       batch.process_without_send_later
       batch
     end
@@ -424,4 +424,71 @@ s2,test_1,section2,active},
     expect(batch.processing_errors.size).to eq 3
     expect(batch.processing_errors.last).to eq ['', 'There were 3 more errors']
   end
+
+  context "csv diffing" do
+    it "should skip diffing if previous diff not available" do
+      SIS::CSV::DiffGenerator.any_instance.expects(:generate).never
+      batch = process_csv_data([
+%{course_id,short_name,long_name,account_id,term_id,status
+test_1,TC 101,Test Course 101,,term1,active
+}], diffing_data_set_identifier: 'default')
+      # but still starts the chain
+      expect(batch.diffing_data_set_identifier).to eq 'default'
+    end
+
+    it "joins the chain but doesn't apply the diff when baseline is set" do
+      b1 = process_csv_data([
+%{course_id,short_name,long_name,account_id,term_id,status
+test_1,TC 101,Test Course 101,,term1,active
+}], diffing_data_set_identifier: 'default')
+
+      batch = process_csv_data([
+%{course_id,short_name,long_name,account_id,term_id,status
+test_1,TC 101,Test Course 101,,term1,active
+test_4,TC 104,Test Course 104,,term1,active
+}], diffing_data_set_identifier: 'default', diffing_remaster: true)
+      expect(batch.diffing_data_set_identifier).to eq 'default'
+      expect(batch.data[:diffed_against_sis_batch_id]).to eq nil
+      expect(batch.generated_diff).to eq nil
+    end
+
+    it "should diff against the most previous successful batch in the same chain" do
+      b1 = process_csv_data([
+%{course_id,short_name,long_name,account_id,term_id,status
+test_1,TC 101,Test Course 101,,term1,active
+}], diffing_data_set_identifier: 'default')
+
+      b2 = process_csv_data([
+%{course_id,short_name,long_name,account_id,term_id,status
+test_2,TC 102,Test Course 102,,term1,active
+}], diffing_data_set_identifier: 'other')
+
+      # doesn't diff against failed imports on the chain
+      b3 = process_csv_data([
+%{short_name,long_name,account_id,term_id,status
+TC 103,Test Course 103,,term1,active
+}], diffing_data_set_identifier: 'default')
+      expect(b3.workflow_state).to eq 'failed_with_messages'
+
+      batch = process_csv_data([
+%{course_id,short_name,long_name,account_id,term_id,status
+test_1,TC 101,Test Course 101,,term1,active
+test_4,TC 104,Test Course 104,,term1,active
+}], diffing_data_set_identifier: 'default')
+
+      expect(batch.data[:diffed_against_sis_batch_id]).to eq b1.id
+      # test_1 should not have been touched by this last batch, since it was diff'd out
+      expect(@account.courses.find_by_sis_source_id('test_1').sis_batch_id).to eq b1.id
+      expect(@account.courses.find_by_sis_source_id('test_4').sis_batch_id).to eq batch.id
+
+      # check the generated csv file, inside the new attached zip
+      zip = Zip::File.open(batch.generated_diff.open.path)
+      csvs = zip.glob('*.csv')
+      expect(csvs.size).to eq 1
+      expect(csvs.first.get_input_stream.read).to eq(
+%{course_id,short_name,long_name,account_id,term_id,status
+test_4,TC 104,Test Course 104,,term1,active
+})
+    end
+  end
 end