gradebook importer files through S3
closes CNVS-27034 stops dumping the file to the delayed job parameter, which was never a good idea. This sends it to S3 or file storage and streams it from there on import. With the current setup, way more memory than necessary is consumed while the job is running, and this is the way we process large files in places like SIS anyway so this makes it more consistent. TEST PLAN: 1) gradebook imports should still work 2) gradebook import jobs should not eat memory like popcorn Change-Id: If5bc1aab2eba01c617d6c37480fe777000d80165 Reviewed-on: https://gerrit.instructure.com/71414 Reviewed-by: Derek Bender <djbender@instructure.com> Tested-by: Jenkins QA-Review: KC Naegle <knaegle@instructure.com> Product-Review: Ethan Vizitei <evizitei@instructure.com>
This commit is contained in:
parent
0364c12169
commit
db3ba60b11
|
@ -55,8 +55,7 @@ class GradebookUploadsController < ApplicationController
|
|||
def create
|
||||
if authorized_action(@context, @current_user, :manage_grades)
|
||||
if params[:gradebook_upload]
|
||||
attachment = params[:gradebook_upload][:uploaded_data]
|
||||
@progress = GradebookUpload.queue_from(@context, @current_user, attachment.read)
|
||||
@progress = GradebookUpload.queue_from(@context, @current_user, gradebook_upload_params)
|
||||
js_env gradebook_env(@progress)
|
||||
render :show
|
||||
else
|
||||
|
@ -95,4 +94,8 @@ class GradebookUploadsController < ApplicationController
|
|||
user_id: @current_user
|
||||
).first
|
||||
end
|
||||
|
||||
def gradebook_upload_params
|
||||
strong_params.require(:gradebook_upload).permit(:uploaded_data)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -22,12 +22,15 @@ class GradebookUpload < ActiveRecord::Base
|
|||
belongs_to :course
|
||||
belongs_to :user
|
||||
belongs_to :progress
|
||||
has_many :attachments, as: :context, dependent: :destroy
|
||||
|
||||
serialize :gradebook, JSON
|
||||
|
||||
def self.queue_from(course, user, params)
|
||||
progress = Progress.create!(:context => course, :tag => "gradebook_upload", user: user)
|
||||
progress.process_job(GradebookImporter, :create_from, {}, course, user, params)
|
||||
def self.queue_from(course, user, attachment_data)
|
||||
progress = Progress.create!(context: course, tag: "gradebook_upload", user: user)
|
||||
gradebook_upload = GradebookUpload.create!(course: course, user: user, progress: progress)
|
||||
gradebook_upload_attachment = gradebook_upload.attachments.create!(attachment_data)
|
||||
progress.process_job(GradebookImporter, :create_from, {}, gradebook_upload, user, gradebook_upload_attachment)
|
||||
progress
|
||||
end
|
||||
|
||||
|
|
|
@ -37,22 +37,42 @@ class GradebookImporter
|
|||
end
|
||||
end
|
||||
|
||||
attr_reader :context, :contents, :assignments, :students, :submissions, :missing_assignments, :missing_students
|
||||
attr_reader :context, :contents, :attachment, :assignments, :students,
|
||||
:submissions, :missing_assignments, :missing_students, :upload
|
||||
|
||||
def self.create_from(progress, course, user, attachment)
|
||||
uploaded_gradebook = new(course, attachment, user, progress)
|
||||
uploaded_gradebook.parse!
|
||||
# TODO: reduce these to "gradebook_upload" and "attachment" once job queue
|
||||
# is clear of old enqueued jobs
|
||||
def self.create_from(progress, gradebook_upload_or_course, user, attachment_or_contents)
|
||||
self.new(gradebook_upload_or_course, attachment_or_contents, user, progress).parse!
|
||||
end
|
||||
|
||||
def initialize(context=nil, csv=nil, user=nil, progress=nil)
|
||||
raise ArgumentError, "Must provide a valid context for this gradebook." unless valid_context?(context)
|
||||
raise ArgumentError, "Must provide CSV contents." unless csv
|
||||
@context = context
|
||||
def initialize(context=nil, csv_contents_or_attachment=nil, user=nil, progress=nil)
|
||||
# TODO: change the parameter to "gradebook_upload", and remove these conditionals
|
||||
# once we know the S3 pipeline works and old jobs are cleared
|
||||
if context.is_a?(GradebookUpload)
|
||||
@upload = context
|
||||
@context = @upload.course
|
||||
else
|
||||
@context = context
|
||||
end
|
||||
|
||||
raise ArgumentError, "Must provide a valid context for this gradebook." unless valid_context?(@context)
|
||||
# TODO: Change this error to "Must provide attachment id when we're sure S3 pipeline works"
|
||||
raise ArgumentError, "Must provide CSV contents or attachment." unless csv_contents_or_attachment
|
||||
|
||||
@user = user
|
||||
@contents = csv
|
||||
# TODO: change the parameter to "attachment", and remove these conditionals
|
||||
# once we know the S3 pipeline works and old jobs are cleared
|
||||
if csv_contents_or_attachment.is_a?(Attachment)
|
||||
@attachment = csv_contents_or_attachment
|
||||
else
|
||||
@contents = csv_contents_or_attachment
|
||||
end
|
||||
@progress = progress
|
||||
|
||||
@upload = GradebookUpload.new course: @context, user: @user, progress: @progress
|
||||
# TODO: remove this line entirely
|
||||
# once we know the S3 pipeline works and old jobs are cleared
|
||||
@upload ||= GradebookUpload.new course: @context, user: @user, progress: @progress
|
||||
|
||||
if @context.feature_enabled?(:differentiated_assignments)
|
||||
@visible_assignments = AssignmentStudentVisibility.visible_assignment_ids_in_course_by_user(
|
||||
|
@ -80,24 +100,19 @@ class GradebookImporter
|
|||
.select(['users.id', :name, :sortable_name])
|
||||
.index_by(&:id)
|
||||
|
||||
csv = CSV.parse(contents, :converters => :nil)
|
||||
@assignments = process_header(csv)
|
||||
@assignments = nil
|
||||
@root_accounts = {}
|
||||
@pseudonyms_by_sis_id = {}
|
||||
@pseudonyms_by_login_id = {}
|
||||
@students = []
|
||||
@pp_row = []
|
||||
csv.each do |row|
|
||||
if row[0] =~ /Points Possible/
|
||||
row.shift(@student_columns)
|
||||
process_pp(row)
|
||||
next
|
||||
|
||||
csv_stream do |row|
|
||||
already_processed = check_for_non_student_row(row)
|
||||
unless already_processed
|
||||
@students << process_student(row)
|
||||
process_submissions(row, @students.last)
|
||||
end
|
||||
|
||||
next if row.compact.all? { |c| c.strip =~ /^(Muted|)$/i }
|
||||
|
||||
@students << process_student(row)
|
||||
process_submissions(row, @students.last)
|
||||
end
|
||||
|
||||
@assignments_outside_current_periods = []
|
||||
|
@ -202,8 +217,7 @@ class GradebookImporter
|
|||
end
|
||||
end
|
||||
|
||||
def process_header(csv)
|
||||
row = csv.shift
|
||||
def process_header(row)
|
||||
raise "Couldn't find header row" unless header?(row)
|
||||
|
||||
row = strip_non_assignment_columns(row)
|
||||
|
@ -357,6 +371,53 @@ class GradebookImporter
|
|||
end
|
||||
|
||||
protected
|
||||
|
||||
CSV_PARSE_OPTIONS = {
|
||||
converters: :nil,
|
||||
skip_lines: /^[, ]*$/
|
||||
}.freeze
|
||||
|
||||
def check_for_non_student_row(row)
|
||||
# check if this is the first row, a header row
|
||||
if @assignments.nil?
|
||||
@assignments = process_header(row)
|
||||
return true
|
||||
end
|
||||
|
||||
|
||||
if row[0] =~ /Points Possible/
|
||||
# this row is describing the assignment, has no student data
|
||||
row.shift(@student_columns)
|
||||
process_pp(row)
|
||||
return true
|
||||
end
|
||||
|
||||
if row.compact.all? { |c| c.strip =~ /^(Muted|)$/i }
|
||||
# this row is muted or empty and should not be processed at all
|
||||
return true
|
||||
end
|
||||
|
||||
false # nothing unusual, signal to process as a student row
|
||||
end
|
||||
|
||||
def csv_stream
|
||||
if contents # TODO: remove this branch entirely when S3 is proved to work
|
||||
CSV.parse(contents, CSV_PARSE_OPTIONS) do |row|
|
||||
yield row
|
||||
end
|
||||
elsif attachment
|
||||
csv_file = attachment.open(need_local_file: true)
|
||||
# using "foreach" rather than "parse" processes a chunk of the
|
||||
# file at a time rather than loading the whole file into memory
|
||||
# at once, a boon for memory consumption
|
||||
CSV.foreach(csv_file.path, CSV_PARSE_OPTIONS) do |row|
|
||||
yield row
|
||||
end
|
||||
else
|
||||
raise "We have no gradebook input to iterate over..."
|
||||
end
|
||||
end
|
||||
|
||||
def add_root_account_to_pseudonym_cache(root_account)
|
||||
pseudonyms = root_account.shard.activate do
|
||||
root_account.pseudonyms
|
||||
|
|
|
@ -32,17 +32,21 @@ describe GradebookUploadsController do
|
|||
@course.reload
|
||||
end
|
||||
|
||||
def generate_file(include_sis_id)
|
||||
def generate_file(include_sis_id=false)
|
||||
file = Tempfile.new("csv.csv")
|
||||
file.puts(GradebookExporter.new(@course, @teacher, :include_sis_id => include_sis_id).to_csv)
|
||||
file.close
|
||||
file
|
||||
end
|
||||
|
||||
def upload_gradebook_import(course, file)
|
||||
data = Rack::Test::UploadedFile.new(file.path, 'text/csv', true)
|
||||
post 'create', course_id: course.id, gradebook_upload: {uploaded_data: data}
|
||||
end
|
||||
|
||||
def check_create_response(include_sis_id=false)
|
||||
file = generate_file(include_sis_id)
|
||||
data = Rack::Test::UploadedFile.new(file.path, 'text/csv', true)
|
||||
post 'create', :course_id => @course.id, :gradebook_upload => {:uploaded_data => data}
|
||||
upload_gradebook_import(@course, file)
|
||||
expect(response).to be_success
|
||||
end
|
||||
|
||||
|
@ -77,23 +81,34 @@ describe GradebookUploadsController do
|
|||
assert_unauthorized
|
||||
end
|
||||
|
||||
it "should accept a valid csv upload" do
|
||||
user_session(@teacher)
|
||||
check_create_response
|
||||
end
|
||||
context "with authorized teacher" do
|
||||
before(:each) { user_session(@teacher) }
|
||||
|
||||
it "should accept a valid csv upload with a final grade column" do
|
||||
user_session(@teacher)
|
||||
@course.grading_standard_id = 0
|
||||
@course.save!
|
||||
check_create_response
|
||||
end
|
||||
it "should accept a valid csv upload" do
|
||||
check_create_response
|
||||
end
|
||||
|
||||
it "should accept a valid csv upload with sis id columns" do
|
||||
user_session(@teacher)
|
||||
@course.grading_standard_id = 0
|
||||
@course.save!
|
||||
check_create_response(true)
|
||||
it "puts the uploaded data into a durable attachment so it's recoverable" do
|
||||
gradebook_import_file = generate_file
|
||||
upload_gradebook_import(@course, gradebook_import_file)
|
||||
attachment = Attachment.last
|
||||
expect(gradebook_import_file.path).to include(attachment.filename)
|
||||
end
|
||||
|
||||
context "and final grade column" do
|
||||
before(:each) do
|
||||
@course.grading_standard_id = 0
|
||||
@course.save!
|
||||
end
|
||||
|
||||
it "should accept a valid csv upload with a final grade column" do
|
||||
check_create_response
|
||||
end
|
||||
|
||||
it "should accept a valid csv upload with sis id columns" do
|
||||
check_create_response(true)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -21,34 +21,35 @@ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
|
|||
require 'csv'
|
||||
|
||||
describe GradebookImporter do
|
||||
let(:gradebook_user){ user_model }
|
||||
|
||||
context "construction" do
|
||||
let!(:gradebook_course){ course_model }
|
||||
|
||||
it "should require a context, usually a course" do
|
||||
course = course_model
|
||||
user = user_model
|
||||
progress = Progress.create!(tag: "test", context: @user)
|
||||
|
||||
expect{GradebookImporter.new(1)}.to raise_error(ArgumentError, "Must provide a valid context for this gradebook.")
|
||||
expect{GradebookImporter.new(course, valid_gradebook_contents, user, progress)}.not_to raise_error
|
||||
expect{GradebookImporter.new(gradebook_course, valid_gradebook_contents, user, progress)}.not_to raise_error
|
||||
end
|
||||
|
||||
it "should store the context and make it available" do
|
||||
course_model
|
||||
new_gradebook_importer
|
||||
expect(@gi.context).to be_is_a(Course)
|
||||
end
|
||||
|
||||
it "should require the contents of an upload" do
|
||||
expect{GradebookImporter.new(course_model)}.to raise_error(ArgumentError, "Must provide CSV contents.")
|
||||
expect{ GradebookImporter.new(gradebook_course) }.
|
||||
to raise_error(ArgumentError, "Must provide CSV contents or attachment.")
|
||||
end
|
||||
|
||||
it "should store the contents and make them available" do
|
||||
course_model
|
||||
new_gradebook_importer
|
||||
expect(@gi.contents).not_to be_nil
|
||||
end
|
||||
|
||||
it "should handle points possible being sorted in weird places" do
|
||||
course_model
|
||||
importer_with_rows(
|
||||
'Student,ID,Section,Assignment 1,Final Score',
|
||||
'"Blend, Bill",6,My Course,-,',
|
||||
|
@ -60,7 +61,6 @@ describe GradebookImporter do
|
|||
end
|
||||
|
||||
it "should handle muted line and being sorted in weird places" do
|
||||
course_model
|
||||
importer_with_rows(
|
||||
'Student,ID,Section,Assignment 1,Final Score',
|
||||
'"Blend, Bill",6,My Course,-,',
|
||||
|
@ -73,10 +73,47 @@ describe GradebookImporter do
|
|||
end
|
||||
|
||||
it "creates a GradebookUpload" do
|
||||
course_model
|
||||
new_gradebook_importer
|
||||
expect(GradebookUpload.where(course_id: @course, user_id: @user)).not_to be_empty
|
||||
end
|
||||
|
||||
context "when attachment and gradebook_upload is provided" do
|
||||
|
||||
let(:attachment) do
|
||||
a = attachment_model
|
||||
file = Tempfile.new("gradebook.csv")
|
||||
file.puts("'Student,ID,Section,Assignment 1,Final Score'\n")
|
||||
file.puts("\"Blend, Bill\",6,My Course,-,\n")
|
||||
file.close
|
||||
a.stubs(:open).returns(file)
|
||||
return a
|
||||
end
|
||||
|
||||
let(:progress){ Progress.create!(tag: "test", context: gradebook_user) }
|
||||
|
||||
let(:upload) do
|
||||
GradebookUpload.create!(course: gradebook_course, user: gradebook_user, progress: progress)
|
||||
end
|
||||
|
||||
let(:importer) { new_gradebook_importer(attachment, upload, gradebook_user, progress) }
|
||||
|
||||
it "hangs onto the provided model for streaming" do
|
||||
expect(importer.attachment).to eq(attachment)
|
||||
end
|
||||
|
||||
it "nils out contents when using an attachment (saves on memory to not parse all at once)" do
|
||||
expect(importer.contents).to be_nil
|
||||
end
|
||||
|
||||
it "keeps the provided upload rather than creating a new one" do
|
||||
expect(importer.upload).to eq(upload)
|
||||
end
|
||||
|
||||
it "sets the uploads course as the importer context" do
|
||||
expect(importer.context).to eq(gradebook_course)
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
context "User lookup" do
|
||||
|
@ -593,11 +630,11 @@ CSV
|
|||
end
|
||||
end
|
||||
|
||||
def new_gradebook_importer(contents = valid_gradebook_contents)
|
||||
@user = user_model
|
||||
@progress = Progress.create!(tag: "test", context: @user)
|
||||
def new_gradebook_importer(contents = valid_gradebook_contents, context = @course, user = gradebook_user, progress = nil)
|
||||
@user = user
|
||||
@progress = progress || Progress.create!(tag: "test", context: @user)
|
||||
|
||||
@gi = GradebookImporter.new(@course, contents, @user, @progress)
|
||||
@gi = GradebookImporter.new(context, contents, @user, @progress)
|
||||
@gi.parse!
|
||||
@gi
|
||||
end
|
||||
|
@ -611,5 +648,12 @@ def valid_gradebook_contents_with_sis_login_id
|
|||
end
|
||||
|
||||
def importer_with_rows(*rows)
|
||||
new_gradebook_importer(rows.join("\n"))
|
||||
a = attachment_model
|
||||
file = Tempfile.new("gradebook_import.csv")
|
||||
rows.each do |row|
|
||||
file.puts(row)
|
||||
end
|
||||
file.close
|
||||
a.stubs(:open).returns(file)
|
||||
new_gradebook_importer(a)
|
||||
end
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
#
|
||||
# Copyright (C) 2016 Instructure, Inc.
|
||||
#
|
||||
# This file is part of Canvas.
|
||||
#
|
||||
# Canvas is free software: you can redistribute it and/or modify it under
|
||||
# the terms of the GNU Affero General Public License as published by the Free
|
||||
# Software Foundation, version 3 of the License.
|
||||
#
|
||||
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
|
||||
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
|
||||
# details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License along
|
||||
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
require_relative '../spec_helper'
|
||||
|
||||
describe GradebookUpload do
|
||||
describe ".queue_from" do
|
||||
let(:teacher_enrollment){ teacher_in_course }
|
||||
let(:teacher){ teacher_enrollment.user }
|
||||
let(:gradebook_course){ teacher_enrollment.course }
|
||||
let(:attachment_data){ {dummy: "data"} }
|
||||
|
||||
before(:each) do
|
||||
# actual attachment integration covered in gradebook_uploads_controller_spec;
|
||||
# that means in the spec the dummy hash will be enqueued instead of a real attachment
|
||||
# object
|
||||
GradebookUpload.any_instance.stubs(attachments: stub(create!: attachment_data))
|
||||
end
|
||||
|
||||
it "builds a progress object to track the import" do
|
||||
progress = GradebookUpload.queue_from(gradebook_course, teacher, attachment_data)
|
||||
expect(progress.workflow_state).to eq("queued")
|
||||
expect(progress.tag).to eq("gradebook_upload")
|
||||
expect(progress.context).to eq(gradebook_course)
|
||||
end
|
||||
|
||||
it "queues a job to run the import" do
|
||||
GradebookUpload.queue_from(gradebook_course, teacher, attachment_data)
|
||||
job = Delayed::Job.last
|
||||
expect(job.tag).to match(/GradebookImporter/)
|
||||
end
|
||||
|
||||
it "stores the input data in an attachment on the gradebook upload" do
|
||||
GradebookUpload.queue_from(gradebook_course, teacher, attachment_data)
|
||||
job = Delayed::Job.last
|
||||
expect(job.handler).to include("dummy: data")
|
||||
end
|
||||
|
||||
it "creates a GradebookUpload object to represent the upload process" do
|
||||
GradebookUpload.queue_from(gradebook_course, teacher, attachment_data)
|
||||
upload = GradebookUpload.last
|
||||
expect(upload.course).to eq(gradebook_course)
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Reference in New Issue