Friendly error messages on outcomes CSV import

Fixes OUT-1885 Test Plan: - Find your canvas account id. - Create an invalid CSV import file by copying: spec/lib/outcomes/fixtures/demo.csv to, for example, error.csv. Delete one of the outcome groups in this file, which will invalidate the `parent_guid` references to it. - Run (if you have sharding enabled, add `switch_to_shard ... ; ` before the Outcomes::CsvImporter command): dc run --rm web bundle exec rails r \ "puts Outcomes::CsvImporter.new('error.csv', Account.find_by(id: $ID)).run" - Verify that an error for the missing parent reference has been generated. Change-Id: I252c97281121aee597464a4417a6e0657fba03a8 Reviewed-on: https://gerrit.instructure.com/141670 Tested-by: Jenkins Reviewed-by: Augusto Callejas <acallejas@instructure.com> QA-Review: Andrew Porter <hporter-c@instructure.com> Product-Review: Sidharth Oberoi <soberoi@instructure.com>
2018-02-12 17:56:58 -05:00 · 2018-02-12 17:56:58 -05:00 · 8ee874ef3a
parent 5a247826c1
commit 8ee874ef3a
7 changed files with 356 additions and 40 deletions
--- a/lib/outcomes/csv_importer.rb
+++ b/lib/outcomes/csv_importer.rb
@ -47,29 +47,62 @@ module Outcomes

    def run
      headers = nil
-      CSV.new(File.new(@path, 'r:UTF-8')).each_slice(BATCH_SIZE) do |batch|
-        headers ||= validate_headers(batch.shift)
-        Account.transaction do
-          batch.each do |row|
-            import_row(headers, row)
+      errors = []
+      begin
+        rows = CSV.new(File.new(@path, 'rb')).to_enum
+        rows.with_index(1).each_slice(BATCH_SIZE) do |batch|
+          headers ||= validate_headers(*batch.shift)
+          errors += parse_batch(headers, batch)
+        end
+      rescue ParseError => e
+        return [[1, e.message]]
+      end
+
+      errors
+    end
+
+    def parse_batch(headers, batch)
+      Account.transaction do
+        results = batch.map do |row, line|
+          begin
+            utf8_row = row.map(&method(:check_encoding))
+            import_row(headers, utf8_row)
+            []
+          rescue ParseError => e
+            [[line, e.message]]
+          rescue ActiveRecord::RecordInvalid => e
+            errors = e.record.errors
+            errors.set_reporter(:array, :human)
+            errors.to_a.map { |err| [line, err] }
          end
        end
+
+        results.flatten(1)
      end
    end

    private

-    def validate_headers(row)
+    def check_encoding(str)
+      encoded = str&.force_encoding('utf-8')
+      valid = (encoded || '').valid_encoding?
+      raise ParseError, I18n.t('Not a valid utf-8 string: %{string}', string: str.inspect) unless valid
+      encoded
+    end
+
+    def validate_headers(row, _index)
      main_columns_end = row.find_index('ratings') || row.length
-      headers = row.slice(0, main_columns_end).map(&:to_sym).to_a
+      headers = row.slice(0, main_columns_end).map(&:to_sym)

-      # OUT-1885 : validate that ratings headers are empty
+      after_ratings = row[(main_columns_end + 1)..-1] || []
+      after_ratings = after_ratings.select(&:present?).map(&:to_s)
+      raise ParseError, I18n.t("Invalid fields after ratings: %{fields}", fields: after_ratings.inspect) unless after_ratings.empty?

-      missing = REQUIRED_FIELDS - headers
-      raise ParseError, "Missing required fields: #{missing.inspect}" unless missing.empty?
+      missing = (REQUIRED_FIELDS - headers).map(&:to_s)
+      raise ParseError, I18n.t("Missing required fields: %{fields}", fields: missing.inspect) unless missing.empty?

-      invalid = headers - OPTIONAL_FIELDS - REQUIRED_FIELDS
-      raise ParseError, "Invalid fields: #{invalid.inspect}" unless invalid.empty?
+      invalid = (headers - OPTIONAL_FIELDS - REQUIRED_FIELDS).map(&:to_s)
+      raise ParseError, I18n.t("Invalid fields: %{fields}", fields: invalid.inspect) unless invalid.empty?

      headers
    end
@ -84,12 +117,29 @@ module Outcomes
    end

    def parse_ratings(ratings)
-      drop_trailing_nils(ratings).each_slice(2).to_a.map do |points, description|
-        # OUT-1885 : validate that points are in order and not nil
+      prior = nil
+      drop_trailing_nils(ratings).each_slice(2).to_a.map.with_index(1) do |(points, description), index|
+        raise ParseError, I18n.t("Points for rating tier %{index} not present", index: index) if points.nil? || points.blank?
+        points = strict_parse_int(points, index)
+
+        if prior.present? && prior < points
+          raise ParseError, I18n.t(
+            "Points for tier %{index} must be less than points for prior tier (%{points} is greater than %{prior})",
+            index: index, prior: prior, points: points
+          )
+        end
+
+        prior = points
        { points: points, description: description }
      end
    end

+    def strict_parse_int(v, index)
+      Integer(v)
+    rescue ArgumentError
+      raise ParseError, I18n.t('Invalid points for rating tier %{index}: "%{i}"', index: index, i: v)
+    end
+
    def drop_trailing_nils(array)
      array.pop while array.last.nil? && !array.empty?
      array
--- a/lib/outcomes/import.rb
+++ b/lib/outcomes/import.rb
@ -22,24 +22,40 @@ module Outcomes
    class ParseError < RuntimeError
    end

+    OBJECT_ONLY_FIELDS = %i[calculation_method calculation_int ratings].freeze
+    VALID_WORKFLOWS = ['', 'active', 'deleted'].freeze
+
+    def check_object(object)
+      raise ParseError, Messages.field_required('vendor_guid') if object[:vendor_guid].blank?
+      raise ParseError, Messages.vendor_guid_no_spaces if object[:vendor_guid].include? ' '
+      raise ParseError, Messages.field_required('title') if object[:title].blank?
+
+      valid_workflow = VALID_WORKFLOWS.include? object[:workflow_state]
+      raise ParseError, Messages.workflow_state_invalid unless valid_workflow
+    end
+
    def import_object(object)
+      check_object(object)
+
      type = object[:object_type]
      if type == 'outcome'
        import_outcome(object)
      elsif type == 'group'
        import_group(object)
      else
-        raise ParseError, "Invalid object type: #{type}"
+        raise ParseError, Messages.invalid_object_type(type)
      end
    end

    def import_group(group)
+      invalid = group.keys.select do |k|
+        group[k].present? && OBJECT_ONLY_FIELDS.include?(k)
+      end
+      raise ParseError, Messages.invalid_group_fields(invalid) if invalid.present?
      parents = find_parents(group)
-      raise ParseError, "An outcome group can only have one parent" if parents.length > 1
+      raise ParseError, Messages.group_single_parent if parents.length > 1
      parent = parents.first

-      # OUT-1885 : require title / vendor_guid fields
-
      LearningOutcomeGroup.create!(
        context: @context,
        vendor_guid: group[:vendor_guid],
@ -53,8 +69,6 @@ module Outcomes
    def import_outcome(outcome)
      parents = find_parents(outcome)

-      # OUT-1885 : require title / vendor_guid fields
-
      imported = LearningOutcome.new(
        context: @context,
        title: outcome[:title],
@ -87,10 +101,46 @@ module Outcomes
      return [root_parent] if object[:parent_guids].nil? || object[:parent_guids].blank?

      guids = object[:parent_guids].strip.split
-      # OUT-1885 : throw error on missing parents
-      LearningOutcomeGroup.where(plural_vendor_clause(guids))
+      parents = LearningOutcomeGroup.where(plural_vendor_clause(guids)).to_a
+      missing = guids - parents.map(&:vendor_guid)
+      raise ParseError, Messages.missing_parents(missing) if missing.present?
+
+      parents
+    end
+
+    module Messages
+      def self.workflow_state_invalid
+        I18n.t(
+          '"%{field}" must be either "%{active}" or "%{deleted}"',
+          field: 'workflow_state',
+          active: 'active',
+          deleted: 'deleted'
+        )
+      end
+
+      def self.field_required(field)
+        I18n.t('The "%{field}" field is required', field: field)
+      end
+
+      def self.vendor_guid_no_spaces
+        I18n.t('The "%{field}" field must have no spaces', field: 'vendor_guid')
+      end
+
+      def self.invalid_object_type(type)
+        I18n.t('Invalid %{field}: "%{type}"', field: 'object_type', type: type)
+      end
+
+      def self.invalid_group_fields(invalid)
+        I18n.t('Invalid fields for a group: %{invalid}', invalid: invalid.map(&:to_s).inspect)
+      end
+
+      def self.group_single_parent
+        I18n.t('An outcome group can only have one parent')
+      end
+
+      def self.missing_parents(missing)
+        I18n.t('Missing parent groups: %{missing}', missing: missing.inspect)
+      end
    end
  end
-
-  # OUT-1885 : need lots of tests to verify that errors are thrown properly
 end
--- a/spec/lib/outcomes/csv_importer_spec.rb
+++ b/spec/lib/outcomes/csv_importer_spec.rb
@ -29,14 +29,19 @@ describe Outcomes::CsvImporter do
      outcomes.merge(groups)
    end

+    def expect_ok_import(path)
+      errors = Outcomes::CsvImporter.new(path, nil).run
+      expect(errors).to eq([])
+    end
+
    it 'can import the demo csv file' do
-      Outcomes::CsvImporter.new(csv_path('demo'), nil).run
+      expect_ok_import(csv_path('demo'))
      expect(LearningOutcomeGroup.count).to eq(3)
      expect(LearningOutcome.count).to eq(1)
    end

    it 'imports group attributes correctly' do
-      Outcomes::CsvImporter.new(csv_path('demo'), nil).run
+      expect_ok_import(csv_path('demo'))

      group = by_guid['b']
      expect(group.title).to eq('B')
@ -45,7 +50,7 @@ describe Outcomes::CsvImporter do
    end

    it 'imports outcome attributes correctly' do
-      Outcomes::CsvImporter.new(csv_path('demo'), nil).run
+      expect_ok_import(csv_path('demo'))

      outcome = by_guid['c']
      expect(outcome.title).to eq('C')
@ -61,7 +66,7 @@ describe Outcomes::CsvImporter do
    end

    it 'imports ratings correctly' do
-      Outcomes::CsvImporter.new(csv_path('scoring'), nil).run
+      expect_ok_import(csv_path('scoring'))

      criteria = by_guid['c'].rubric_criterion
      ratings = criteria[:ratings].sort_by { |r| r[:points] }
@ -75,13 +80,13 @@ describe Outcomes::CsvImporter do
    end

    it 'works when no ratings are present' do
-      Outcomes::CsvImporter.new(csv_path('no-ratings'), nil).run
+      expect_ok_import(csv_path('no-ratings'))

      expect(by_guid['c'].rubric_criterion).to eq(nil)
    end

    it 'properly sets scoring types' do
-      Outcomes::CsvImporter.new(csv_path('scoring'), nil).run
+      expect_ok_import(csv_path('scoring'))

      by_method = LearningOutcome.all.to_a.group_by(&:calculation_method)

@ -94,16 +99,227 @@ describe Outcomes::CsvImporter do

    it 'can import a utf-8 csv file with non-ascii characters' do
      guid = 'søren'
-      Outcomes::CsvImporter.new(csv_path('nor'), nil).run
+      expect_ok_import(csv_path('nor'))
      expect(LearningOutcomeGroup.where(vendor_guid: guid).count).to eq(1)
    end

    it 'can import csv files with chinese characters' do
      guid = '作戰'
-      Outcomes::CsvImporter.new(csv_path('chn'), nil).run
+      expect_ok_import(csv_path('chn'))
      expect(LearningOutcomeGroup.where(vendor_guid: guid).count).to eq(1)
    end
  end

-  # OUT-1885 : need lots of specs for testing error messages on invalid row content
+  def import_fake_csv(rows)
+    Tempfile.open do |tf|
+      CSV.open(tf.path, 'wb') do |csv|
+        rows.each { |r| csv << r }
+      end
+      Outcomes::CsvImporter.new(tf.path, nil).run
+    end
+  end
+
+  def expect_import_error(rows, expected)
+    errors = import_fake_csv(rows)
+    expect(errors).to eq(expected)
+  end
+
+  describe 'throws user-friendly header errors' do
+    it 'when required headers are missing' do
+      expect_import_error(
+        [['parent_guids', 'ratings']],
+        [[1, 'Missing required fields: ["title", "vendor_guid", "object_type"]']]
+      )
+    end
+
+    it 'when other headers are after the ratings header' do
+      expect_import_error(
+        [['parent_guids', 'ratings', 'vendor_guid', '', 'blagh', nil]],
+        [[1, 'Invalid fields after ratings: ["vendor_guid", "blagh"]']]
+      )
+    end
+
+    it 'when invalid headers are present' do
+      expect_import_error(
+        [['vendor_guid', 'title', 'object_type', 'spanish_inquisition', 'parent_guids', 'ratings']],
+        [[1, 'Invalid fields: ["spanish_inquisition"]']]
+      )
+    end
+  end
+
+  describe 'throws user-friendly row errors' do
+    let(:headers) do
+      %w[
+        title
+        vendor_guid
+        object_type
+        parent_guids
+        calculation_method
+        calculation_int
+        workflow_state
+      ]
+    end
+
+    def outcome_row(**changes)
+      valid = {
+        title: 'title',
+        vendor_guid: SecureRandom.uuid,
+        object_type: 'outcome',
+        parent_guids: '',
+        calculation_method: 'highest',
+        calculation_int: '',
+        workflow_state: ''
+      }
+
+      row = valid.merge(changes)
+      headers.map { |k| row[k.to_sym] }
+    end
+
+    def group_row(**changes)
+      valid = {
+        title: 'title',
+        vendor_guid: SecureRandom.uuid,
+        object_type: 'group',
+        parent_guids: '',
+        calculation_method: '',
+        calculation_int: '',
+        workflow_state: ''
+      }
+
+      row = valid.merge(changes)
+      headers.map { |k| row[k.to_sym] }
+    end
+
+    it 'if rating tiers have points missing' do
+      expect_import_error(
+        [
+          headers + ['ratings'],
+          outcome_row + ['1', 'Sad Trombone', '', 'Zesty Trombone']
+        ],
+        [[2, 'Points for rating tier 2 not present']]
+      )
+    end
+
+    it 'if rating tiers have invalid points values' do
+      expect_import_error(
+        [
+          headers + ['ratings'],
+          outcome_row + ['1', 'Sad Trombone', 'bwaaaaaa bwa bwaaaaa', 'Zesty Trombone']
+        ],
+        [[2, 'Invalid points for rating tier 2: "bwaaaaaa bwa bwaaaaa"']]
+      )
+    end
+
+    it 'if rating tiers have points in wrong order' do
+      expect_import_error(
+        [
+          headers + ['ratings'],
+          outcome_row + ['1', 'Sad Trombone', '2', 'Zesty Trombone']
+        ],
+        [[2, 'Points for tier 2 must be less than points for prior tier (2 is greater than 1)']]
+      )
+    end
+
+    it 'if object_type is incorrect' do
+      expect_import_error(
+        [
+          headers,
+          group_row(object_type: 'giraffe'),
+        ],
+        [[2, 'Invalid object_type: "giraffe"']]
+      )
+    end
+
+    it 'if parent_guids refers to missing outcomes' do
+      expect_import_error(
+        [
+          headers,
+          group_row(vendor_guid: 'a'),
+          outcome_row(vendor_guid: 'child', parent_guids: 'a b c'),
+        ],
+        [[3, 'Missing parent groups: ["b", "c"]']]
+      )
+    end
+
+    it 'if required fields are missing' do
+      expect_import_error(
+        [
+          headers,
+          outcome_row(object_type: 'group', title: ''),
+        ],
+        [[2, 'The "title" field is required']]
+      )
+
+      expect_import_error(
+        [
+          headers,
+          outcome_row(vendor_guid: ''),
+        ],
+        [[2, 'The "vendor_guid" field is required']]
+      )
+    end
+
+    it 'if vendor_guid is invalid' do
+      expect_import_error(
+        [
+          headers,
+          outcome_row(vendor_guid: 'look some spaces'),
+        ],
+        [[2, 'The "vendor_guid" field must have no spaces']]
+      )
+    end
+
+    it 'if workflow_state is invalid' do
+      expect_import_error(
+        [
+          headers,
+          outcome_row(workflow_state: 'limbo'),
+          group_row(workflow_state: 'limbo'),
+        ],
+        [
+          [2, '"workflow_state" must be either "active" or "deleted"'],
+          [3, '"workflow_state" must be either "active" or "deleted"'],
+        ]
+      )
+    end
+
+    it 'if a value has an invalid utf-8 byte sequence' do
+      expect_import_error(
+        [
+          headers,
+          outcome_row(title: "evil \xFF utf-8".force_encoding("ASCII-8BIT")),
+        ],
+        [
+          [2, "Not a valid utf-8 string: \"evil \\xFF utf-8\""]
+        ]
+      )
+    end
+
+    it 'if a validation fails' do
+      methods = '["decaying_average", "n_mastery", "highest", "latest"]'
+      expect_import_error(
+        [
+          headers,
+          outcome_row(calculation_method: 'goofy'),
+        ],
+        [
+          [2, "Calculation method calculation_method must be one of the following: #{methods}"],
+        ]
+      )
+    end
+
+    it 'if a group receives invalid fields' do
+      expect_import_error(
+        [
+          headers + ['ratings'],
+          group_row(
+            vendor_guid: 'a',
+            calculation_method: 'n_mastery',
+            calculation_int: '5',
+          ) + ['1', 'Sad Trombone'],
+        ],
+        [[2, 'Invalid fields for a group: ["calculation_method", "calculation_int", "ratings"]']]
+      )
+    end
+  end
 end
--- a/spec/lib/outcomes/fixtures/chn.csv
+++ b/spec/lib/outcomes/fixtures/chn.csv
@ -1,4 +1,4 @@
 canvas_id,vendor_guid,object_type,title,description,display_name,calculation_method,calculation_int,workflow_state,parent_guids,ratings,,,,,,,
 ,始計,group,始計,孫子曰：兵者，國之大事，死生之地，存亡之道，不可不察也。,testing,,,active,,,,,,,,,
 ,作戰,group,作戰,故經之以五事，校之以計，而索其情，一曰道，二曰天，三曰地，四曰將，五曰法。,123,,,active,始計,,,,,,,,
-,軍爭,outcome,軍爭,道者，令民與上同意也，可與之死，可與之生，而不畏危。天者，陰陽，寒暑，時制也。地者，遠近，險易，廣狹，死生也。將者，智，信，仁，勇，嚴也。法者，曲制，官道，主用也。凡此五者，將莫不聞，知之者勝，不知者不勝。,456,decaying_average,40,active,始計 作戰,1,地形,2,九地,3,火攻,,
+,軍爭,outcome,軍爭,道者，令民與上同意也，可與之死，可與之生，而不畏危。天者，陰陽，寒暑，時制也。地者，遠近，險易，廣狹，死生也。將者，智，信，仁，勇，嚴也。法者，曲制，官道，主用也。凡此五者，將莫不聞，知之者勝，不知者不勝。,456,decaying_average,40,active,始計 作戰,3,地形,2,九地,1,火攻,,
--- a/spec/lib/outcomes/fixtures/demo.csv
+++ b/spec/lib/outcomes/fixtures/demo.csv
@ -1,4 +1,4 @@
 canvas_id,vendor_guid,object_type,title,description,display_name,calculation_method,calculation_int,workflow_state,parent_guids,ratings,,,,,,,
 ,a,group,A,AAA,display,,,active,,,,,,,,,
 ,b,group,B,BBB,names,,,active,a,,,,,,,,
-,c,outcome,C,CCC,here,decaying_average,40,active,a b,1,Good,2,Better,3,Betterest,,
+,c,outcome,C,CCC,here,decaying_average,40,active,a b,3,Betterest,2,Better,1,Good,,
--- a/spec/lib/outcomes/fixtures/nor.csv
+++ b/spec/lib/outcomes/fixtures/nor.csv
@ -22,4 +22,4 @@ hører han. Og for nu at være et venlig og faderlig Overhode for alle
 opmuntrer Isak dem med at si: Jaja, jeg slaar nu denne Teigen, saa faar
 dokker breie han imorgen! -- Du ser dig vel ikke Raad til at komme ind
 og faa dig Mat? spør Inger overvældet. -- Nei. Jeg har nu andet at
-gjøre! svarer han.",456,decaying_average,40,active,a søren,1,Good,2,Better,3,Betterest,,
+gjøre! svarer han.",456,decaying_average,40,active,a søren,3,Good,2,søren,1,Betterest,,
--- a/spec/lib/outcomes/fixtures/scoring.csv
+++ b/spec/lib/outcomes/fixtures/scoring.csv
@ -1,8 +1,8 @@
 canvas_id,vendor_guid,object_type,title,description,display_name,calculation_method,calculation_int,workflow_state,parent_guids,ratings,,,,,,,
 ,a,group,A,AAA,display,,,active,,,,,,,,,
 ,b,group,B,BBB,names,,,active,a,,,,,,,,
-,c,outcome,C,CCC,here,,,active,a b,1,Good,2,Better,3,Betterest,,
-,d,outcome,D,DDD,here,highest,,active,a,1,Good,2,,,,,
-,e,outcome,E,EEE,here,latest,,active,a,1,Good,2,,,,,
-,f,outcome,F,FFF,here,decaying_average,40,active,b,1,Good,2,,,,,
+,c,outcome,C,CCC,here,,,active,a b,3,Betterest,2,Better,1,Good,,
+,d,outcome,D,DDD,here,highest,,active,a,2,Good,1,,,,,
+,e,outcome,E,EEE,here,latest,,active,a,2,Good,1,,,,,
+,f,outcome,F,FFF,here,decaying_average,40,active,b,2,Good,1,,,,,
 ,g,outcome,G,GGG,here,n_mastery,3,active,b,1,,,,,,,