Check ePortfolios for spam on saving

flag=eportfolio_moderation fixes TALLY-378 Test plan: Setup: - Set some "spam" keywords for the title and content values that you want the callback to use. These are a comma-delimited set of keywords (spaces will be ignored), e.g.: > Setting.set('eportfolio_title_spam_keywords', 'bad,evil,verybad') > Setting.set('eportfolio_content_spam_keywords', 'some,more,words') Testing: For each check below: - "flagged as spam" means the associated Eportfolio object has a spam_status with a value of 'flagged_as_possible_spam' - "not flagged as spam" means the associated object has a spam_status with a value of nil (or, if you previously set it to a different value, it should stay that value) - Log in to Canvas as a student and open ePortfolios from your Account menu ePortfolio title: - Create an ePortfolio with a name that does *not* include any keywords from title_spam_keywords, and save it - It should not be flagged as spam - Create an ePortfolio with a name *including* a keyword defined in title_spam_keywords above, and save it - It should be flagged as spam The checks below all assume you are using an ePortfolio that is *not* marked as spam: you can either create a new one for each step, or re-use an existing one by setting its spam_status to nil as needed. 
ePortfolio category title: - Create a section that does *not* include any keywords from title_spam_keywords, and save it - The owning ePortfolio should not be flagged - Create a section *including* a keyword defined in title_spam_keywords, and save it - The owning ePortfolio should be flagged ePortfolio page title: - Create a page that does *not* include any keywords from title_spam_keywords, and save it - The owning ePortfolio should not be flagged - Create a page *including* a keyword defined in title_spam_keywords, and save it - The owning ePortfolio should be flagged ePortfolio page content: - Create a new page - Add some text content that does *not* include any keywords from content_spam_keywords (note the different setting!), and save it - The owning ePortfolio should not be flagged - Add some text content that *does* include keywords from content_spam_keywords, and save it - The owning ePortfolio should be flagged Change-Id: I5bdad13558ea8f43a21e07ee978dbb4a023846d6 Reviewed-on: https://gerrit.instructure.com/c/canvas-lms/+/220484 Tested-by: Jenkins Tested-by: Service Cloud Jenkins <svc.cloudjenkins@instructure.com> Reviewed-by: Spencer Olson <solson@instructure.com> Reviewed-by: Gary Mei <gmei@instructure.com> QA-Review: Steve Shepherd <sshepherd@instructure.com> Product-Review: Keith Garner <kgarner@instructure.com>
2019-12-11 12:11:18 -06:00 · 2019-12-11 12:11:18 -06:00 · 6ca715fd57
parent cf4c31ecac
commit 6ca715fd57
7 changed files with 258 additions and 0 deletions
--- a/app/models/eportfolio.rb
+++ b/app/models/eportfolio.rb
@ -22,6 +22,8 @@ class Eportfolio < ActiveRecord::Base
  has_many :eportfolio_entries, :dependent => :destroy
  has_many :attachments, :as => :context, :inverse_of => :context

+  after_save :check_for_spam, if: -> { needs_spam_review? }
+
  belongs_to :user
  validates_presence_of :user_id
  validates_length_of :name, :maximum => maximum_string_length, :allow_blank => true
@ -77,4 +79,34 @@ class Eportfolio < ActiveRecord::Base
    cat
  end
  def self.serialization_excludes; [:uuid]; end
+
+  def title_contains_spam?(title)
+    Eportfolio.spam_criteria_regexp&.match?(title)
+  end
+
+  # Marks this ePortfolio for moderator attention by persisting a spam_status
+  # of "flagged_as_possible_spam" via update!.
+  # NOTE(review): the after_save spam callbacks call this, so the update!
+  # triggers a nested save; needs_spam_review? requires spam_status to be nil,
+  # which looks like what stops the callback from firing again -- confirm.
+  def flag_as_possible_spam!
+    update!(spam_status: "flagged_as_possible_spam")
+  end
+
+  def needs_spam_review?
+    active? && spam_status.nil? && user.account.feature_enabled?(:eportfolio_moderation)
+  end
+
+  def self.spam_criteria_regexp(type: :title)
+    setting_name = type == :title ? 'eportfolio_title_spam_keywords' : 'eportfolio_content_spam_keywords'
+    spam_keywords = Setting.get(setting_name, '').
+      split(',').
+      map(&:strip).
+      reject(&:empty?)
+    return nil if spam_keywords.blank?
+
+    escaped_keywords = spam_keywords.map { |token| Regexp.escape(token) }
+    /\b(#{escaped_keywords.join('|')})\b/i
+  end
+
+  private
+
+  def check_for_spam
+    flag_as_possible_spam! if title_contains_spam?(name)
+  end
 end
--- a/app/models/eportfolio_category.rb
+++ b/app/models/eportfolio_category.rb
@ -23,6 +23,8 @@ class EportfolioCategory < ActiveRecord::Base
  belongs_to :eportfolio

  before_save :infer_unique_slug
+  after_save :check_for_spam, if: -> { eportfolio.needs_spam_review? }
+
  validates_presence_of :eportfolio_id
  validates_length_of :name, :maximum => maximum_string_length, :allow_blank => true

@ -39,4 +41,10 @@ class EportfolioCategory < ActiveRecord::Base
    end
  end
  protected :infer_unique_slug
+
+  private
+
+  def check_for_spam
+    eportfolio.flag_as_possible_spam! if eportfolio.title_contains_spam?(name)
+  end
 end
--- a/app/models/eportfolio_entry.rb
+++ b/app/models/eportfolio_entry.rb
@ -27,6 +27,8 @@ class EportfolioEntry < ActiveRecord::Base
  acts_as_list :scope => :eportfolio_category
  before_save :infer_unique_slug
  before_save :infer_comment_visibility
+  after_save :check_for_spam, if: -> { eportfolio.needs_spam_review? }
+
  after_save :update_portfolio
  validates_presence_of :eportfolio_id
  validates_presence_of :eportfolio_category_id
@ -161,4 +163,24 @@ class EportfolioEntry < ActiveRecord::Base
      entry.content   = Atom::Content::Html.new(rendered_content)
    end
  end
+
+  private
+
+  def content_contains_spam?
+    content_regexp = Eportfolio.spam_criteria_regexp(type: :content)
+    return if content_regexp.blank?
+
+    content_bodies = content_sections.map do |section|
+      if section.is_a?(String)
+        section
+      elsif section.is_a?(Hash)
+        section[:content]
+      end
+    end
+    content_bodies.compact.any? { |content| content_regexp.match?(content) }
+  end
+
+  def check_for_spam
+    eportfolio.flag_as_possible_spam! if eportfolio.title_contains_spam?(name) || content_contains_spam?
+  end
 end
--- a/config/feature_flags/apogee_release_flags.yml
+++ b/config/feature_flags/apogee_release_flags.yml
@ -33,3 +33,9 @@ new_gradebook_sort_options:
  environments:
    production:
      state: disabled
+eportfolio_moderation:
+  state: hidden
+  applies_to: RootAccount
+  display_name: ePortfolio Moderation
+  description: Flags new ePortfolio content matching specific keywords as possible spam, and allows
+    administrators to either confirm flagged ePortfolios or mark them as safe.
--- a/spec/models/eportfolio_category_spec.rb
+++ b/spec/models/eportfolio_category_spec.rb
@ -0,0 +1,70 @@
+#
+# Copyright (C) 2011 - present Instructure, Inc.
+#
+# This file is part of Canvas.
+#
+# Canvas is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation, version 3 of the License.
+#
+# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper.rb')
+
+describe EportfolioCategory do
+  let(:user) { User.create! }
+  let(:eportfolio) { Eportfolio.create!(name: "my file", user: user) }
+  let(:spam_status) { eportfolio.reload.spam_status }
+  let(:category) { eportfolio.eportfolio_categories.create!(name: "my category") }
+
+  describe "callbacks" do
+    describe "#check_for_spam" do
+      context "when the setting has a value and the release flag is enabled" do
+        before(:each) do
+          user.account.root_account.enable_feature!(:eportfolio_moderation)
+          Setting.set('eportfolio_title_spam_keywords', 'bad, verybad, worse')
+        end
+
+        it "marks the owning portfolio as possible spam when the title matches one or more keywords" do
+          category.update!(name: "my bad category")
+          expect(spam_status).to eq "flagged_as_possible_spam"
+        end
+
+        it "does not mark as spam when the title matches no keywords" do
+          expect {
+            category.update!(name: "my great and notbad category")
+          }.not_to change { spam_status }
+        end
+
+        it "does not mark as spam if a spam_status already exists" do
+          eportfolio.update!(spam_status: "marked_as_safe")
+
+          expect {
+            category.update!(name: "actually a bad category")
+          }.not_to change { spam_status }
+        end
+      end
+
+      it "does not attempt to mark as spam when the setting is empty" do
+        user.account.root_account.enable_feature!(:eportfolio_moderation)
+        expect {
+          category.update!(name: "actually a bad category")
+        }.not_to change { spam_status }
+      end
+
+      it "does not attempt to mark as spam when the release flag is not enabled" do
+        Setting.set('eportfolio_title_spam_keywords', 'bad, verybad, worse')
+        expect {
+          eportfolio.update!(name: "actually a bad page")
+        }.not_to change { spam_status }
+      end
+    end
+  end
+end
--- a/spec/models/eportfolio_entry_spec.rb
+++ b/spec/models/eportfolio_entry_spec.rb
@ -145,4 +145,76 @@ describe EportfolioEntry do
      @eportfolio_entry.save!
      expect(@eportfolio.updated_at.to_i).not_to eq(old_time.to_i)
    end
+
+  describe "callbacks" do
+    before(:once) do
+      eportfolio_model
+    end
+
+    describe "#check_for_spam" do
+      let(:spam_status) { @eportfolio.reload.spam_status }
+      let(:entry) { @eportfolio_entry }
+
+      context "when the setting has a value and the release flag is enabled" do
+        before(:each) do
+          @user.account.root_account.enable_feature!(:eportfolio_moderation)
+          Setting.set('eportfolio_title_spam_keywords', 'bad, verybad, worse')
+          Setting.set('eportfolio_content_spam_keywords', 'injurious,deleterious')
+        end
+
+        it "marks the owning portfolio as possible spam when the title matches any title keywords" do
+          entry.update!(name: "my verybad page")
+          expect(spam_status).to eq "flagged_as_possible_spam"
+        end
+
+        it "marks the owning portfolio as possible spam when a content section matches any content keywords" do
+          entry.parse_content(
+            section_count: 1,
+            section_1: {
+              section_type: "html",
+              content: "<p>This is my deleterious page</p>"
+            }
+          )
+          entry.save!
+          expect(spam_status).to eq "flagged_as_possible_spam"
+        end
+
+        it "does not mark as spam when neither the title nor the content match their respective offending keywords" do
+          expect {
+            entry.update!(name: "my injurious page")
+            entry.parse_content(
+              section_count: 1,
+              section_1: {
+                section_type: "html",
+                content: "<p>This is my bad page</p>"
+              }
+            )
+            entry.save!
+          }.not_to change { spam_status }
+        end
+
+        it "does not mark as spam if a spam_status already exists" do
+          @eportfolio.update!(spam_status: "marked_as_safe")
+
+          expect {
+            entry.update!(name: "actually a bad page")
+          }.not_to change { spam_status }
+        end
+      end
+
+      it "does not attempt to mark as spam when the setting is empty" do
+        @user.account.root_account.enable_feature!(:eportfolio_moderation)
+        expect {
+          entry.update!(name: "actually a bad page")
+        }.not_to change { spam_status }
+      end
+
+      it "does not attempt to mark as spam when the release flag is not enabled" do
+        Setting.set('eportfolio_title_spam_keywords', 'bad, verybad, worse')
+        expect {
+          entry.update!(name: "actually a bad page")
+        }.not_to change { spam_status }
+      end
+    end
+  end
 end
--- a/spec/models/eportfolio_spec.rb
+++ b/spec/models/eportfolio_spec.rb
@ -71,4 +71,52 @@ describe Eportfolio do
      expect(@category.reload.eportfolio_entries).not_to be_empty
    end
  end
+
+  describe "callbacks" do
+    describe "#check_for_spam" do
+      let(:user) { User.create! }
+      let(:eportfolio) { Eportfolio.create!(name: "my file", user: user) }
+      let(:spam_status) { eportfolio.reload.spam_status }
+
+      context "when the setting has a value and the release flag is enabled" do
+        before(:each) do
+          user.account.root_account.enable_feature!(:eportfolio_moderation)
+          Setting.set('eportfolio_title_spam_keywords', 'bad, verybad, worse')
+        end
+
+        it "marks as possible spam when the title matches one or more keywords" do
+          eportfolio.update!(name: "my verybad page")
+          expect(spam_status).to eq "flagged_as_possible_spam"
+        end
+
+        it "does not mark as spam when the title matches no keywords" do
+          expect {
+            eportfolio.update!(name: "my great and notbad page")
+          }.not_to change { spam_status }
+        end
+
+        it "does not mark as spam if a spam_status already exists" do
+          eportfolio.update!(spam_status: "marked_as_safe")
+
+          expect {
+            eportfolio.update!(name: "actually a bad page")
+          }.not_to change { spam_status }
+        end
+      end
+
+      it "does not attempt to mark as spam when the setting is empty" do
+        user.account.root_account.enable_feature!(:eportfolio_moderation)
+        expect {
+          eportfolio.update!(name: "actually a bad page")
+        }.not_to change { spam_status }
+      end
+
+      it "does not attempt to mark as spam when the release flag is not enabled" do
+        Setting.set('eportfolio_title_spam_keywords', 'bad, verybad, worse')
+        expect {
+          eportfolio.update!(name: "actually a bad page")
+        }.not_to change { spam_status }
+      end
+    end
+  end
 end