switch yaml engine to psych

closes #CNVS-27229

Change-Id: I257718e3e4603a0e8a25519ab10247f26d57394b
Reviewed-on: https://gerrit.instructure.com/77480
Tested-by: Jenkins
Reviewed-by: Cody Cutrer <cody@instructure.com>
Product-Review: James Williams  <jamesw@instructure.com>
QA-Review: James Williams  <jamesw@instructure.com>
This commit is contained in:
James Williams 2016-04-20 09:19:44 -06:00
parent 8640d7b7c6
commit 75f21c55b2
12 changed files with 87 additions and 413 deletions

View File

@ -43,7 +43,7 @@ class SubmissionVersion < ActiveRecord::Base
begin
return nil unless Submission.where(:id => version.versionable_id).exists?
version.model
rescue ArgumentError
rescue Psych::SyntaxError
return nil
end
else

View File

@ -1,10 +1,16 @@
class BeginPsychMigration < ActiveRecord::Migration
tag :postdeploy
tag :predeploy
disable_ddl_transaction!
def runnable?
Shard.current.default?
end
def up
DataFixup::PsychMigration.run if CANVAS_RAILS4_0 || !Rails.env.test?
if User.exists? # don't raise for a fresh install
raise "WARNING:\n
This migration needs to be run with the release/2016-04-23 version of canvas-lms to
change all yaml columns in the database to a Psych compatible format.\n"
end
end
def down

View File

@ -0,0 +1,10 @@
class CleanseTheSyckness < ActiveRecord::Migration
tag :postdeploy
def up
DataFixup::SycknessCleanser.send_later_if_production_enqueue_args(:run, {:strand => Shard.current.database_server.id.to_s})
end
def down
end
end

View File

@ -20,20 +20,15 @@
# and before canvas-jobs
# We need to make sure that safe_yaml is loaded *after* the YAML engine
# is switched to Syck. Otherwise we
# is switched to Psych. Otherwise we
# won't have access to (safe|unsafe)_load.
require 'yaml'
require 'syck'
YAML::ENGINE.yamler = 'syck' if defined?(YAML::ENGINE)
require 'syck' # so we can undo all the things before something else requires it
YAML::ENGINE.yamler = 'psych' if defined?(YAML::ENGINE)
require 'safe_yaml'
trusted_tags = SafeYAML::TRUSTED_TAGS.dup
trusted_tags << 'tag:yaml.org,2002:merge'
SafeYAML.send(:remove_const, :TRUSTED_TAGS)
SafeYAML.const_set(:TRUSTED_TAGS, trusted_tags.freeze)
module FixSafeYAMLNullMerge
def merge_into_hash(hash, array)
return unless array
@ -49,25 +44,6 @@ SafeYAML::OPTIONS.merge!(
# This tag whitelist is syck specific. We'll need to tweak it when we upgrade to psych.
# See the tests in spec/lib/safe_yaml_spec.rb
whitelisted_tags: %w[
tag:ruby.yaml.org,2002:symbol
tag:yaml.org,2002:float
tag:yaml.org,2002:float#exp
tag:yaml.org,2002:float#inf
tag:yaml.org,2002:str
tag:yaml.org,2002:timestamp
tag:yaml.org,2002:timestamp#iso8601
tag:yaml.org,2002:timestamp#spaced
tag:yaml.org,2002:map:HashWithIndifferentAccess
tag:yaml.org,2002:map:ActiveSupport::HashWithIndifferentAccess
tag:ruby.yaml.org,2002:object:Class
tag:ruby.yaml.org,2002:object:OpenStruct
tag:ruby.yaml.org,2002:object:Scribd::Document
tag:ruby.yaml.org,2002:object:Mime::Type
tag:ruby.yaml.org,2002:object:URI::HTTP
tag:ruby.yaml.org,2002:object:URI::HTTPS
tag:ruby.yaml.org,2002:object:OpenObject
tag:yaml.org,2002:map:WeakParameters
] + %w[
!ruby/symbol
!binary
!float
@ -96,43 +72,6 @@ SafeYAML::OPTIONS.merge!(
module Syckness
TAG = "#GETDOWNWITHTHESYCKNESS\n"
module SyckDumper
def dump(*args)
Psych.dump(*args)
end
end
module PsychDumper
def dump(*args)
yaml = super
yaml += TAG if yaml.is_a?(String)
yaml
end
end
module SafeLoader
require "safe_yaml/psych_resolver"
require "safe_yaml/safe_to_ruby_visitor"
def load(yaml, *args)
if yaml.is_a?(String) && yaml.end_with?(TAG)
SafeYAML::PsychResolver.new.resolve_node(Psych.parse(yaml))
else
super
end
end
end
module UnsafeLoader
def unsafe_load(yaml, *args)
if yaml.is_a?(String) && yaml.end_with?(TAG)
Psych.load(yaml)
else
super
end
end
end
end
[Object, Hash, Struct, Array, Exception, String, Symbol, Range, Regexp, Time,
@ -142,11 +81,6 @@ end
end
end
Syck.singleton_class.prepend(Syckness::SyckDumper)
Psych.singleton_class.prepend(Syckness::PsychDumper)
SafeYAML.singleton_class.prepend(Syckness::SafeLoader)
YAML.singleton_class.prepend(Syckness::UnsafeLoader)
SafeYAML::PsychResolver.class_eval do
attr_accessor :aliased_nodes
end

View File

@ -1,166 +0,0 @@
require "safe_yaml/psych_resolver"
require "safe_yaml/safe_to_ruby_visitor"
module DataFixup::PsychMigration
class << self
def run
raise "Rails 4.0 specific" unless CANVAS_RAILS4_0
columns_hash.each do |model, columns|
next if model.shard_category == :unsharded && Shard.current != Shard.default
if ranges = id_ranges(model)
ranges.each do |start_at, end_at|
queue_migration(model, columns, start_at, end_at)
end
end
end
end
def queue_migration(model, columns, start_at, end_at)
args = [model, columns, start_at, end_at]
if run_immediately?
self.migrate_yaml(nil, *args)
elsif Account.site_admin
progress = Progress.create!(:context => Account.site_admin, :tag => "psych_migration")
progress.set_results({:model_name => model.name, :start_at => start_at, :end_at => end_at})
progress.process_job(self, :migrate_yaml, {:n_strand => ["psych_migration", Shard.current.database_server.id],
:priority => Delayed::MAX_PRIORITY, :max_attempts => 1}, *args)
else
self.send_later_enqueue_args(:migrate_yaml, {:n_strand => ["psych_migration", Shard.current.database_server.id],
:priority => Delayed::MAX_PRIORITY, :max_attempts => 1}, nil, *args)
end
end
def run_immediately?
!Rails.env.production?
end
def migrate_yaml(progress, model, columns, start_at, end_at)
if progress
progress.set_results(progress.results.merge(:time_started_at => Time.now.utc))
end
total_count = 0
unparsable_count = 0
changed_count = 0
use_shard_id = (model <= Delayed::Backend::ActiveRecord::Job) && model.column_names.include?('shard_id')
model.find_ids_in_ranges(:batch_size => 50, :start_at => start_at, :end_at => end_at) do |min_id, max_id|
# rename the columns on the pluck so that Rails doesn't silently deserialize them for us
to_pluck = columns.map { |c| "#{c} AS #{c}1" }
to_pluck << :shard_id if use_shard_id
model.transaction do
rows = model.shard(Shard.current).where(model.primary_key => min_id..max_id).lock(:no_key_update).pluck(model.primary_key, *to_pluck)
rows.each do |row|
changes = {}
shard = (use_shard_id && Shard.lookup(row.pop)) || Shard.current
shard.activate do
columns.each_with_index do |column, i|
value = row[i + 1]
next if value.nil? || value.end_with?(Syckness::TAG)
obj_from_syck = begin
YAML.unsafe_load(value)
rescue
unparsable_count += 1
nil
end
if obj_from_syck
obj_from_psych = Psych.load(value) rescue nil
if obj_from_syck != obj_from_psych
Utf8Cleaner.recursively_strip_invalid_utf8!(obj_from_syck)
new_yaml = YAML.dump(obj_from_syck)
if YAML.unsafe_load(new_yaml) != obj_from_syck
# make a final check because better safe than sorry
raise "oh noes something very very bad happened! Psych roundtrip check failed - shard: #{shard.id}, table: #{model.table_name}, key: #{row.first}"
end
changes[column] = new_yaml
end
end
end
end
next if changes.empty?
model.where(model.primary_key => row.first).update_all(changes)
changed_count += 1
end
total_count += rows.count
end
end
if progress
progress.set_results(progress.results.merge(:time_finished_at => Time.now.utc, :successful => true, :total_count => total_count,
:changed_count => changed_count, :unparsable_count => unparsable_count))
end
end
def columns_hash
result = ActiveRecord::Base.all_models.map do |model|
next unless model.superclass == ActiveRecord::Base
next if model.name == 'RemoveQuizDataIds::QuizQuestionDataMigrationARShim'
attributes = model.serialized_attributes.select do |attr, coder|
coder.is_a?(ActiveRecord::Coders::YAMLColumn)
end
next if attributes.empty?
[model, attributes.keys]
end.compact.to_h
result[Version] = ['yaml']
result[Delayed::Backend::ActiveRecord::Job] = ['handler']
result[Delayed::Backend::ActiveRecord::Job::Failed] = ['handler']
result
end
MODEL_RANGE_MAP = {
'AssessmentQuestion' => 100_000,
'ContextModuleProgression' => 100_000,
'ErrorReport' => 100_000,
'Quizzes::Quiz' => 10_000,
'Quizzes::QuizQuestion' => 10_000,
'Quizzes::QuizSubmission' => 5_000,
'Quizzes::QuizSubmissionSnapshot' => 5_000,
'Version' => 10_000
}
def range_size(model)
MODEL_RANGE_MAP[model.name] || 500_000
end
def id_ranges(model)
# try to partition off ranges of ids in the table with at most 500,000 ids per partition
unless model.primary_key == "id"
return model.exists? ? [[nil, nil]] : false
end
ranges = []
scope = model.shard(Shard.current)
start_id = scope.minimum(:id)
return false unless start_id
current_min = start_id
size = range_size(model)
while current_min
current_max = current_min + size - 1
next_min = scope.where("id > ?", current_max).minimum(:id)
if next_min
ranges << [current_min, current_max]
elsif !next_min && ranges.any?
ranges << [current_min, nil] # grab all the rest of the rows if we're at the end - including shadow objects
end
current_min = next_min
end
ranges.any? ? ranges : [[nil, nil]]
end
end
end

View File

@ -0,0 +1,41 @@
module DataFixup
module SycknessCleanser
def self.columns_hash
result = ActiveRecord::Base.all_models.map do |model|
next unless model.superclass == ActiveRecord::Base
next if model.name == 'RemoveQuizDataIds::QuizQuestionDataMigrationARShim'
attributes = model.serialized_attributes.select do |attr, coder|
coder.is_a?(ActiveRecord::Coders::YAMLColumn)
end
next if attributes.empty?
[model, attributes.keys]
end.compact.to_h
result[Version] = ['yaml']
result[Delayed::Backend::ActiveRecord::Job] = ['handler']
result[Delayed::Backend::ActiveRecord::Job::Failed] = ['handler']
result
end
def self.run
self.columns_hash.each do |model, columns|
update_sqls = {}
quoted = {}
columns.each do |column|
quoted[column] = model.connection.quote_column_name(column)
update_sqls[column] = "#{quoted[column]} = left(#{quoted[column]}, -#{Syckness::TAG.length})"
end
model.find_ids_in_ranges do |min_id, max_id|
columns.each do |column|
model.where(model.primary_key => min_id..max_id).where("#{quoted[column]} LIKE ?", "%#{Syckness::TAG}").update_all(update_sqls[column])
end
delay = Setting.get("syckness_sleeping_ms", nil)
if delay
sleep(delay.to_i.fdiv(1000)) # just in case the cleansing is a little too strong
end
end
end
end
end
end

View File

@ -1,27 +0,0 @@
#
# Copyright (C) 2013 Instructure, Inc.
#
# This file is part of Canvas.
#
# Canvas is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, version 3 of the License.
#
# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
describe "canvas_stringex" do
it "requires the syck yaml parsers" do
expect(YAML).to eq Syck
end
end

View File

@ -150,18 +150,6 @@ YAML
expect(YAML.load(psych_yaml)).to eq old_result
end
it "should seamlessly dump yaml into a psych-compatible format (and be cross-compatible)" do
yaml = "--- \nsadness: \"\\xF0\\x9F\\x98\\x82\"\n"
hash = YAML.load(yaml)
psych_dump = "---\nsadness: \"\\U0001F602\"\n#{Syckness::TAG}"
expect(hash.to_yaml).to eq psych_dump
expect(YAML.dump(hash)).to eq psych_dump
expect(YAML.load(psych_dump)).to eq hash
expect(YAML.unsafe_load(psych_dump)).to eq hash
end
it "should work with aliases" do
hash = {:a => 1}.with_indifferent_access
obj = {:blah => hash, :bloop => hash}.with_indifferent_access

View File

@ -25,6 +25,7 @@ describe DataFixup::FixRubricAssessmentYAML do
old_data = @assessment.data
bad_yaml = RubricAssessment.where(:id => @assessment).pluck("data as d").first.gsub(":comments_html: !str", ":comments_html:")
bad_yaml += Syckness::TAG # for old times' sake
bad_data = YAML.load(bad_yaml)
expect(bad_data.first[:comments_html]).to_not eq "yes" # it's reading it as a boolean
RubricAssessment.where(:id => @assessment).update_all(['data = ?', bad_yaml])

View File

@ -1,134 +0,0 @@
require_relative '../sharding_spec_helper'
describe DataFixup::PsychMigration do
before :each do
skip("Rails 4.0 specific") unless CANVAS_RAILS4_0
end
let(:bad_yaml) { "--- \nsadness: \"\\xF0\\x9F\\x98\\x82\"\n"}
let(:fixed_yaml) { "---\nsadness: \"\\U0001F602\"\n#{Syckness::TAG}" }
it "should translate yaml in serialized columns into a psych compatible state" do
user
User.where(:id => @user).update_all(:preferences => bad_yaml)
DataFixup::PsychMigration.run
yaml = User.where(:id => @user).pluck("preferences AS p1").first
expect(yaml).to eq fixed_yaml
end
it "should fix all the columns" do
course
Course.where(:id => @course).update_all(:tab_configuration => bad_yaml, :settings => bad_yaml)
DataFixup::PsychMigration.run
yamls = Course.where(:id => @course).pluck("tab_configuration AS c1, settings AS c2").first
expect(yamls).to eq [fixed_yaml, fixed_yaml]
end
it "should queue a job with a progress model on production" do
Account.where(:id => Account.default).update_all(:settings => bad_yaml)
user
User.where(:id => @user).update_all(:preferences => bad_yaml)
DataFixup::PsychMigration.stubs(:run_immediately?).returns(false)
DataFixup::PsychMigration.run
expect(User.where(:id => @user).pluck("preferences AS p1").first).to eq bad_yaml # should not have run yet
progresses = Progress.where(:tag => 'psych_migration').to_a
expected = DataFixup::PsychMigration.columns_hash.keys.select{|m| m.shard(Shard.current).exists?}.map(&:name)
expect(progresses.map{|prog| prog.results[:model_name]}).to match_array(expected)
progress = progresses.detect{|prog| prog.results[:model_name] == "User"}
expect(progress).to be_queued
run_jobs
progresses.each do |prog|
prog.reload
expect(prog).to be_completed
end
expect(progress.results[:successful]).to be_truthy
expect(progress.results[:changed_count]).to eq 1
yaml = User.where(:id => @user).pluck("preferences AS p1").first
expect(yaml).to eq fixed_yaml
yaml2 = Account.where(:id => Account.default).pluck("settings AS s1").first
expect(yaml2).to eq fixed_yaml
end
it "should split into multiple jobs with id ranges if needed" do
skip "needs AR jobs" unless Delayed::Job == Delayed::Backend::ActiveRecord::Job
users = []
8.times do
user
users << @user
end
User.where(:id => users).update_all(:preferences => bad_yaml)
DataFixup::PsychMigration.stubs(:run_immediately?).returns(false)
DataFixup::PsychMigration.stubs(:range_size).returns(3)
DataFixup::PsychMigration.run
user_progresses = Progress.where(:tag => 'psych_migration').to_a.select{|prog| prog.results[:model_name] == "User"}
expect(user_progresses.count).to eq 3
ranges = user_progresses.map{|prog| [prog[:results][:start_at], prog[:results][:end_at]]}
expect(ranges).to match_array [
[users[0].id, users[2].id],
[users[3].id, users[5].id],
[users[6].id, nil]
]
prog = user_progresses.detect{|prog| prog[:results][:start_at] == users[3].id}
job = Delayed::Job.where("handler LIKE ?", "%ActiveRecord:Progress #{prog.id}\n%").first
run_job(job)
prog.reload
expect(prog).to be_completed
expect(prog.results[:changed_count]).to eq 3
fixed_users = [users[3], users[4], users[5]]
expect(User.where(:id => fixed_users).pluck("preferences AS p1")).to eq ([fixed_yaml] * 3)
expect(User.where.not(:id => fixed_users).pluck("preferences AS p1")).to eq ([bad_yaml] * 5) # should not have run everywhere else
run_jobs
expect(User.where(:id => users).pluck("preferences AS p1")).to eq ([fixed_yaml] * 8)
end
context "cross-shard" do
specs_require_sharding
it "should deserialize job data on the correct shard" do
skip "needs job shard id" unless Delayed::Job == Delayed::Backend::ActiveRecord::Job && Delayed::Job.column_names.include?('shard_id')
@utf_arg = "\xF0\x9F\x98\x82"
@shard1.stubs(:delayed_jobs_shard).returns(Shard.default)
@shard1.activate do
user
@user.send_later(:save, @utf_arg)
end
job = Delayed::Job.where("handler LIKE ?", "%ActiveRecord:User #{@user.local_id}\n%").first
expect(job.shard_id).to eq @shard1.id
fixed = job.handler
broken = fixed.sub("\"\\U0001F602\"", "\"\\xF0\\x9F\\x98\\x82\"").sub(Syckness::TAG, "") # reconvert back into syck format for the spec
Delayed::Job.where(:id => job).update_all(:handler => broken)
expect(job.reload.handler).to eq broken
DataFixup::PsychMigration.run
expect(job.reload.handler).to eq fixed
end
end
end

View File

@ -0,0 +1,21 @@
require_relative "../spec_helper"
describe DataFixup::SycknessCleanser do
it "should remove the syckness" do
user
@user.preferences = {:bloop => "blah"}
@user.save!
old_yaml = User.where(:id => @user).pluck("preferences as y").first
new_yaml = old_yaml + Syckness::TAG
User.where(:id => @user).update_all(["preferences = ?", new_yaml])
DataFixup::SycknessCleanser.run
expect(User.where(:id => @user).pluck("preferences as y").first).to eq old_yaml
DataFixup::SycknessCleanser.run # make sure it doesn't break anything just in case
expect(User.where(:id => @user).pluck("preferences as y").first).to eq old_yaml
end
end

View File

@ -76,7 +76,7 @@ describe SubmissionVersion do
context "invalid yaml" do
before do
@version.update_attribute(:yaml, "--- \n- 1\n- 2\n-")
@version.update_attribute(:yaml, "--- \n- 1\n- 2\n--3")
end
it "should error on invalid yaml by default" do