From dafe965de016cb9f59fb8006bb946bb7a55c3fc3 Mon Sep 17 00:00:00 2001 From: Ethan Vizitei Date: Wed, 24 Feb 2021 15:39:39 -0600 Subject: [PATCH] extract canvas::redis to canvas_cache refs FOO-1636 flag=none [fsc-propagate=false] - add gem initializer to canvas for canvas_cache - move Canvas::Redis into canvas_cache - alias module in canvas - expand dependencies of canvas_cache - add redis dependency to vendored gem environment - extract RedisConnections to deal with disconnect/clear_idle TEST PLAN: 1) continue to cache things in redis 2) caching behavior with canvas-extensions should not change Change-Id: I96383eced80d670e14e535993f0c0608ab5f6520 Reviewed-on: https://gerrit.instructure.com/c/canvas-lms/+/259434 Tested-by: Service Cloud Jenkins Reviewed-by: Simon Williams QA-Review: Ethan Vizitei Product-Review: Ethan Vizitei --- config/initializers/canvas_cache.rb | 30 ++ docker-compose.new-jenkins.vendored-gems.yml | 4 + gems/canvas_cache/Gemfile | 2 + gems/canvas_cache/README.md | 35 ++ gems/canvas_cache/canvas_cache.gemspec | 12 + gems/canvas_cache/lib/canvas_cache.rb | 43 ++ gems/canvas_cache/lib/canvas_cache/redis.rb | 393 ++++++++++++++++++ .../spec/canvas_cache/redis_spec.rb | 389 +++++++++++++++++ .../spec/fixtures/config/redis.yml | 12 + gems/canvas_cache/spec/spec_helper.rb | 112 ++++- gems/config_file/lib/config_file.rb | 2 + lib/canvas.rb | 6 +- lib/canvas/redis.rb | 387 +---------------- lib/canvas/redis_connections.rb | 79 ++++ spec/lib/canvas/redis_connections_spec.rb | 39 ++ spec/lib/canvas/redis_spec.rb | 320 +------------- 16 files changed, 1175 insertions(+), 690 deletions(-) create mode 100644 config/initializers/canvas_cache.rb create mode 100644 gems/canvas_cache/lib/canvas_cache/redis.rb create mode 100644 gems/canvas_cache/spec/canvas_cache/redis_spec.rb create mode 100644 gems/canvas_cache/spec/fixtures/config/redis.yml create mode 100644 lib/canvas/redis_connections.rb create mode 100644 spec/lib/canvas/redis_connections_spec.rb diff --git a/config/initializers/canvas_cache.rb b/config/initializers/canvas_cache.rb new file mode 100644 index 00000000000..54df4789b2b --- /dev/null +++ b/config/initializers/canvas_cache.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +# Copyright (C) 2021 - present Instructure, Inc. +# +# This file is part of Canvas. +# +# Canvas is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, version 3 of the License. +# +# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see . + +# to avoid having to pull this out as a full engine yet. +# we may extract Setting into an engine or an abstract gem +# or something, for now this pattern breaks the explicit dependency. +CanvasCache.settings_store = Setting + +# It would be great to pull out Canvas::Errors as a gem, and we should do so. +# The problem is it has a dependency on RequestContextGenerator, which +# has a dependency on Canvas::Security, which has a dependency on +# our caching, and so we have to break the chain somewhere. We're starting +# with the caching. TODO: Once canvas errors is out on it's own we can let +# CanvasCache take a dependency on it directly and forego this injection. +CanvasCache.on_captured_error = ->(e){ Canvas::Errors.capture_exception(:redis, e, :warn) } \ No newline at end of file diff --git a/docker-compose.new-jenkins.vendored-gems.yml b/docker-compose.new-jenkins.vendored-gems.yml index 7bcaee4edef..a2bfab797d4 100644 --- a/docker-compose.new-jenkins.vendored-gems.yml +++ b/docker-compose.new-jenkins.vendored-gems.yml @@ -5,3 +5,7 @@ services: environment: DATABASE_URL: postgres://postgres:${POSTGRES_PASSWORD}@postgres:5432/canvas_test init: true + + redis: + image: starlord.inscloudgate.net/jenkins/redis:alpine + init: true diff --git a/gems/canvas_cache/Gemfile b/gems/canvas_cache/Gemfile index fa75df15632..157d73db977 100644 --- a/gems/canvas_cache/Gemfile +++ b/gems/canvas_cache/Gemfile @@ -1,3 +1,5 @@ source 'https://rubygems.org' gemspec + +gem 'config_file', path: '../config_file' \ No newline at end of file diff --git a/gems/canvas_cache/README.md b/gems/canvas_cache/README.md index de0365ea5fe..a428c83f421 100644 --- a/gems/canvas_cache/README.md +++ b/gems/canvas_cache/README.md @@ -7,6 +7,41 @@ For all those things you don't want to go lookup again. There are a collection of caching helpers in this library for different enhancements for the canvas system to help us scale effectively. +#### Redis Client Management + +canvas_cache knows where the redis config info is +and whether how the local environment is setup. You +can ask from anywhere: + +```ruby +CanvasCache::Redis.enabled? +``` + +You can access the distrbitued redis client directly +off the module: + +```ruby +CanvasCache::Redis.redis.set("key", "value") +``` + +If your config file has "servers: 'cache_store'", it will +just give you a vanilla Rails.cache.redis instance, but +if you give it a set of servers to work with in the config +it will construct a patched version of the redis client. + +It works pretty much like a standard redis client, but +there are several enhancements that this library makes to the +way redis caching works. + + - uses the configurable HashRing described below for distributed configs + - includes some safety checks to prevent you from accidentally dropping your + production cache with a "flushdb" + - performs logging with elapsed-time in the output + - uses a circuit breaker to tamp down on redis traffic if it gets + connectivity issues. + + + #### Consistent Hashing on redis with Digest Injection when spreading cache keys over a ring of nodes, you don't diff --git a/gems/canvas_cache/canvas_cache.gemspec b/gems/canvas_cache/canvas_cache.gemspec index d94a37f8660..9cda01361cd 100644 --- a/gems/canvas_cache/canvas_cache.gemspec +++ b/gems/canvas_cache/canvas_cache.gemspec @@ -14,8 +14,20 @@ Gem::Specification.new do |spec| spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) spec.require_paths = ["lib"] + spec.add_dependency 'config_file' + spec.add_dependency 'rails' + + # redis things required in canvas + spec.add_dependency 'redis', '>= 4.1.0' + spec.add_dependency 'redis-scripting', '>= 1.0.0' + spec.add_dependency 'digest-murmurhash', '>= 1.1.0' + + spec.add_dependency 'guardrail', '>= 2.0.0' + spec.add_dependency 'inst_statsd', '>= 2.1.0' + spec.add_development_dependency 'bundler' spec.add_development_dependency 'rspec' spec.add_development_dependency 'byebug' + spec.add_development_dependency 'timecop' end \ No newline at end of file diff --git a/gems/canvas_cache/lib/canvas_cache.rb b/gems/canvas_cache/lib/canvas_cache.rb index cc03d9e1229..a78b61a74b8 100644 --- a/gems/canvas_cache/lib/canvas_cache.rb +++ b/gems/canvas_cache/lib/canvas_cache.rb @@ -18,5 +18,48 @@ # with this program. If not, see . # module CanvasCache + require 'canvas_cache/redis' require 'canvas_cache/hash_ring' + + class UnconfiguredError < StandardError; end + + # TODO: Maybe at one point Setting will be + # a gem on it's own or some other dependable module. + # For the moment, this is a convenient way to inject + # this base class without needing to depend on it directly. + mattr_writer :settings_store + + # this is expected to be a lambda which can be invoked + # and passed an exception object if the caching library + # catches it but decides not to deal with it + mattr_writer :on_captured_error + + # Expected interface for this object is: + # object.get(setting_name, 'default_value') # [ returning a string ] + # + # object.skip_cache do # [ returning an uncached read of the string value ] + # object.get(setting_name, 'default_value') + # end + # + # In this instance, it's expected that canvas is going to inject + # the Setting class, but we want to break depednencies that directly + # point to canvas. + def self.settings_store(safe_invoke=false) + return @@settings_store if @@settings_store + return nil if safe_invoke + + raise UnconfiguredError, "an object with an interface for loading settings must be specified as 'settings_store'" + end + + # It would be great to pull out Canvas::Errors as a gem, and we should do so. + # The problem is it has a dependency on RequestContextGenerator, which + # has a dependency on Canvas::Security, which has a dependency on + # our caching, and so we have to break the chain somewhere. We're starting + # with the caching. TODO: Once canvas errors is out on it's own we can let + # CanvasCache take a dependency on it directly and forego this touchpoint. + def self.invoke_on_captured_error(exception) + return if @@on_captured_error.nil? + + @@on_captured_error.call(exception) + end end \ No newline at end of file diff --git a/gems/canvas_cache/lib/canvas_cache/redis.rb b/gems/canvas_cache/lib/canvas_cache/redis.rb new file mode 100644 index 00000000000..056a6d63f72 --- /dev/null +++ b/gems/canvas_cache/lib/canvas_cache/redis.rb @@ -0,0 +1,393 @@ +# frozen_string_literal: true + +# +# Copyright (C) 2018 - present Instructure, Inc. +# +# This file is part of Canvas. +# +# Canvas is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, version 3 of the License. +# +# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see . +# + +require 'zlib' +require 'config_file' +require 'active_support/core_ext/object/blank' +require 'inst_statsd' +require 'guard_rail' +require 'redis' + +# see https://github.com/redis/redis-rb/pull/739 + +module CanvasCache + module Redis + def self.redis + raise "Redis is not enabled for this install" unless CanvasCache::Redis.enabled? + + conf = CanvasCache::Redis.config + if conf == 'cache_store' || conf.is_a?(Hash) && conf['servers'] == 'cache_store' + return Rails.cache.redis + end + + @redis ||= begin + CanvasCache::Redis.patch + settings = ConfigFile.load('redis').dup + settings['url'] = settings['servers'] if settings['servers'] + ActiveSupport::Cache::RedisCacheStore.build_redis(**settings.to_h.symbolize_keys) + end + end + + def self.config + @config ||= ConfigFile.load('redis') + end + + def self.enabled? + @enabled ||= self.config.present? + end + + def self.disconnect! + return unless @redis + # We're sharing redis connections between CanvasCache::Redis.redis and Rails.cache, + # so don't call disconnect on the cache too. + return if Rails.cache.respond_to?(:redis) && @redis == Rails.cache.redis + @redis = nil + end + + class << self + alias_method :redis_config, :config + alias_method :redis_enabled?, :enabled? + end + + def self.reset_config! + @config = nil + @enabled = nil + end + + def self.settings_store + CanvasCache.settings_store + end + + # try to grab a lock in Redis, returning false if the lock can't be held. If + # the lock is grabbed and `ttl` is given, it'll be set to expire after `ttl` + # seconds. + def self.lock(key, ttl = nil) + return true unless CanvasCache::Redis.enabled? + + full_key = lock_key(key) + if CanvasCache::Redis.redis.setnx(full_key, 1) + CanvasCache::Redis.redis.expire(full_key, ttl.to_i) if ttl + true + else + # key is already used + false + end + end + + # unlock a previously grabbed Redis lock. This doesn't do anything to verify + # that this process took the lock. + def self.unlock(key) + CanvasCache::Redis.redis.del(lock_key(key)) + true + end + + def self.lock_key(key) + "lock:#{key}" + end + + def self.ignore_redis_failures? + settings_store.get('ignore_redis_failures', 'true') == 'true' + end + + COMPACT_LINE = "Redis (%{request_time_ms}ms) %{command} %{key} [%{host}]" + def self.log_style + # supported: 'off', 'compact', 'json' + @log_style ||= ConfigFile.load('redis')&.[]('log_style') || 'compact' + end + + def self.redis_failure?(redis_name) + return false unless last_redis_failure[redis_name] + # i feel this dangling rescue is justifiable, given the try-to-be-failsafe nature of this code + if redis_name =~ /localhost/ + # talking to local redis should not short ciruit as long + return (Time.zone.now - last_redis_failure[redis_name]) < (settings_store.get('redis_local_failure_time', '2').to_i rescue 2) + end + + (Time.zone.now - last_redis_failure[redis_name]) < (settings_store.get('redis_failure_time', '300').to_i rescue 300) + end + + def self.last_redis_failure + @last_redis_failure ||= {} + end + + def self.reset_redis_failure + @last_redis_failure = {} + end + + def self.handle_redis_failure(failure_retval, redis_name) + settings_store.skip_cache do + if redis_failure?(redis_name) + Rails.logger.warn(" [REDIS] Short circuiting due to recent redis failure (#{redis_name})") + return failure_retval + end + end + reply = yield + raise reply if reply.is_a?(Exception) + + reply + rescue ::Redis::BaseConnectionError, SystemCallError, ::Redis::CommandError => e + # spring calls its after_fork hooks _after_ establishing a new db connection + # after forking. so we don't get a chance to close the connection. just ignore + # the error (but lets use logging to make sure we know if it happens) + Rails.logger.error(" [REDIS] Query failure #{e.inspect} (#{redis_name})") + return failure_retval if e.is_a?(::Redis::InheritedError) && defined?(Spring) + + # We want to rescue errors such as "max number of clients reached", but not + # actual logic errors such as trying to evalsha a script that doesn't + # exist. + # These are both CommandErrors, so we can only differentiate based on the + # exception message. + if e.is_a?(::Redis::CommandError) && e.message !~ /\bmax number of clients reached\b/ + raise + end + + InstStatsd::Statsd.increment("redis.errors.all") + InstStatsd::Statsd.increment("redis.errors.#{InstStatsd::Statsd.escape(redis_name)}", + short_stat: 'redis.errors', + tags: {redis_name: InstStatsd::Statsd.escape(redis_name)}) + Rails.logger.error "Failure handling redis command on #{redis_name}: #{e.inspect}" + + settings_store.skip_cache do + if self.ignore_redis_failures? + CanvasCache.invoke_on_captured_error(e) + last_redis_failure[redis_name] = Time.zone.now + failure_retval + else + raise + end + end + end + + class UnsupportedRedisMethod < RuntimeError + end + + BoolifySet = + lambda { |value| + if value && value == "OK" + true + elsif value && value == :failure + nil + else + false + end + } + + module Client + def disconnect_if_idle(since_when) + disconnect if !@process_start || @process_start < since_when + end + + def process(commands, *a, &b) + # These instance vars are used by the added #log_request_response method. + @processing_requests = commands.map(&:dup) + @process_start = Time.zone.now + + # try to return the type of value the command would expect, for some + # specific commands that we know can cause problems if we just return + # nil + # + # this isn't fool-proof, and in some situations it would probably + # actually be nicer to raise the exception and let the app handle it, + # but it's what we're going with for now + # + # for instance, Rails.cache.delete_matched will error out if the 'keys' command returns nil instead of [] + last_command = commands.try(:last) + last_command_args = Array.wrap(last_command) + last_command = (last_command.respond_to?(:first) ? last_command.first : last_command).to_s + failure_val = case last_command + when 'keys', 'hmget', 'mget' + [] + when 'scan' + ["0", []] + when 'del' + 0 + end + + if last_command == 'set' && (last_command_args.include?('XX') || last_command_args.include?('NX')) + failure_val = :failure + end + CanvasCache::Redis.handle_redis_failure(failure_val, self.location) do + super + end + end + + NON_KEY_COMMANDS = %i[eval evalsha].freeze + + def read + response = super + # Each #read grabs the response to one command send to #process, so we + # pop off the next queued request and send that to the logger. The redis + # client works this way because #process may be called with many commands + # at once, if using #pipeline. + @processing_requests ||= [] + @process_start ||= Time.zone.now + log_request_response(@processing_requests.shift, response, @process_start) + response + end + + SET_COMMANDS = %i{set setex}.freeze + def log_request_response(request, response, start_time) + return if request.nil? # redis client does internal keepalives and connection commands + return if CanvasCache::Redis.log_style == 'off' + return unless Rails.logger + + command = request.shift + message = { + message: "redis_request", + command: command, + # request_size is the sum of all the string parameters send with the command. + request_size: request.sum { |c| c.to_s.size }, + request_time_ms: ((Time.zone.now - start_time) * 1000).round(3), + host: location, + } + unless NON_KEY_COMMANDS.include?(command) + message[:key] = if command == :mset + # This is an array with a single element: an array alternating key/values + request.first { |v| v.first }.select.with_index { |_,i| (i) % 2 == 0 } + elsif command == :mget + request + else + request.first + end + end + if defined?(Marginalia) + message[:controller] = Marginalia::Comment.controller + message[:action] = Marginalia::Comment.action + message[:job_tag] = Marginalia::Comment.job_tag + end + if SET_COMMANDS.include?(command) && Thread.current[:last_cache_generate] + # :last_cache_generate comes from the instrumentation added in + # config/initializers/cache_store.rb in canvas. + # + # TODO: port the code in that initializer to something in thi gem. + # + # This is necessary because the Rails caching layer doesn't pass this + # information down to the Redis client -- we could try to infer it by + # looking for reads followed by writes to the same key, but this would be + # error prone, especially since further cache reads can happen inside the + # generation block. + message[:generate_time_ms] = Thread.current[:last_cache_generate] * 1000 + Thread.current[:last_cache_generate] = nil + end + if response.is_a?(Exception) + message[:error] = response.to_s + message[:response_size] = 0 + else + message[:response_size] = response.try(:size) || 0 + end + + logline = format_log_message(message) + Rails.logger.debug(logline) + end + + def format_log_message(message) + if CanvasCache::Redis.log_style == 'json' + JSON.generate(message.compact) + else + message[:key] ||= "-" + CanvasCache::Redis::COMPACT_LINE % message + end + end + + def write(command) + if UNSUPPORTED_METHODS.include?(command.first.to_s) + raise(UnsupportedRedisMethod, "Redis method `#{command.first}` is not supported by Twemproxy, and so shouldn't be used in Canvas") + end + if ALLOWED_UNSUPPORTED.include?(command.first.to_s) && GuardRail.environment != :deploy + raise(UnsupportedRedisMethod, "Redis method `#{command.first}` is potentially dangerous, and should only be called from console, and only if you fully understand the consequences. If you're sure, retry after running GuardRail.activate!(:deploy)") + end + + super + end + + UNSUPPORTED_METHODS = %w[ + migrate + move + object + randomkey + rename + renamenx + scan + bitop + msetnx + blpop + brpop + brpoplpush + psubscribe + publish + punsubscribe + subscribe + unsubscribe + discard + exec + multi + unwatch + watch + script + echo + ping + ].freeze + + # There are some methods that are not supported by twemproxy, but which we + # don't block, because they are viewed as maintenance-type commands that + # wouldn't be run as part of normal code, but could be useful to run + # one-off in script/console if you aren't using twemproxy, or in specs: + ALLOWED_UNSUPPORTED = %w[ + keys + auth + quit + flushall + flushdb + info + bgrewriteaof + bgsave + client + config + dbsize + debug + lastsave + monitor + save + shutdown + slaveof + slowlog + sync + time + ].freeze + end + + module Distributed + def initialize(addresses, options = { }) + options[:ring] ||= CanvasCache::HashRing.new([], options[:replicas], options[:digest]) + super + end + end + + def self.patch + Bundler.require 'redis' + require 'redis/distributed' + + ::Redis::Client.prepend(::CanvasCache::Redis::Client) + ::Redis::Distributed.prepend(::CanvasCache::Redis::Distributed) + ::Redis.send(:remove_const, :BoolifySet) + ::Redis.const_set(:BoolifySet, ::CanvasCache::Redis::BoolifySet) + end + end +end diff --git a/gems/canvas_cache/spec/canvas_cache/redis_spec.rb b/gems/canvas_cache/spec/canvas_cache/redis_spec.rb new file mode 100644 index 00000000000..bc25ed3c31c --- /dev/null +++ b/gems/canvas_cache/spec/canvas_cache/redis_spec.rb @@ -0,0 +1,389 @@ +# frozen_string_literal: true + +# +# Copyright (C) 2011 - present Instructure, Inc. +# +# This file is part of Canvas. +# +# Canvas is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, version 3 of the License. +# +# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see . +# + +require 'spec_helper' +require 'timecop' +require 'active_support/time_with_zone' + +describe CanvasCache::Redis do + include_context "caching_helpers" + + describe ".enabled?" do + before(:each) do + CanvasCache::Redis.reset_config! + end + + after(:each) do + CanvasCache::Redis.reset_config! + end + + it "is true when it finds a config" do + allow(ConfigFile).to receive(:load).with('redis').and_return({some: :data}) + expect(CanvasCache::Redis).to be_enabled + end + + it "is false when config-less" do + allow(ConfigFile).to receive(:load).with('redis').and_return(nil) + expect(CanvasCache::Redis).to_not be_enabled + end + end + + describe "with redis" do + before(:each) do + skip("redis required to test") unless CanvasCache::Redis.enabled? + end + + let(:redis_client){ CanvasCache::Redis.redis } + + it "doesn't marshall" do + redis_client.set('test', 1) + expect(redis_client.get('test')).to eq '1' + end + + describe ".disconnect!" do + it "generates a new client" do + r = CanvasCache::Redis.redis + expect(r).to eq(redis_client) + CanvasCache::Redis.disconnect! + r2 = CanvasCache::Redis.redis + expect(r2).to_not be_nil + expect(r2).to_not eq(r) + end + end + + describe "locking" do + it "should succeed if the lock isn't taken" do + expect(CanvasCache::Redis.lock('test1')).to eq true + expect(CanvasCache::Redis.lock('test2')).to eq true + end + + it "should fail if the lock is taken" do + expect(CanvasCache::Redis.lock('test1')).to eq true + expect(CanvasCache::Redis.lock('test1')).to eq false + expect(CanvasCache::Redis.unlock('test1')).to eq true + expect(CanvasCache::Redis.lock('test1')).to eq true + end + + it "should live forever if no expire time is given" do + expect(CanvasCache::Redis.lock('test1')).to eq true + expect(CanvasCache::Redis.redis.ttl(CanvasCache::Redis.lock_key('test1'))).to eq(-1) + end + + it "should set the expire time if given" do + expect(CanvasCache::Redis.lock('test1', 15)).to eq true + ttl = CanvasCache::Redis.redis.ttl(CanvasCache::Redis.lock_key('test1')) + expect(ttl).to be > 0 + expect(ttl).to be <= 15 + end + end + + describe "exceptions" do + before do + CanvasCache::Redis.patch + end + + after do + CanvasCache::Redis.reset_redis_failure + end + + it "should protect against errnos" do + expect(redis_client._client).to receive(:write).and_raise(Errno::ETIMEDOUT).once + expect(redis_client.set('blah', 'blah')).to eq nil + end + + it "should protect against max # of client errors" do + expect(redis_client._client).to receive(:write).and_raise(Redis::CommandError.new("ERR max number of clients reached")).once + expect(redis_client.set('blah', 'blah')).to eq nil + end + + it "should pass through other command errors" do + expect(InstStatsd::Statsd).not_to receive(:increment) + + expect(redis_client._client).to receive(:write).and_raise(Redis::CommandError.new("NOSCRIPT No matching script. Please use EVAL.")).once + expect { redis_client.evalsha('xxx') }.to raise_error(Redis::CommandError) + + expect(redis_client._client).to receive(:write).and_raise(Redis::CommandError.new("ERR no such key")).once + expect { redis_client.get('no-such-key') }.to raise_error(Redis::CommandError) + end + + describe "redis failure" do + let(:cache) { ActiveSupport::Cache::RedisCacheStore.new(url: 'redis://localhost:1234') } + + before do + allow(cache.redis._client).to receive(:ensure_connected).and_raise(Redis::TimeoutError) + end + + it "should not fail cache.read" do + override_cache(cache) do + expect(Rails.cache.read('blah')).to eq nil + end + end + + it "should not call redis again after an error" do + override_cache(cache) do + expect(Rails.cache.read('blah')).to eq nil + # call again, the .once means that if it hits Redis::Client again it'll fail + expect(Rails.cache.read('blah')).to eq nil + end + end + + it "should not fail cache.write" do + override_cache(cache) do + expect(Rails.cache.write('blah', 'someval')).to eq nil + end + end + + it "should not fail cache.delete" do + override_cache(cache) do + expect(Rails.cache.delete('blah')).to eq 0 + end + end + + it "should not fail cache.delete for a ring" do + override_cache(ActiveSupport::Cache::RedisCacheStore.new(url: ['redis://localhost:1234', 'redis://localhost:4567'])) do + expect(Rails.cache.delete('blah')).to eq 0 + end + end + + it "should not fail cache.exist?" do + override_cache(cache) do + expect(Rails.cache.exist?('blah')).to be_falsey + end + end + + it "should not fail cache.delete_matched" do + override_cache(cache) do + expect(Rails.cache.delete_matched('blah')).to eq nil + end + end + + it "should fail separate servers separately" do + cache = ActiveSupport::Cache::RedisCacheStore.new(url: [redis_client.id, 'redis://nonexistent:1234/0']) + client = cache.redis + key2 = 2 + while client.node_for('1') == client.node_for(key2.to_s) + key2 += 1 + end + key2 = key2.to_s + expect(client.node_for('1')).not_to eq client.node_for(key2) + expect(client.nodes.last.id).to eq 'redis://nonexistent:1234/0' + expect(client.nodes.last._client).to receive(:ensure_connected).and_raise(Redis::TimeoutError).once + + cache.write('1', true, :use_new_rails => false) + cache.write(key2, true, :use_new_rails => false) + # one returned nil, one returned true; we don't know which one which key ended up on + expect([ + cache.fetch('1', :use_new_rails => false), + cache.fetch(key2, :use_new_rails => false) + ].compact).to eq [true] + end + + it "should not fail raw redis commands" do + expect(redis_client._client).to receive(:ensure_connected).and_raise(Redis::TimeoutError).once + expect(redis_client.setnx('my_key', 5)).to eq nil + end + + it "returns a non-nil structure for mget" do + expect(redis_client._client).to receive(:ensure_connected).and_raise(Redis::TimeoutError).once + expect(redis_client.mget(['k1', 'k2', 'k3'])).to eq [] + end + + it "distinguishes between failure and not exists for set nx" do + redis_client.del('my_key') + expect(redis_client.set('my_key', 5, nx: true)).to eq true + expect(redis_client.set('my_key', 5, nx: true)).to eq false + expect(redis_client._client).to receive(:ensure_connected).and_raise(Redis::TimeoutError).once + expect(redis_client.set('my_key', 5, nx: true)).to eq nil + end + + end + end + + describe "json logging" do + let(:key) { 'mykey' } + let(:key2) { 'mykey2' } + let(:val) { 'myvalue' } + + before { allow(CanvasCache::Redis).to receive(:log_style).and_return('json') } + + it "should log information on the redis request" do + log_lines = capture_log_messages do + redis_client.set(key, val) + end + message = JSON.parse(log_lines.pop) + expect(message["message"]).to eq("redis_request") + expect(message["command"]).to eq("set") + expect(message["key"]).to eq("mykey") + expect(message["request_size"]).to eq((key+val).size) + expect(message["response_size"]).to eq(2) # "OK" + expect(message["host"]).not_to be_nil + expect(message["request_time_ms"]).to be_a(Float) + end + + it "should not log the lua eval code" do + log_lines = capture_log_messages do + redis_client.eval('local a = 1') + end + message = log_lines[0] + expect(message["key"]).to be_nil + end + + it "should log error on redis error response" do + log_lines = capture_log_messages do + expect { redis_client.eval('totes not lua') }.to raise_error(Redis::CommandError) + end + message = JSON.parse(log_lines.first) + expect(message["response_size"]).to eq(0) + expect(message["error"]).to be_a(String) + end + + context "rails caching" do + let(:cache) do + ActiveSupport::Cache::RedisCacheStore.new(redis: redis_client) + end + + it "should log the cache fetch block generation time" do + begin + Timecop.safe_mode = false + Timecop.freeze + log_lines = capture_log_messages do + # make sure this works with fetching nested fetches + cache.fetch(key, force: true) do + val = +"a1" + val << cache.fetch(key2, force: true) do + Timecop.travel(Time.zone.now + 1.second) + # Cheat to cover the missing ActiveSupport::Notifications.subscription in config/inititalizers/cache_store.rb + # TODO: remove this hack when initializer is ported to gem and incorporated + Thread.current[:last_cache_generate] = 1 + "b1" + end + Timecop.travel(Time.zone.now + 2.seconds) + # Cheat to cover the missing ActiveSupport::Notifications.subscription in config/inititalizers/cache_store.rb + # TODO: remove this hack when initializer is ported to gem and incorporated + Thread.current[:last_cache_generate] = 3 + val << "a2" + end + end + outer_message = JSON.parse(log_lines.pop) + expect(outer_message["command"]).to eq("set") + expect(outer_message["key"]).to be_ends_with(key) + expect(outer_message["request_time_ms"]).to be_a(Float) + # 3000 (3s) == 2s outer fetch + 1s inner fetch + expect(outer_message["generate_time_ms"]).to be_within(500).of(3000) + + inner_message = JSON.parse(log_lines.pop) + expect(inner_message["command"]).to eq("set") + expect(inner_message["key"]).to be_ends_with(key2) + expect(inner_message["request_time_ms"]).to be_a(Float) + expect(inner_message["generate_time_ms"]).to be_within(500).of(1000) + ensure + Timecop.return + Timecop.safe_mode = true + end + end + + it "should log zero response size on cache miss" do + cache.delete(key) + log_lines = capture_log_messages do + expect(cache.read(key)).to be_nil + end + message = JSON.parse(log_lines.pop) + expect(message["command"]).to eq("get") + expect(message["response_size"]).to eq(0) + end + end + end + + it "should log compactly by default on the redis request" do + # cache to avoid capturing a log line for db lookup + CanvasCache::Redis.log_style + log_lines = capture_log_messages do + redis_client.set('mykey', 'myvalue') + end + msg = log_lines.pop + expect(msg).to match(/Redis \(\d+\.\d+ms\) set mykey \[.*\]/) + end + + it "should allow disabling redis logging" do + allow(CanvasCache::Redis).to receive(:log_style).and_return('off') + log_lines = capture_log_messages do + redis_client.set('mykey', 'myvalue') + end + expect(log_lines).to be_empty + end + + describe "CanvasCache::RedisWrapper" do + it "should raise on unsupported commands" do + expect { redis_client.keys }.to raise_error(CanvasCache::Redis::UnsupportedRedisMethod) + end + end + + describe "handle_redis_failure" do + before do + CanvasCache::Redis.patch + end + + after do + CanvasCache::Redis.reset_redis_failure + end + + it "logs any redis error when they occur" do + messages = [] + expect(Rails.logger).to receive(:error) do |message| + messages << message + end.at_least(:once) + CanvasCache::Redis.handle_redis_failure({'failure'=>'val'}, 'local_fake_redis') do + raise ::Redis::InheritedError, "intentional failure" + end + # we don't log the second message under spring, cause reasons; we only + # care about the primary message anyway + msgs = messages.select{|m| m =~ /Query failure/ } + expect(msgs.length).to eq(1) + m = msgs.first + expect(m).to match(/\[REDIS\] Query failure/) + expect(m).to match(/\(local_fake_redis\)/) + expect(m).to match(/InheritedError/) + end + + it "tracks failure only briefly for local redis" do + local_node = "localhost:9999" + expect(CanvasCache::Redis.redis_failure?(local_node)).to be_falsey + CanvasCache::Redis.last_redis_failure[local_node] = Time.now + expect(CanvasCache::Redis.redis_failure?(local_node)).to be_truthy + Timecop.travel(4) do + expect(CanvasCache::Redis.redis_failure?(local_node)).to be_falsey + end + end + + it "circuit breaks for standard nodes for a different amount of time" do + remote_node = "redis-test-node-42:9999" + expect(CanvasCache::Redis.redis_failure?(remote_node)).to be_falsey + CanvasCache::Redis.last_redis_failure[remote_node] = Time.now + expect(CanvasCache::Redis.redis_failure?(remote_node)).to be_truthy + Timecop.travel(4) do + expect(CanvasCache::Redis.redis_failure?(remote_node)).to be_truthy + end + Timecop.travel(400) do + expect(CanvasCache::Redis.redis_failure?(remote_node)).to be_falsey + end + end + end + end +end \ No newline at end of file diff --git a/gems/canvas_cache/spec/fixtures/config/redis.yml b/gems/canvas_cache/spec/fixtures/config/redis.yml new file mode 100644 index 00000000000..4db805f904a --- /dev/null +++ b/gems/canvas_cache/spec/fixtures/config/redis.yml @@ -0,0 +1,12 @@ +development: + servers: + - redis://redis + +test: + # only tests that are exercising the integration with redis require redis to run. + servers: + - redis://redis + # warning: the redis database will get cleared before each test, so if you + # use this server for anything else, make sure to set aside a database id for + # these tests to use. + database: 1 diff --git a/gems/canvas_cache/spec/spec_helper.rb b/gems/canvas_cache/spec/spec_helper.rb index afa7a8d1790..6c820b4853e 100644 --- a/gems/canvas_cache/spec/spec_helper.rb +++ b/gems/canvas_cache/spec/spec_helper.rb @@ -19,6 +19,105 @@ require 'byebug' require 'canvas_cache' +require 'action_controller' +require 'active_record' + +Rails.env = 'test' +Time.zone = 'UTC' # This is simplest, fight me. + +# Right now Canvas injects the Setting class as the store. +# It would be great to pull that one out to something we can +# depend on as an adapter that Canvas can submit Setting itself +# as a strategy for...anyway, use this for now for specs +class MemorySettings + def initialize(data={}) + @settings = data || {} + end + + def get(key, default) + @settings.fetch(key, default) + end + + def set(key, value) + @settings[key] = value + end + + def skip_cache + yield + end +end +CanvasCache.settings_store = MemorySettings.new + +# give the logger some implementation since +# we aren't initializing a full app in these specs +Rails.logger = Logger.new(STDOUT) + + +RSpec.shared_context "caching_helpers", :shared_context => :metadata do + # provide a way to temporarily replace the rails + # cache with one constructed in a spec. + def override_cache(new_cache=:memory_store) + previous_cache = Rails.cache + previous_perform_caching = ActionController::Base.perform_caching + set_cache(new_cache) + if block_given? + begin + yield + ensure + allow(Rails).to receive(:cache).and_return(previous_cache) + allow(ActionController::Base).to receive(:cache_store).and_return(previous_cache) + allow_any_instance_of(ActionController::Base).to receive(:cache_store).and_return(previous_cache) + allow(ActionController::Base).to receive(:perform_caching).and_return(previous_perform_caching) + allow_any_instance_of(ActionController::Base).to receive(:perform_caching).and_return(previous_perform_caching) + end + end + end + + def set_cache(new_cache) + cache_opts = {} + if new_cache == :redis_cache_store + if CanvasCache::Redis.redis_enabled? + cache_opts[:redis] = CanvasCache::Redis.redis + else + skip "redis required" + end + end + new_cache ||= :null_store + new_cache = ActiveSupport::Cache.lookup_store(new_cache, cache_opts) + allow(Rails).to receive(:cache).and_return(new_cache) + allow(ActionController::Base).to receive(:cache_store).and_return(new_cache) + allow_any_instance_of(ActionController::Base).to receive(:cache_store).and_return(new_cache) + allow(ActionController::Base).to receive(:perform_caching).and_return(true) + allow_any_instance_of(ActionController::Base).to receive(:perform_caching).and_return(true) + # TODO: re-enable this once multi_cache is pulled over to gem. + # MultiCache.reset + end + + # some specs need to check what's being written to the log, provide + # an easy way to do so here + def capture_log_messages + prev_logger = Rails.logger + + collector_class = Class.new(ActiveSupport::Logger) do + attr_reader :captured_message_stack + def initialize + @captured_message_stack ||= [] + end + + def add(severity, message=nil, progname=nil, &block) + message = (message || (block && block.call) || progname).to_s + @captured_message_stack << message + end + end + + lgr = collector_class.new + Rails.logger = lgr + yield + return lgr.captured_message_stack + ensure + Rails.logger = prev_logger + end +end RSpec.configure do |config| config.mock_with :rspec do |mocks| @@ -26,4 +125,15 @@ RSpec.configure do |config| end config.order = 'random' -end + + config.before(:each) do + # load config from local spec/fixtures/config/redis.yml + # so that we have something for ConfigFile to parse. + target_location = Pathname.new(File.join(File.dirname(__FILE__), 'fixtures')) + allow(Rails).to receive(:root).and_return(target_location) + + # make sure redis is in a stable state before every spec + GuardRail.activate(:deploy) { CanvasCache::Redis.redis.flushdb } + end + +end \ No newline at end of file diff --git a/gems/config_file/lib/config_file.rb b/gems/config_file/lib/config_file.rb index 7eca8eb89f5..4712816df67 100644 --- a/gems/config_file/lib/config_file.rb +++ b/gems/config_file/lib/config_file.rb @@ -18,6 +18,8 @@ # with this program. If not, see . # +require 'rails' + module ConfigFile class << self def unstub diff --git a/lib/canvas.rb b/lib/canvas.rb index 8e6d60ea43e..4dd001d61b2 100644 --- a/lib/canvas.rb +++ b/lib/canvas.rb @@ -33,15 +33,15 @@ module Canvas end def self.redis - Canvas::Redis.redis + CanvasCache::Redis.redis end def self.redis_config - Canvas::Redis.redis_config + CanvasCache::Redis.config end def self.redis_enabled? - Canvas::Redis.redis_enabled? + CanvasCache::Redis.enabled? end def self.reconnect_redis diff --git a/lib/canvas/redis.rb b/lib/canvas/redis.rb index 3b2798cfb5e..4b0746ca090 100644 --- a/lib/canvas/redis.rb +++ b/lib/canvas/redis.rb @@ -17,381 +17,28 @@ # You should have received a copy of the GNU Affero General Public License along # with this program. If not, see . -module Canvas::Redis - def self.redis - raise "Redis is not enabled for this install" unless redis_enabled? - if redis_config == 'cache_store' || redis_config.is_a?(Hash) && redis_config['servers'] == 'cache_store' - return Rails.cache.redis - end - @redis ||= begin - Canvas::Redis.patch - settings = ConfigFile.load('redis').dup - settings['url'] = settings['servers'] if settings['servers'] - ActiveSupport::Cache::RedisCacheStore.build_redis(**settings.to_h.symbolize_keys) - end - end +module Canvas + # TODO: This implementation is being moved to the gem CanvasCache + # but this shim is helping us not break in the meantime. + # once that has replaced all callsites, + # we won't need this shim anymore, and can drop this file entirely. + Redis = CanvasCache::Redis +end - def self.redis_config - @redis_config ||= ConfigFile.load('redis') - end - def self.redis_enabled? - @redis_enabled ||= redis_config.present? - end +# TODO: the connection management being delegated here +# isn't necessarily part of the canvas_cache gem. +# Once callsites have been updated to use the RedisConnections module +# instead, we can drop this module re-openning. +class << Canvas::Redis # technically this is just disconnect_redis, because new connections are created lazily, - # but I didn't want to rename it when there are several uses of it - def self.reconnect_redis - if Rails.cache && - defined?(ActiveSupport::Cache::RedisCacheStore) && - Rails.cache.is_a?(ActiveSupport::Cache::RedisCacheStore) - Canvas::Redis.handle_redis_failure(nil, "none") do - redis = Rails.cache.redis - if redis.respond_to?(:nodes) - redis.nodes.each(&:disconnect!) - else - redis.disconnect! - end - end - end - - if MultiCache.cache.is_a?(ActiveSupport::Cache::HaStore) - Canvas::Redis.handle_redis_failure(nil, "none") do - redis = MultiCache.cache.redis - if redis.respond_to?(:nodes) - redis.nodes.each(&:disconnect!) - else - redis.disconnect! - end - end - end - - return unless @redis - # We're sharing redis connections between Canvas::Redis.redis and Rails.cache, - # so don't call reconnect on the cache too. - return if Rails.cache.respond_to?(:redis) && @redis == Rails.cache.redis - @redis = nil + # but we didn't want to rename it when there are several uses of it + def reconnect_redis + ::Canvas::RedisConnections.disconnect! end - def self.clear_idle_connections - # every 1 minute, clear connections that have been idle at least a minute - clear_frequency = Setting.get("clear_idle_connections_frequency", 60).to_i - clear_timeout = Setting.get("clear_idle_connections_timeout", 60).to_i - @last_clear_time ||= Time.now.utc - if (Time.now.utc - @last_clear_time > clear_frequency) - @last_clear_time = Time.now.utc - # gather all the redises we can find - redises = Switchman.config[:cache_map].values. - map { |cache| cache.try(:redis) }.compact.uniq. - map { |redis| redis.try(:ring)&.nodes || [redis] }.inject([], &:concat).uniq - redises.each { |r| r._client.disconnect_if_idle(@last_clear_time - clear_timeout) } - end - end - - # try to grab a lock in Redis, returning false if the lock can't be held. If - # the lock is grabbed and `ttl` is given, it'll be set to expire after `ttl` - # seconds. - def self.lock(key, ttl = nil) - return true unless Canvas::Redis.redis_enabled? - full_key = lock_key(key) - if Canvas::Redis.redis.setnx(full_key, 1) - Canvas::Redis.redis.expire(full_key, ttl.to_i) if ttl - true - else - # key is already used - false - end - end - - # unlock a previously grabbed Redis lock. This doesn't do anything to verify - # that this process took the lock. - def self.unlock(key) - Canvas::Redis.redis.del(lock_key(key)) - true - end - - def self.lock_key(key) - "lock:#{key}" - end - - def self.ignore_redis_failures? - Setting.get('ignore_redis_failures', 'true') == 'true' - end - - COMPACT_LINE = "Redis (%{request_time_ms}ms) %{command} %{key} [%{host}]".freeze - def self.log_style - # supported: 'off', 'compact', 'json' - @log_style ||= ConfigFile.load('redis')&.[]('log_style') || 'compact' - end - - def self.redis_failure?(redis_name) - return false unless last_redis_failure[redis_name] - # i feel this dangling rescue is justifiable, given the try-to-be-failsafe nature of this code - if redis_name =~ /localhost/ - # talking to local redis should not short ciruit as long - return (Time.now - last_redis_failure[redis_name]) < (Setting.get('redis_local_failure_time', '2').to_i rescue 2) - end - return (Time.now - last_redis_failure[redis_name]) < (Setting.get('redis_failure_time', '300').to_i rescue 300) - end - - def self.last_redis_failure - @last_redis_failure ||= {} - end - - def self.reset_redis_failure - @last_redis_failure = {} - end - - def self.handle_redis_failure(failure_retval, redis_name) - Setting.skip_cache do - if redis_failure?(redis_name) - Rails.logger.warn(" [REDIS] Short circuiting due to recent redis failure (#{redis_name})") - return failure_retval - end - end - reply = yield - raise reply if reply.is_a?(Exception) - reply - rescue ::Redis::BaseConnectionError, SystemCallError, ::Redis::CommandError => e - # spring calls its after_fork hooks _after_ establishing a new db connection - # after forking. so we don't get a chance to close the connection. just ignore - # the error (but lets use logging to make sure we know if it happens) - Rails.logger.error(" [REDIS] Query failure #{e.inspect} (#{redis_name})") - return failure_retval if e.is_a?(::Redis::InheritedError) && defined?(Spring) - - # We want to rescue errors such as "max number of clients reached", but not - # actual logic errors such as trying to evalsha a script that doesn't - # exist. - # These are both CommandErrors, so we can only differentiate based on the - # exception message. - if e.is_a?(::Redis::CommandError) && e.message !~ /\bmax number of clients reached\b/ - raise - end - - InstStatsd::Statsd.increment("redis.errors.all") - InstStatsd::Statsd.increment("redis.errors.#{InstStatsd::Statsd.escape(redis_name)}", - short_stat: 'redis.errors', - tags: {redis_name: InstStatsd::Statsd.escape(redis_name)}) - Rails.logger.error "Failure handling redis command on #{redis_name}: #{e.inspect}" - - Setting.skip_cache do - if self.ignore_redis_failures? - Canvas::Errors.capture_exception(:redis, e, :warn) - last_redis_failure[redis_name] = Time.now - failure_retval - else - raise - end - end - end - - class UnsupportedRedisMethod < RuntimeError - end - - BoolifySet = - lambda { |value| - if value && "OK" == value - true - elsif value && :failure == value - nil - else - false - end - } - - module Client - def disconnect_if_idle(since_when) - disconnect if !@process_start || @process_start < since_when - end - - def process(commands, *a, &b) - # These instance vars are used by the added #log_request_response method. - @processing_requests = commands.map(&:dup) - @process_start = Time.now - - # try to return the type of value the command would expect, for some - # specific commands that we know can cause problems if we just return - # nil - # - # this isn't fool-proof, and in some situations it would probably - # actually be nicer to raise the exception and let the app handle it, - # but it's what we're going with for now - # - # for instance, Rails.cache.delete_matched will error out if the 'keys' command returns nil instead of [] - last_command = commands.try(:last) - last_command_args = Array.wrap(last_command) - last_command = (last_command.respond_to?(:first) ? last_command.first : last_command).to_s - failure_val = case last_command - when 'keys', 'hmget', 'mget' - [] - when 'scan' - ["0", []] - when 'del' - 0 - end - if last_command == 'set' && (last_command_args.include?('XX') || last_command_args.include?('NX')) - failure_val = :failure - end - Canvas::Redis.handle_redis_failure(failure_val, self.location) do - super - end - end - - NON_KEY_COMMANDS = %i[eval evalsha].freeze - - def read - response = super - # Each #read grabs the response to one command send to #process, so we - # pop off the next queued request and send that to the logger. The redis - # client works this way because #process may be called with many commands - # at once, if using #pipeline. - @processing_requests ||= [] - @process_start ||= Time.now - log_request_response(@processing_requests.shift, response, @process_start) - response - end - - SET_COMMANDS = %i{set setex}.freeze - def log_request_response(request, response, start_time) - return if request.nil? # redis client does internal keepalives and connection commands - return if Canvas::Redis.log_style == 'off' - return unless Rails.logger - - command = request.shift - message = { - message: "redis_request".freeze, - command: command, - # request_size is the sum of all the string parameters send with the command. - request_size: request.sum { |c| c.to_s.size }, - request_time_ms: ((Time.now - start_time) * 1000).round(3), - host: location, - } - unless NON_KEY_COMMANDS.include?(command) - if command == :mset - # This is an array with a single element: an array alternating key/values - message[:key] = request.first { |v| v.first }.select.with_index { |_,i| (i) % 2 == 0 } - elsif command == :mget - message[:key] = request - else - message[:key] = request.first - end - end - if defined?(Marginalia) - message[:controller] = Marginalia::Comment.controller - message[:action] = Marginalia::Comment.action - message[:job_tag] = Marginalia::Comment.job_tag - end - if SET_COMMANDS.include?(command) && Thread.current[:last_cache_generate] - # :last_cache_generate comes from the instrumentation added in - # config/initializeers/cache_store_instrumentation.rb - # This is necessary because the Rails caching layer doesn't pass this - # information down to the Redis client -- we could try to infer it by - # looking for reads followed by writes to the same key, but this would be - # error prone, especially since further cache reads can happen inside the - # generation block. - message[:generate_time_ms] = Thread.current[:last_cache_generate] * 1000 - Thread.current[:last_cache_generate] = nil - end - if response.is_a?(Exception) - message[:error] = response.to_s - message[:response_size] = 0 - else - message[:response_size] = response.try(:size) || 0 - end - - logline = format_log_message(message) - Rails.logger.debug(logline) - end - - def format_log_message(message) - if Canvas::Redis.log_style == 'json' - JSON.generate(message.compact) - else - message[:key] ||= "-" - Canvas::Redis::COMPACT_LINE % message - end - end - - def write(command) - if UNSUPPORTED_METHODS.include?(command.first.to_s) - raise(UnsupportedRedisMethod, "Redis method `#{command.first}` is not supported by Twemproxy, and so shouldn't be used in Canvas") - end - if ALLOWED_UNSUPPORTED.include?(command.first.to_s) && GuardRail.environment != :deploy - raise(UnsupportedRedisMethod, "Redis method `#{command.first}` is potentially dangerous, and should only be called from console, and only if you fully understand the consequences. If you're sure, retry after running GuardRail.activate!(:deploy)") - end - super - end - - UNSUPPORTED_METHODS = %w[ - migrate - move - object - randomkey - rename - renamenx - scan - bitop - msetnx - blpop - brpop - brpoplpush - psubscribe - publish - punsubscribe - subscribe - unsubscribe - discard - exec - multi - unwatch - watch - script - echo - ping - ].freeze - - # There are some methods that are not supported by twemproxy, but which we - # don't block, because they are viewed as maintenance-type commands that - # wouldn't be run as part of normal code, but could be useful to run - # one-off in script/console if you aren't using twemproxy, or in specs: - ALLOWED_UNSUPPORTED = %w[ - keys - auth - quit - flushall - flushdb - info - bgrewriteaof - bgsave - client - config - dbsize - debug - lastsave - monitor - save - shutdown - slaveof - slowlog - sync - time - ].freeze - end - - module Distributed - def initialize(addresses, options = { }) - options[:ring] ||= CanvasCache::HashRing.new([], options[:replicas], options[:digest]) - super - end - end - - def self.patch - Bundler.require 'redis' - require 'redis/distributed' - - Redis::Client.prepend(Client) - Redis::Distributed.prepend(Distributed) - Redis.send(:remove_const, :BoolifySet) - Redis.const_set(:BoolifySet, BoolifySet) + def clear_idle_connections + ::Canvas::RedisConnections.clear_idle! end end diff --git a/lib/canvas/redis_connections.rb b/lib/canvas/redis_connections.rb new file mode 100644 index 00000000000..e4df5bfcb05 --- /dev/null +++ b/lib/canvas/redis_connections.rb @@ -0,0 +1,79 @@ +# frozen_string_literal: true + +# +# Copyright (C) 2021 - present Instructure, Inc. +# +# This file is part of Canvas. +# +# Canvas is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, version 3 of the License. +# +# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see . + +module Canvas + ## + # RedisConnections is glue between the canvas + # app and the canvas_cache gem. + # Things like idle connection clearing + # require knowing the topology of our redis + # clusters and how they're cached in Switchman, + # information that's not important to push down + # into canvas_cache. + class RedisConnections + ## + # Drops every open connection, including both MultiCache (local) + # and connections to remote redis nodes + def self.disconnect! + if Rails.cache && + defined?(ActiveSupport::Cache::RedisCacheStore) && + Rails.cache.is_a?(ActiveSupport::Cache::RedisCacheStore) + ::CanvasCache::Redis.handle_redis_failure(nil, "none") do + redis = Rails.cache.redis + if redis.respond_to?(:nodes) + redis.nodes.each(&:disconnect!) + else + redis.disconnect! + end + end + end + + if MultiCache.cache.is_a?(ActiveSupport::Cache::HaStore) + ::CanvasCache::Redis.handle_redis_failure(nil, "none") do + redis = MultiCache.cache.redis + if redis.respond_to?(:nodes) + redis.nodes.each(&:disconnect!) + else + redis.disconnect! + end + end + end + + ::CanvasCache::Redis.disconnect! + end + + ## + # call somewhat frequently (e.g. every minute or so) + # to make sure we aren't holding open connections we aren't + # using. + def self.clear_idle! + clear_frequency = Setting.get("clear_idle_connections_frequency", 60).to_i + clear_timeout = Setting.get("clear_idle_connections_timeout", 60).to_i + @last_clear_time ||= Time.now.utc + if (Time.now.utc - @last_clear_time) > clear_frequency + @last_clear_time = Time.now.utc + # gather all the redises we can find + redises = Switchman.config[:cache_map].values. + map { |cache| cache.try(:redis) }.compact.uniq. + map { |redis| redis.try(:ring)&.nodes || [redis] }.inject([], &:concat).uniq + redises.each { |r| r._client.disconnect_if_idle(@last_clear_time - clear_timeout) } + end + end + end +end \ No newline at end of file diff --git a/spec/lib/canvas/redis_connections_spec.rb b/spec/lib/canvas/redis_connections_spec.rb new file mode 100644 index 00000000000..e7992440a33 --- /dev/null +++ b/spec/lib/canvas/redis_connections_spec.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +# +# Copyright (C) 2021 - present Instructure, Inc. +# +# This file is part of Canvas. +# +# Canvas is free software: you can redistribute it and/or modify it under +# the terms of the GNU Affero General Public License as published by the Free +# Software Foundation, version 3 of the License. +# +# Canvas is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +# details. +# +# You should have received a copy of the GNU Affero General Public License along +# with this program. If not, see . +# + +require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper.rb') + +describe "Canvas::RedisConnections" do + before(:each) do + skip("requires redis") unless CanvasCache::Redis.enabled? + end + + describe "disconnect!" do + it "checkes connections without exploding" do + expect { Canvas::RedisConnections.disconnect! }.to_not raise_error + end + end + + describe ".clear_idle!" do + it "culls connections without exploding" do + expect { Canvas::RedisConnections.clear_idle! }.to_not raise_error + end + end +end \ No newline at end of file diff --git a/spec/lib/canvas/redis_spec.rb b/spec/lib/canvas/redis_spec.rb index 5009f1cd90d..6aa54b8f96e 100644 --- a/spec/lib/canvas/redis_spec.rb +++ b/spec/lib/canvas/redis_spec.rb @@ -21,326 +21,14 @@ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper.rb') if Canvas.redis_enabled? + +# TODO: When CanvasCache::Redis has replaced all callsites, +# we won't need this shim anymore, and can drop this test verifying it. describe "Canvas::Redis" do it "doesn't marshall" do Canvas.redis.set('test', 1) expect(Canvas.redis.get('test')).to eq '1' end - - describe "locking" do - it "should succeed if the lock isn't taken" do - expect(Canvas::Redis.lock('test1')).to eq true - expect(Canvas::Redis.lock('test2')).to eq true - end - - it "should fail if the lock is taken" do - expect(Canvas::Redis.lock('test1')).to eq true - expect(Canvas::Redis.lock('test1')).to eq false - expect(Canvas::Redis.unlock('test1')).to eq true - expect(Canvas::Redis.lock('test1')).to eq true - end - - it "should live forever if no expire time is given" do - expect(Canvas::Redis.lock('test1')).to eq true - expect(Canvas::Redis.redis.ttl(Canvas::Redis.lock_key('test1'))).to eq -1 - end - - it "should set the expire time if given" do - expect(Canvas::Redis.lock('test1', 15)).to eq true - ttl = Canvas::Redis.redis.ttl(Canvas::Redis.lock_key('test1')) - expect(ttl).to be > 0 - expect(ttl).to be <= 15 - end - end - - describe "exceptions" do - before do - Canvas::Redis.patch - end - - after do - Canvas::Redis.reset_redis_failure - end - - it "should protect against errnos" do - expect(Canvas.redis._client).to receive(:write).and_raise(Errno::ETIMEDOUT).once - expect(Canvas.redis.set('blah', 'blah')).to eq nil - end - - it "should protect against max # of client errors" do - expect(Canvas.redis._client).to receive(:write).and_raise(Redis::CommandError.new("ERR max number of clients reached")).once - expect(Canvas.redis.set('blah', 'blah')).to eq nil - end - - it "should pass through other command errors" do - expect(InstStatsd::Statsd).to receive(:increment).never - - expect(Canvas.redis._client).to receive(:write).and_raise(Redis::CommandError.new("NOSCRIPT No matching script. Please use EVAL.")).once - expect { Canvas.redis.evalsha('xxx') }.to raise_error(Redis::CommandError) - - expect(Canvas.redis._client).to receive(:write).and_raise(Redis::CommandError.new("ERR no such key")).once - expect { Canvas.redis.get('no-such-key') }.to raise_error(Redis::CommandError) - end - - describe "redis failure" do - let(:cache) { ActiveSupport::Cache::RedisCacheStore.new(url: 'redis://localhost:1234') } - - before do - allow(cache.redis._client).to receive(:ensure_connected).and_raise(Redis::TimeoutError) - end - - it "should not fail cache.read" do - enable_cache(cache) do - expect(Rails.cache.read('blah')).to eq nil - end - end - - it "should not call redis again after an error" do - enable_cache(cache) do - expect(Rails.cache.read('blah')).to eq nil - # call again, the .once means that if it hits Redis::Client again it'll fail - expect(Rails.cache.read('blah')).to eq nil - end - end - - it "should not fail cache.write" do - enable_cache(cache) do - expect(Rails.cache.write('blah', 'someval')).to eq nil - end - end - - it "should not fail cache.delete" do - enable_cache(cache) do - expect(Rails.cache.delete('blah')).to eq 0 - end - end - - it "should not fail cache.delete for a ring" do - enable_cache(ActiveSupport::Cache::RedisCacheStore.new(url: ['redis://localhost:1234', 'redis://localhost:4567'])) do - expect(Rails.cache.delete('blah')).to eq 0 - end - end - - it "should not fail cache.exist?" do - enable_cache(cache) do - expect(Rails.cache.exist?('blah')).to be_falsey - end - end - - it "should not fail cache.delete_matched" do - enable_cache(cache) do - expect(Rails.cache.delete_matched('blah')).to eq nil - end - end - - it "should fail separate servers separately" do - cache = ActiveSupport::Cache::RedisCacheStore.new(url: [Canvas.redis.id, 'redis://nonexistent:1234/0']) - client = cache.redis - key2 = 2 - while client.node_for('1') == client.node_for(key2.to_s) - key2 += 1 - end - key2 = key2.to_s - expect(client.node_for('1')).not_to eq client.node_for(key2) - expect(client.nodes.last.id).to eq 'redis://nonexistent:1234/0' - expect(client.nodes.last._client).to receive(:ensure_connected).and_raise(Redis::TimeoutError).once - - cache.write('1', true, :use_new_rails => false) - cache.write(key2, true, :use_new_rails => false) - # one returned nil, one returned true; we don't know which one which key ended up on - expect([ - cache.fetch('1', :use_new_rails => false), - cache.fetch(key2, :use_new_rails => false) - ].compact).to eq [true] - end - - it "should not fail raw redis commands" do - expect(Canvas.redis._client).to receive(:ensure_connected).and_raise(Redis::TimeoutError).once - expect(Canvas.redis.setnx('my_key', 5)).to eq nil - end - - it "returns a non-nil structure for mget" do - expect(Canvas.redis._client).to receive(:ensure_connected).and_raise(Redis::TimeoutError).once - expect(Canvas.redis.mget(['k1', 'k2', 'k3'])).to eq [] - end - - it "distinguishes between failure and not exists for set nx" do - Canvas.redis.del('my_key') - expect(Canvas.redis.set('my_key', 5, nx: true)).to eq true - expect(Canvas.redis.set('my_key', 5, nx: true)).to eq false - expect(Canvas.redis._client).to receive(:ensure_connected).and_raise(Redis::TimeoutError).once - expect(Canvas.redis.set('my_key', 5, nx: true)).to eq nil - end - - end - end - - describe "json logging" do - let(:key) { 'mykey' } - let(:key2) { 'mykey2' } - let(:val) { 'myvalue' } - before { allow(Canvas::Redis).to receive(:log_style).and_return('json') } - - def json_logline(get = :shift) - # drop the non-json logging at the start of the line - JSON.parse(Rails.logger.captured_messages.send(get).match(/\{.*/)[0]) - end - - it "should log information on the redis request" do - Rails.logger.capture_messages do - Canvas.redis.set(key, val) - message = json_logline - expect(message["message"]).to eq("redis_request") - expect(message["command"]).to eq("set") - expect(message["key"]).to eq("mykey") - expect(message["request_size"]).to eq((key+val).size) - expect(message["response_size"]).to eq(2) # "OK" - expect(message["host"]).not_to be_nil - expect(message["request_time_ms"]).to be_a(Float) - end - end - - it "should not log the lua eval code" do - Rails.logger.capture_messages do - Canvas.redis.eval('local a = 1') - message = json_logline - expect(message["key"]).to be_nil - end - end - - it "should log error on redis error response" do - Rails.logger.capture_messages do - expect { Canvas.redis.eval('totes not lua') }.to raise_error(Redis::CommandError) - message = json_logline - expect(message["response_size"]).to eq(0) - expect(message["error"]).to be_a(String) - end - end - - context "rails caching" do - let(:cache) do - ActiveSupport::Cache::RedisCacheStore.new(redis: Canvas.redis) - end - - it "should log the cache fetch block generation time" do - begin - Timecop.safe_mode = false - Timecop.freeze - Rails.logger.capture_messages do - # make sure this works with fetching nested fetches - cache.fetch(key, force: true) do - val = +"a1" - val << cache.fetch(key2, force: true) do - Timecop.travel(Time.zone.now + 1.second) - "b1" - end - Timecop.travel(Time.zone.now + 2.seconds) - val << "a2" - end - outer_message = json_logline(:pop) - expect(outer_message["command"]).to eq("set") - expect(outer_message["key"]).to be_ends_with(key) - expect(outer_message["request_time_ms"]).to be_a(Float) - # 3000 (3s) == 2s outer fetch + 1s inner fetch - expect(outer_message["generate_time_ms"]).to be_within(500).of(3000) - - inner_message = json_logline(:pop) - expect(inner_message["command"]).to eq("set") - expect(inner_message["key"]).to be_ends_with(key2) - expect(inner_message["request_time_ms"]).to be_a(Float) - expect(inner_message["generate_time_ms"]).to be_within(500).of(1000) - end - ensure - Timecop.return - Timecop.safe_mode = true - end - end - - it "should log zero response size on cache miss" do - cache.delete(key) - Rails.logger.capture_messages do - expect(cache.read(key)).to be_nil - message = json_logline(:pop) - expect(message["command"]).to eq("get") - expect(message["response_size"]).to eq(0) - end - end - end - end - - it "should log compactly by default on the redis request" do - # cache to avoid capturing a log line for db lookup - Canvas::Redis.log_style - Rails.logger.capture_messages do - Canvas.redis.set('mykey', 'myvalue') - msg = Rails.logger.captured_messages.first - expect(msg).to match(/Redis \(\d+\.\d+ms\) set mykey \[.*\]/) - end - end - - it "should allow disabling redis logging" do - allow(Canvas::Redis).to receive(:log_style).and_return('off') - Rails.logger.capture_messages do - Canvas.redis.set('mykey', 'myvalue') - expect(Rails.logger.captured_messages).to be_empty - end - end - - describe "Canvas::RedisWrapper" do - it "should raise on unsupported commands" do - expect { Canvas.redis.keys }.to raise_error(Canvas::Redis::UnsupportedRedisMethod) - end - end - - describe "handle_redis_failure" do - before do - Canvas::Redis.patch - end - - after do - Canvas::Redis.reset_redis_failure - end - - it "logs any redis error when they occur" do - messages = [] - expect(Rails.logger).to receive(:error) do |message| - messages << message - end.at_least(:once) - Canvas::Redis.handle_redis_failure({'failure'=>'val'}, 'local_fake_redis') do - raise ::Redis::InheritedError, "intentional failure" - end - # we don't log the second message under spring, cause reasons; we only - # care about the primary message anyway - msgs = messages.select{|m| m =~ /Query failure/ } - expect(msgs.length).to eq(1) - m = msgs.first - expect(m).to match(/\[REDIS\] Query failure/) - expect(m).to match(/\(local_fake_redis\)/) - expect(m).to match(/InheritedError/) - end - - it "tracks failure only briefly for local redis" do - local_node = "localhost:9999" - expect(Canvas::Redis.redis_failure?(local_node)).to be_falsey - Canvas::Redis.last_redis_failure[local_node] = Time.now - expect(Canvas::Redis.redis_failure?(local_node)).to be_truthy - Timecop.travel(4) do - expect(Canvas::Redis.redis_failure?(local_node)).to be_falsey - end - end - - it "circuit breaks for standard nodes for a different amount of time" do - remote_node = "redis-test-node-42:9999" - expect(Canvas::Redis.redis_failure?(remote_node)).to be_falsey - Canvas::Redis.last_redis_failure[remote_node] = Time.now - expect(Canvas::Redis.redis_failure?(remote_node)).to be_truthy - Timecop.travel(4) do - expect(Canvas::Redis.redis_failure?(remote_node)).to be_truthy - end - Timecop.travel(400) do - expect(Canvas::Redis.redis_failure?(remote_node)).to be_falsey - end - end - end end + end