add jobs cluster tag
refs AE-119

Change-Id: I1b2f903a60793459feca2c0ef4d9eed9df12b508
Reviewed-on: https://gerrit.instructure.com/c/canvas-lms/+/309463
Tested-by: Service Cloud Jenkins <svc.cloudjenkins@instructure.com>
Reviewed-by: Isaac Moore <isaac.moore@instructure.com>
QA-Review: Aaron Ogata <aogata@instructure.com>
Product-Review: Aaron Ogata <aogata@instructure.com>
This commit is contained in:
parent
feb8286b0e
commit
ca7fa755d4
|
@ -173,14 +173,13 @@ class InfoController < ApplicationController
|
|||
}
|
||||
end
|
||||
|
||||
def readiness(is_deep_check: false)
|
||||
def readiness
|
||||
# This action provides a clear signal for assessing system components that are "owned"
|
||||
# by Canvas and are ultimately responsible for being alive and able to serve consumer traffic
|
||||
|
||||
components = HealthChecks.process_readiness_checks(is_deep_check)
|
||||
components = HealthChecks.process_readiness_checks(false)
|
||||
|
||||
failed = components.reject { |_k, v| v[:status] }.map(&:first)
|
||||
render_readiness_json(components, failed.any? ? 503 : 200, is_deep_check)
|
||||
render_readiness_json(components, false)
|
||||
end
|
||||
|
||||
def deep
|
||||
|
@ -198,7 +197,10 @@ class InfoController < ApplicationController
|
|||
|
||||
private
|
||||
|
||||
def render_readiness_json(components, status_code, is_deep_check)
|
||||
def render_readiness_json(components, is_deep_check)
|
||||
failed = components.reject { |_k, v| v[:status] }.map(&:first)
|
||||
status_code = failed.any? ? 503 : 200
|
||||
|
||||
readiness_json = { status: status_code, components: components_to_hash(components) }
|
||||
return readiness_json if is_deep_check
|
||||
|
||||
|
@ -206,14 +208,24 @@ class InfoController < ApplicationController
|
|||
end
|
||||
|
||||
def render_deep_json(critical, secondary, status_code)
|
||||
readiness_response = readiness(is_deep_check: true)
|
||||
components = HealthChecks.process_readiness_checks(true)
|
||||
readiness_response = render_readiness_json(components, true)
|
||||
|
||||
status = readiness_response[:status] == 503 ? readiness_response[:status] : status_code
|
||||
|
||||
response = {
|
||||
readiness: components,
|
||||
critical: critical,
|
||||
secondary: secondary,
|
||||
}
|
||||
|
||||
HealthChecks.send_to_statsd(response, { cluster: Shard.current.database_server_id })
|
||||
|
||||
render json: {
|
||||
status: status,
|
||||
readiness: readiness_response,
|
||||
critical: components_to_hash(critical),
|
||||
secondary: components_to_hash(secondary)
|
||||
secondary: components_to_hash(secondary),
|
||||
}, status: status
|
||||
end
|
||||
|
||||
|
|
|
@ -354,13 +354,4 @@ Rails.configuration.after_initialize do
|
|||
singleton: "Canvas::LiveEvents#heartbeat" }
|
||||
)
|
||||
end
|
||||
|
||||
Delayed::Periodic.cron "HealthChecks.send_to_statsd", "* * * * *" do
|
||||
DatabaseServer.send_in_each_region(
|
||||
HealthChecks,
|
||||
:send_to_statsd,
|
||||
{ run_current_region_asynchronously: true,
|
||||
singleton: "HealthChecks#send_to_statsd" }
|
||||
)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -38,12 +38,12 @@ module HealthChecks
|
|||
{ critical: critical, secondary: secondary }
|
||||
end
|
||||
|
||||
def send_to_statsd
|
||||
result = process_deep_checks.merge({ readiness: process_readiness_checks(true) })
|
||||
def send_to_statsd(result = nil, additional_tags = {})
|
||||
result ||= process_deep_checks.merge({ readiness: process_readiness_checks(true) })
|
||||
|
||||
result.each do |check_type, check_values|
|
||||
check_values.each do |check_name, check_results|
|
||||
tags = { type: check_type, key: check_name }
|
||||
tags = { type: check_type, key: check_name, **additional_tags }
|
||||
|
||||
InstStatsd::Statsd.timing("canvas.health_checks.response_time_ms", check_results[:time], tags: tags)
|
||||
InstStatsd::Statsd.gauge("canvas.health_checks.status", check_results[:status] ? 1 : 0, tags: tags)
|
||||
|
|
|
@ -181,6 +181,16 @@ describe InfoController do
|
|||
expect(json["readiness"]["components"].count).to be > 0
|
||||
end
|
||||
|
||||
it "reports to statsd upon loading the deep endpoint" do
|
||||
allow(InstStatsd::Statsd).to receive(:gauge)
|
||||
allow(InstStatsd::Statsd).to receive(:timing)
|
||||
allow(Shard.current).to receive(:database_server_id).and_return("C1")
|
||||
|
||||
get "deep"
|
||||
expect(response).to be_successful
|
||||
expect(InstStatsd::Statsd).to have_received(:gauge).with("canvas.health_checks.status", 1, tags: { type: :readiness, key: :common_css, cluster: "C1" })
|
||||
end
|
||||
|
||||
it "responds with 503 if a readiness system component is considered down" do
|
||||
allow(Delayed::Job.connection).to receive(:active?).and_return(false)
|
||||
get "deep"
|
||||
|
|
|
@ -169,4 +169,47 @@ describe HealthChecks do
|
|||
expect(InstStatsd::Statsd).to have_received(:timing).with("canvas.health_checks.response_time_ms", 3, tags: { type: :deep, key: :deep_check_name_error })
|
||||
expect(InstStatsd::Statsd).to have_received(:timing).with("canvas.health_checks.response_time_ms", 4, tags: { type: :deep, key: :deep_check_name_success })
|
||||
end
|
||||
|
||||
it "reports pre-computed metrics to statsd" do
|
||||
allow(InstStatsd::Statsd).to receive(:gauge)
|
||||
allow(InstStatsd::Statsd).to receive(:timing)
|
||||
|
||||
HealthChecks.send_to_statsd(
|
||||
{
|
||||
readiness: {
|
||||
readiness_check_name_error: { time: 1, status: false },
|
||||
readiness_check_name_success: { time: 2, status: true },
|
||||
},
|
||||
deep: {
|
||||
deep_check_name_error: { time: 3, status: false },
|
||||
deep_check_name_success: { time: 4, status: true },
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
expect(InstStatsd::Statsd).to have_received(:gauge).with("canvas.health_checks.status", 0, tags: { type: :deep, key: :deep_check_name_error })
|
||||
expect(InstStatsd::Statsd).to have_received(:gauge).with("canvas.health_checks.status", 0, tags: { type: :readiness, key: :readiness_check_name_error })
|
||||
expect(InstStatsd::Statsd).to have_received(:gauge).with("canvas.health_checks.status", 1, tags: { type: :deep, key: :deep_check_name_success })
|
||||
expect(InstStatsd::Statsd).to have_received(:gauge).with("canvas.health_checks.status", 1, tags: { type: :readiness, key: :readiness_check_name_success })
|
||||
expect(InstStatsd::Statsd).to have_received(:timing).with("canvas.health_checks.response_time_ms", 1, tags: { type: :readiness, key: :readiness_check_name_error })
|
||||
expect(InstStatsd::Statsd).to have_received(:timing).with("canvas.health_checks.response_time_ms", 2, tags: { type: :readiness, key: :readiness_check_name_success })
|
||||
expect(InstStatsd::Statsd).to have_received(:timing).with("canvas.health_checks.response_time_ms", 3, tags: { type: :deep, key: :deep_check_name_error })
|
||||
expect(InstStatsd::Statsd).to have_received(:timing).with("canvas.health_checks.response_time_ms", 4, tags: { type: :deep, key: :deep_check_name_success })
|
||||
end
|
||||
|
||||
it "adds additional tags to the reported metrics" do
|
||||
allow(InstStatsd::Statsd).to receive(:gauge)
|
||||
allow(InstStatsd::Statsd).to receive(:timing)
|
||||
|
||||
HealthChecks.send_to_statsd(
|
||||
{
|
||||
readiness: {
|
||||
readiness_check_name_error: { time: 1, status: false },
|
||||
},
|
||||
}, { cluster: "C1" }
|
||||
)
|
||||
|
||||
expect(InstStatsd::Statsd).to have_received(:gauge).with("canvas.health_checks.status", 0, tags: { type: :readiness, key: :readiness_check_name_error, cluster: "C1" })
|
||||
expect(InstStatsd::Statsd).to have_received(:timing).with("canvas.health_checks.response_time_ms", 1, tags: { type: :readiness, key: :readiness_check_name_error, cluster: "C1" })
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue