add jobs cluster tag
refs AE-119
Change-Id: I1b2f903a60793459feca2c0ef4d9eed9df12b508
Reviewed-on: https://gerrit.instructure.com/c/canvas-lms/+/309463
Tested-by: Service Cloud Jenkins <svc.cloudjenkins@instructure.com>
Reviewed-by: Isaac Moore <isaac.moore@instructure.com>
QA-Review: Aaron Ogata <aogata@instructure.com>
Product-Review: Aaron Ogata <aogata@instructure.com>
This commit is contained in:
parent
feb8286b0e
commit
ca7fa755d4
|
@ -173,14 +173,13 @@ class InfoController < ApplicationController
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
def readiness(is_deep_check: false)
|
def readiness
|
||||||
# This action provides a clear signal for assessing system components that are "owned"
|
# This action provides a clear signal for assessing system components that are "owned"
|
||||||
# by Canvas and are ultimately responsible for being alive and able to serve consumer traffic
|
# by Canvas and are ultimately responsible for being alive and able to serve consumer traffic
|
||||||
|
|
||||||
components = HealthChecks.process_readiness_checks(is_deep_check)
|
components = HealthChecks.process_readiness_checks(false)
|
||||||
|
|
||||||
failed = components.reject { |_k, v| v[:status] }.map(&:first)
|
render_readiness_json(components, false)
|
||||||
render_readiness_json(components, failed.any? ? 503 : 200, is_deep_check)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def deep
|
def deep
|
||||||
|
@ -198,7 +197,10 @@ class InfoController < ApplicationController
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def render_readiness_json(components, status_code, is_deep_check)
|
def render_readiness_json(components, is_deep_check)
|
||||||
|
failed = components.reject { |_k, v| v[:status] }.map(&:first)
|
||||||
|
status_code = failed.any? ? 503 : 200
|
||||||
|
|
||||||
readiness_json = { status: status_code, components: components_to_hash(components) }
|
readiness_json = { status: status_code, components: components_to_hash(components) }
|
||||||
return readiness_json if is_deep_check
|
return readiness_json if is_deep_check
|
||||||
|
|
||||||
|
@ -206,14 +208,24 @@ class InfoController < ApplicationController
|
||||||
end
|
end
|
||||||
|
|
||||||
def render_deep_json(critical, secondary, status_code)
|
def render_deep_json(critical, secondary, status_code)
|
||||||
readiness_response = readiness(is_deep_check: true)
|
components = HealthChecks.process_readiness_checks(true)
|
||||||
|
readiness_response = render_readiness_json(components, true)
|
||||||
|
|
||||||
status = readiness_response[:status] == 503 ? readiness_response[:status] : status_code
|
status = readiness_response[:status] == 503 ? readiness_response[:status] : status_code
|
||||||
|
|
||||||
|
response = {
|
||||||
|
readiness: components,
|
||||||
|
critical: critical,
|
||||||
|
secondary: secondary,
|
||||||
|
}
|
||||||
|
|
||||||
|
HealthChecks.send_to_statsd(response, { cluster: Shard.current.database_server_id })
|
||||||
|
|
||||||
render json: {
|
render json: {
|
||||||
status: status,
|
status: status,
|
||||||
readiness: readiness_response,
|
readiness: readiness_response,
|
||||||
critical: components_to_hash(critical),
|
critical: components_to_hash(critical),
|
||||||
secondary: components_to_hash(secondary)
|
secondary: components_to_hash(secondary),
|
||||||
}, status: status
|
}, status: status
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -354,13 +354,4 @@ Rails.configuration.after_initialize do
|
||||||
singleton: "Canvas::LiveEvents#heartbeat" }
|
singleton: "Canvas::LiveEvents#heartbeat" }
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
Delayed::Periodic.cron "HealthChecks.send_to_statsd", "* * * * *" do
|
|
||||||
DatabaseServer.send_in_each_region(
|
|
||||||
HealthChecks,
|
|
||||||
:send_to_statsd,
|
|
||||||
{ run_current_region_asynchronously: true,
|
|
||||||
singleton: "HealthChecks#send_to_statsd" }
|
|
||||||
)
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -38,12 +38,12 @@ module HealthChecks
|
||||||
{ critical: critical, secondary: secondary }
|
{ critical: critical, secondary: secondary }
|
||||||
end
|
end
|
||||||
|
|
||||||
def send_to_statsd
|
def send_to_statsd(result = nil, additional_tags = {})
|
||||||
result = process_deep_checks.merge({ readiness: process_readiness_checks(true) })
|
result ||= process_deep_checks.merge({ readiness: process_readiness_checks(true) })
|
||||||
|
|
||||||
result.each do |check_type, check_values|
|
result.each do |check_type, check_values|
|
||||||
check_values.each do |check_name, check_results|
|
check_values.each do |check_name, check_results|
|
||||||
tags = { type: check_type, key: check_name }
|
tags = { type: check_type, key: check_name, **additional_tags }
|
||||||
|
|
||||||
InstStatsd::Statsd.timing("canvas.health_checks.response_time_ms", check_results[:time], tags: tags)
|
InstStatsd::Statsd.timing("canvas.health_checks.response_time_ms", check_results[:time], tags: tags)
|
||||||
InstStatsd::Statsd.gauge("canvas.health_checks.status", check_results[:status] ? 1 : 0, tags: tags)
|
InstStatsd::Statsd.gauge("canvas.health_checks.status", check_results[:status] ? 1 : 0, tags: tags)
|
||||||
|
|
|
@ -181,6 +181,16 @@ describe InfoController do
|
||||||
expect(json["readiness"]["components"].count).to be > 0
|
expect(json["readiness"]["components"].count).to be > 0
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "reports to statsd upon loading the deep endpoint" do
|
||||||
|
allow(InstStatsd::Statsd).to receive(:gauge)
|
||||||
|
allow(InstStatsd::Statsd).to receive(:timing)
|
||||||
|
allow(Shard.current).to receive(:database_server_id).and_return("C1")
|
||||||
|
|
||||||
|
get "deep"
|
||||||
|
expect(response).to be_successful
|
||||||
|
expect(InstStatsd::Statsd).to have_received(:gauge).with("canvas.health_checks.status", 1, tags: { type: :readiness, key: :common_css, cluster: "C1" })
|
||||||
|
end
|
||||||
|
|
||||||
it "responds with 503 if a readiness system component is considered down" do
|
it "responds with 503 if a readiness system component is considered down" do
|
||||||
allow(Delayed::Job.connection).to receive(:active?).and_return(false)
|
allow(Delayed::Job.connection).to receive(:active?).and_return(false)
|
||||||
get "deep"
|
get "deep"
|
||||||
|
|
|
@ -169,4 +169,47 @@ describe HealthChecks do
|
||||||
expect(InstStatsd::Statsd).to have_received(:timing).with("canvas.health_checks.response_time_ms", 3, tags: { type: :deep, key: :deep_check_name_error })
|
expect(InstStatsd::Statsd).to have_received(:timing).with("canvas.health_checks.response_time_ms", 3, tags: { type: :deep, key: :deep_check_name_error })
|
||||||
expect(InstStatsd::Statsd).to have_received(:timing).with("canvas.health_checks.response_time_ms", 4, tags: { type: :deep, key: :deep_check_name_success })
|
expect(InstStatsd::Statsd).to have_received(:timing).with("canvas.health_checks.response_time_ms", 4, tags: { type: :deep, key: :deep_check_name_success })
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "reports pre-computed metrics to statsd" do
|
||||||
|
allow(InstStatsd::Statsd).to receive(:gauge)
|
||||||
|
allow(InstStatsd::Statsd).to receive(:timing)
|
||||||
|
|
||||||
|
HealthChecks.send_to_statsd(
|
||||||
|
{
|
||||||
|
readiness: {
|
||||||
|
readiness_check_name_error: { time: 1, status: false },
|
||||||
|
readiness_check_name_success: { time: 2, status: true },
|
||||||
|
},
|
||||||
|
deep: {
|
||||||
|
deep_check_name_error: { time: 3, status: false },
|
||||||
|
deep_check_name_success: { time: 4, status: true },
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(InstStatsd::Statsd).to have_received(:gauge).with("canvas.health_checks.status", 0, tags: { type: :deep, key: :deep_check_name_error })
|
||||||
|
expect(InstStatsd::Statsd).to have_received(:gauge).with("canvas.health_checks.status", 0, tags: { type: :readiness, key: :readiness_check_name_error })
|
||||||
|
expect(InstStatsd::Statsd).to have_received(:gauge).with("canvas.health_checks.status", 1, tags: { type: :deep, key: :deep_check_name_success })
|
||||||
|
expect(InstStatsd::Statsd).to have_received(:gauge).with("canvas.health_checks.status", 1, tags: { type: :readiness, key: :readiness_check_name_success })
|
||||||
|
expect(InstStatsd::Statsd).to have_received(:timing).with("canvas.health_checks.response_time_ms", 1, tags: { type: :readiness, key: :readiness_check_name_error })
|
||||||
|
expect(InstStatsd::Statsd).to have_received(:timing).with("canvas.health_checks.response_time_ms", 2, tags: { type: :readiness, key: :readiness_check_name_success })
|
||||||
|
expect(InstStatsd::Statsd).to have_received(:timing).with("canvas.health_checks.response_time_ms", 3, tags: { type: :deep, key: :deep_check_name_error })
|
||||||
|
expect(InstStatsd::Statsd).to have_received(:timing).with("canvas.health_checks.response_time_ms", 4, tags: { type: :deep, key: :deep_check_name_success })
|
||||||
|
end
|
||||||
|
|
||||||
|
it "adds additional tags to the reported metrics" do
|
||||||
|
allow(InstStatsd::Statsd).to receive(:gauge)
|
||||||
|
allow(InstStatsd::Statsd).to receive(:timing)
|
||||||
|
|
||||||
|
HealthChecks.send_to_statsd(
|
||||||
|
{
|
||||||
|
readiness: {
|
||||||
|
readiness_check_name_error: { time: 1, status: false },
|
||||||
|
},
|
||||||
|
}, { cluster: "C1" }
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(InstStatsd::Statsd).to have_received(:gauge).with("canvas.health_checks.status", 0, tags: { type: :readiness, key: :readiness_check_name_error, cluster: "C1" })
|
||||||
|
expect(InstStatsd::Statsd).to have_received(:timing).with("canvas.health_checks.response_time_ms", 1, tags: { type: :readiness, key: :readiness_check_name_error, cluster: "C1" })
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue