allow user to choose which vllm's metrics to display in grafana (#3393)

This commit is contained in:
Allen.Dou 2024-03-14 14:35:13 +08:00 committed by GitHub
parent 81653d9688
commit a37415c31b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 88 additions and 96 deletions

View File

@ -1,35 +1,4 @@
{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "10.2.3"
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
},
{
"type": "panel",
"id": "timeseries",
"name": "Time series",
"version": ""
}
],
"annotations": {
"list": [
{
@ -42,6 +11,12 @@
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
@ -50,14 +25,14 @@
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"id": 29,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"description": "End to end request latency measured in seconds.",
"fieldConfig": {
@ -66,7 +41,6 @@
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@ -80,7 +54,6 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@ -138,11 +111,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))",
"expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": false,
@ -154,11 +127,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))",
"expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": false,
@ -171,11 +144,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))",
"expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": false,
@ -188,11 +161,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))",
"expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": false,
@ -205,10 +178,10 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"editorMode": "code",
"expr": "rate(vllm:e2e_request_latency_seconds_sum[$__rate_interval])\n/\nrate(vllm:e2e_request_latency_seconds_count[$__rate_interval])",
"expr": "rate(vllm:e2e_request_latency_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(vllm:e2e_request_latency_seconds_count{model_name=\"$model_name\"}[$__rate_interval])",
"hide": false,
"instant": false,
"legendFormat": "Average",
@ -222,7 +195,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"description": "Number of tokens processed per second",
"fieldConfig": {
@ -231,7 +204,6 @@
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@ -245,7 +217,6 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@ -302,11 +273,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "rate(vllm:prompt_tokens_total[$__rate_interval])",
"expr": "rate(vllm:prompt_tokens_total{model_name=\"$model_name\"}[$__rate_interval])",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": false,
@ -318,11 +289,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "rate(vllm:generation_tokens_total[$__rate_interval])",
"expr": "rate(vllm:generation_tokens_total{model_name=\"$model_name\"}[$__rate_interval])",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": false,
@ -339,7 +310,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"description": "Inter token latency in seconds.",
"fieldConfig": {
@ -348,7 +319,6 @@
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@ -362,7 +332,6 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@ -420,11 +389,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))",
"expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": false,
@ -436,11 +405,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))",
"expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": false,
@ -453,11 +422,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))",
"expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": false,
@ -470,11 +439,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))",
"expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": false,
@ -487,10 +456,10 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"editorMode": "code",
"expr": "rate(vllm:time_per_output_token_seconds_sum[$__rate_interval])\n/\nrate(vllm:time_per_output_token_seconds_count[$__rate_interval])",
"expr": "rate(vllm:time_per_output_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(vllm:time_per_output_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])",
"hide": false,
"instant": false,
"legendFormat": "Mean",
@ -504,7 +473,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"description": "Number of requests in RUNNING, WAITING, and SWAPPED state",
"fieldConfig": {
@ -513,7 +482,6 @@
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@ -527,7 +495,6 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@ -585,11 +552,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "vllm:num_requests_running",
"expr": "vllm:num_requests_running{model_name=\"$model_name\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
@ -601,11 +568,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "vllm:num_requests_swapped",
"expr": "vllm:num_requests_swapped{model_name=\"$model_name\"}",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
@ -618,11 +585,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "vllm:num_requests_waiting",
"expr": "vllm:num_requests_waiting{model_name=\"$model_name\"}",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
@ -639,7 +606,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"description": "P50, P90, P95, and P99 TTFT latency in seconds.",
"fieldConfig": {
@ -648,7 +615,6 @@
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@ -662,7 +628,6 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@ -720,11 +685,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))",
"expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": false,
@ -737,11 +702,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))",
"expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": false,
@ -753,11 +718,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))",
"expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": false,
@ -770,11 +735,11 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))",
"expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": false,
@ -787,10 +752,10 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"editorMode": "code",
"expr": "rate(vllm:time_to_first_token_seconds_sum[$__rate_interval])\n/\nrate(vllm:time_to_first_token_seconds_count[$__rate_interval])",
"expr": "rate(vllm:time_to_first_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(vllm:time_to_first_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])",
"hide": false,
"instant": false,
"legendFormat": "Average",
@ -804,7 +769,7 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"description": "Percentage of used cache blocks by vLLM.",
"fieldConfig": {
@ -813,7 +778,6 @@
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
@ -827,7 +791,6 @@
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
@ -885,10 +848,10 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"editorMode": "code",
"expr": "vllm:gpu_cache_usage_perc",
"expr": "vllm:gpu_cache_usage_perc{model_name=\"$model_name\"}",
"instant": false,
"legendFormat": "GPU Cache Usage",
"range": true,
@ -897,10 +860,10 @@
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
"uid": "prometheus"
},
"editorMode": "code",
"expr": "vllm:cpu_cache_usage_perc",
"expr": "vllm:cpu_cache_usage_perc{model_name=\"$model_name\"}",
"hide": false,
"instant": false,
"legendFormat": "CPU Cache Usage",
@ -913,10 +876,39 @@
}
],
"refresh": "",
"schemaVersion": 39,
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": []
"list": [
{
"current": {
"selected": false,
"text": "vllm",
"value": "vllm"
},
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"definition": "label_values(model_name)",
"hide": 0,
"includeAll": false,
"label": "model_name",
"multi": false,
"name": "model_name",
"options": [],
"query": {
"query": "label_values(model_name)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-5m",