Auto merge of #125771 - Kobzol:ci-datadog-metrics, r=jdno

[CI] Upload average CPU utilization of CI jobs to DataDog

This PR adds a new CI step that uploads the average CPU utilization of the current GH job to Datadog. I want to add more metrics in follow-up PRs.

r? `@jdno`

try-job: dist-i686-msvc
try-job: aarch64-apple
try-job: x86_64-gnu-llvm-18
This commit is contained in:
bors 2024-10-02 13:56:55 +00:00
commit 44722bd9ba
2 changed files with 91 additions and 0 deletions

View File

@ -212,6 +212,16 @@ jobs:
# erroring about invalid credentials instead.
if: github.event_name == 'push' || env.DEPLOY == '1' || env.DEPLOY_ALT == '1'
# Step that uploads metrics of the current job to DataDog.
# The condition below skips it when the computed run type is 'pr'.
- name: upload job metrics to DataDog
if: needs.calculate_matrix.outputs.run_type != 'pr'
env:
DATADOG_SITE: datadoghq.com
DATADOG_API_KEY: ${{ secrets.DATADOG_API_KEY }}
DD_GITHUB_JOB_NAME: ${{ matrix.name }}
# Install the `datadog-ci` CLI globally through npm, then run the uploader
# script on the CPU usage CSV located at build/cpu-usage.csv.
run: |
npm install -g @datadog/datadog-ci@^2.x.x
python3 src/ci/scripts/upload-build-metrics.py build/cpu-usage.csv
# This job is used to tell bors the final status of the build, as there is no practical way to detect
# when a workflow is successful listening to webhooks only in our current bors implementation (homu).
outcome:

View File

@ -0,0 +1,81 @@
"""
This script postprocesses data gathered during a CI run, computes certain metrics
from them, and uploads these metrics to DataDog.
This script is expected to be executed from within a GitHub Actions job.
It expects the following environment variables:
- DATADOG_SITE: DataDog site (domain) that the metrics are sent to, e.g. `datadoghq.com`
- DATADOG_API_KEY: DataDog API token
- DD_GITHUB_JOB_NAME: Name of the current GitHub Actions job
It also expects a binary called `datadog-ci` to be available in PATH.
It can be installed with `npm install -g @datadog/datadog-ci`.
Usage:
```bash
$ python3 upload-build-metrics.py <path-to-CPU-usage-CSV>
```
`path-to-CPU-usage-CSV` is a path to a CSV generated by the `src/ci/cpu-usage-over-time.py` script.
"""
import argparse
import csv
import os
import subprocess
import sys
from pathlib import Path
from typing import List
def load_cpu_usage(path: Path) -> List[float]:
    """
    Loads CPU usage samples from the CSV file at `path`.

    A row is accepted only if it has exactly two columns and its second
    column parses as a float. That value is treated as the idle CPU
    percentage, and the recorded usage is `100 - idle`.

    Rows with a different number of columns (e.g. truncated rows or lines of a
    Python traceback) and rows whose second column is not a number are skipped.
    Returns the usage values in the order in which they appear in the file.
    """
    usage = []
    # `newline=""` is the mode recommended by the `csv` module for its input files.
    # The log may contain arbitrary garbage, so undecodable bytes are replaced
    # instead of aborting the whole script with a `UnicodeDecodeError`; such rows
    # are then filtered out by the checks below like any other malformed row.
    with open(path, newline="", encoding="utf-8", errors="replace") as f:
        for row in csv.reader(f, delimiter=","):
            # The log might contain incomplete rows or some Python exception
            if len(row) != 2:
                continue
            try:
                idle = float(row[1])
            except ValueError:
                continue
            usage.append(100.0 - idle)
    return usage
def upload_datadog_measure(name: str, value: float):
    """
    Sends one numeric measure for the current GitHub Actions job to DataDog
    by invoking the `datadog-ci` command-line tool.

    The measure is also printed to stdout. The exit status of the tool is not
    checked.
    """
    print(f"Metric {name}: {value:.4f}")

    on_windows_runner = (
        "GITHUB_ACTIONS" in os.environ
        and sys.platform.lower().startswith("win")
    )
    # Due to weird interaction of MSYS2 and Python, Windows GitHub runners need
    # an absolute path that also spells out the ".cmd" extension.
    # See https://github.com/rust-lang/rust/pull/125771.
    executable = (
        "C:\\npm\\prefix\\datadog-ci.cmd" if on_windows_runner else "datadog-ci"
    )

    command = [
        executable,
        "measure",
        "--level", "job",
        "--measures", f"{name}:{value}",
    ]
    subprocess.run(command, check=False)
if __name__ == "__main__":
    # Script entry point: read the CSV path from the command line, average the
    # CPU usage samples found in it, and upload the average as a single measure.
    cli = argparse.ArgumentParser(prog="DataDog metric uploader")
    cli.add_argument("cpu-usage-history-csv")
    parsed = cli.parse_args()

    # The positional argument's name contains dashes, so it cannot be read with
    # plain attribute syntax; look it up by its string name instead.
    csv_path = Path(getattr(parsed, "cpu-usage-history-csv"))
    samples = load_cpu_usage(csv_path)

    # With no usable samples, report 0 rather than dividing by zero.
    mean_usage = sum(samples) / len(samples) if samples else 0

    upload_datadog_measure("avg-cpu-usage", mean_usage)