version 5.1
This commit is contained in:
parent
7a2b07eebd
commit
6e3ad3dd6b
|
@ -0,0 +1,38 @@
|
|||
.PHONY: start build
|
||||
|
||||
# Build timestamp in UTC. %H (24-hour clock) is used so that timestamps taken
# in the afternoon do not collide with the ones taken in the morning.
NOW = $(shell date -u '+%Y%m%d%H%M%S')
|
||||
|
||||
RELEASE_VERSION = 5.1.0
|
||||
|
||||
APP = n9e
|
||||
SERVER_BIN = ${APP}
|
||||
# RELEASE_ROOT = release
|
||||
# RELEASE_SERVER = release/${APP}
|
||||
# GIT_COUNT = $(shell git rev-list --all --count)
|
||||
# GIT_HASH = $(shell git rev-parse --short HEAD)
|
||||
# RELEASE_TAG = $(RELEASE_VERSION).$(GIT_COUNT).$(GIT_HASH)
|
||||
|
||||
all: build
|
||||
|
||||
build:
|
||||
@go build -ldflags "-w -s -X main.VERSION=$(RELEASE_VERSION)" -o $(SERVER_BIN) ./src
|
||||
|
||||
# start:
|
||||
# @go run -ldflags "-X main.VERSION=$(RELEASE_TAG)" ./cmd/${APP}/main.go web -c ./configs/config.toml -m ./configs/model.conf --menu ./configs/menu.yaml
|
||||
|
||||
# swagger:
|
||||
# @swag init --parseDependency --generalInfo ./cmd/${APP}/main.go --output ./internal/app/swagger
|
||||
|
||||
# wire:
|
||||
# @wire gen ./internal/app
|
||||
|
||||
# test:
|
||||
# cd ./internal/app/test && go test -v
|
||||
|
||||
# clean:
|
||||
# rm -rf data release $(SERVER_BIN) internal/app/test/data cmd/${APP}/data
|
||||
|
||||
# pack: build
|
||||
# rm -rf $(RELEASE_ROOT) && mkdir -p $(RELEASE_SERVER)
|
||||
# cp -r $(SERVER_BIN) configs $(RELEASE_SERVER)
|
||||
# cd $(RELEASE_ROOT) && tar -cvf $(APP).tar ${APP} && rm -rf ${APP}
|
20
README.md
20
README.md
|
@ -1,10 +1,18 @@
|
|||
## 基本信息
|
||||
|
||||
- 官网:[n9e.didiyun.com](https://n9e.didiyun.com/) 右上角切换版本
|
||||
- 招聘:前后端都要,base北京,薪资open,可将简历发至邮箱 `echo cWlueWVuaW5nQGRpZGlnbG9iYWwuY29t | base64 -d` 一起来做开源
|
||||
|
||||
## 大本营
|
||||
|
||||
微信公众号:`__n9e__`(夜莺监控)
|
||||
微信公号:`__n9e__`(夜莺监控)
|
||||
知识星球:夜莺开源社区
|
||||
|
||||
钉钉交流群:
|
||||
|
||||
|
||||
|
||||
# todo
|
||||
|
||||
- [x] deploy nightingale in docker
|
||||
- [x] export /metrics endpoint
|
||||
- [ ] notify.py support feishu
|
||||
- [ ] notify.py support sms
|
||||
- [ ] notify.py support voice
|
||||
|
||||
|
||||
|
|
|
@ -1,9 +0,0 @@
|
|||
package alert
|
||||
|
||||
import (
|
||||
"context"
|
||||
)
|
||||
|
||||
func Start(ctx context.Context) {
|
||||
go popEvent()
|
||||
}
|
325
alert/consume.go
325
alert/consume.go
|
@ -1,325 +0,0 @@
|
|||
package alert
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/didi/nightingale/v5/cache"
|
||||
"github.com/didi/nightingale/v5/config"
|
||||
"github.com/didi/nightingale/v5/judge"
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
|
||||
"github.com/toolkits/pkg/concurrent/semaphore"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/net/httplib"
|
||||
"github.com/toolkits/pkg/sys"
|
||||
)
|
||||
|
||||
func popEvent() {
|
||||
sema := semaphore.NewSemaphore(config.Config.Alert.NotifyScriptConcurrency)
|
||||
duration := time.Duration(100) * time.Millisecond
|
||||
for {
|
||||
events := judge.EventQueue.PopBackBy(200)
|
||||
if len(events) < 1 {
|
||||
time.Sleep(duration)
|
||||
continue
|
||||
}
|
||||
consume(events, sema)
|
||||
}
|
||||
}
|
||||
|
||||
func consume(events []interface{}, sema *semaphore.Semaphore) {
|
||||
for i := range events {
|
||||
if events[i] == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
event := events[i].(*models.AlertEvent)
|
||||
|
||||
alertRule, exists := cache.AlertRules.Get(event.RuleId)
|
||||
if !exists {
|
||||
logger.Errorf("event_consume: alert rule not found, event:%+v", event)
|
||||
continue
|
||||
}
|
||||
logger.Debugf("[event_consume_success][type:%v][event:%+v]", event.IsPromePull, event)
|
||||
if isNoneffective(event, alertRule) {
|
||||
// 告警规则非生效时段
|
||||
continue
|
||||
}
|
||||
|
||||
event.RuleName = alertRule.Name
|
||||
event.RuleNote = alertRule.Note
|
||||
event.NotifyChannels = alertRule.NotifyChannels
|
||||
classpaths := cache.ResClasspath.GetValues(event.ResIdent)
|
||||
sort.Strings(classpaths)
|
||||
event.ResClasspaths = strings.Join(classpaths, " ")
|
||||
enrichTag(event, alertRule)
|
||||
|
||||
if isEventMute(event) && event.IsAlert() {
|
||||
// 被屏蔽的事件
|
||||
event.MarkMuted()
|
||||
|
||||
if config.Config.Alert.MutedAlertPersist {
|
||||
persist(event)
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
// 操作数据库
|
||||
persist(event)
|
||||
|
||||
// 不管是告警还是恢复,都触发回调,接收端自己处理
|
||||
if alertRule.Callbacks != "" {
|
||||
go callback(event, alertRule)
|
||||
}
|
||||
|
||||
uids := genNotifyUserIDs(alertRule)
|
||||
if len(uids) == 0 {
|
||||
logger.Warningf("event_consume: notify users not found, event_hash_id: %s, rule_id: %d, rule_name: %s, res_ident: %s", event.HashId, event.RuleId, event.RuleName, event.ResIdent)
|
||||
continue
|
||||
}
|
||||
|
||||
users := cache.UserCache.GetByIds(uids)
|
||||
if len(users) == 0 {
|
||||
logger.Warningf("event_consume: notify users not found, event_hash_id: %s, rule_id: %d, rule_name: %s, res_ident: %s", event.HashId, event.RuleId, event.RuleName, event.ResIdent)
|
||||
continue
|
||||
}
|
||||
|
||||
alertMsg := AlertMsg{
|
||||
Event: event,
|
||||
Rule: alertRule,
|
||||
Users: users,
|
||||
}
|
||||
|
||||
logger.Infof("event_consume: notify alert:%+v", alertMsg)
|
||||
|
||||
sema.Acquire()
|
||||
go func(alertMsg AlertMsg) {
|
||||
defer sema.Release()
|
||||
notify(alertMsg)
|
||||
}(alertMsg)
|
||||
}
|
||||
}
|
||||
|
||||
func genNotifyUserIDs(alertRule *models.AlertRule) []int64 {
|
||||
uidMap := make(map[int64]struct{})
|
||||
|
||||
groupIds := strings.Fields(alertRule.NotifyGroups)
|
||||
for _, groupId := range groupIds {
|
||||
gid, err := strconv.ParseInt(groupId, 10, 64)
|
||||
if err != nil {
|
||||
logger.Warningf("event_consume: strconv groupid(%s) fail: %v", groupId, err)
|
||||
continue
|
||||
}
|
||||
|
||||
um, exists := cache.UserGroupMember.Get(gid)
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
|
||||
for uid := range um {
|
||||
uidMap[uid] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
userIds := strings.Fields(alertRule.NotifyUsers)
|
||||
for _, userId := range userIds {
|
||||
uid, err := strconv.ParseInt(userId, 10, 64)
|
||||
if err != nil {
|
||||
logger.Warningf("event_consume: strconv userid(%s) fail: %v", userId, err)
|
||||
continue
|
||||
}
|
||||
|
||||
uidMap[uid] = struct{}{}
|
||||
}
|
||||
|
||||
uids := make([]int64, 0, len(uidMap))
|
||||
for uid := range uidMap {
|
||||
uids = append(uids, uid)
|
||||
}
|
||||
|
||||
return uids
|
||||
}
|
||||
|
||||
// 如果是告警,就存库,如果是恢复,就从未恢复的告警表里删除
|
||||
func persist(event *models.AlertEvent) {
|
||||
if event.IsRecov() {
|
||||
logger.Debugf("[event.Recovery.db.DelByHashId]: delete recovery event:%+v", event)
|
||||
err := event.DelByHashId()
|
||||
if err != nil {
|
||||
logger.Warningf("event_consume: delete recovery event err:%v, event:%+v", err, event)
|
||||
}
|
||||
} else {
|
||||
err := event.Add()
|
||||
if err != nil {
|
||||
logger.Warningf("event_consume: insert alert event err:%v, event:%+v", err, event)
|
||||
}
|
||||
}
|
||||
obj := ToHistoryAlertEvent(event)
|
||||
err := obj.Add()
|
||||
if err != nil {
|
||||
logger.Warningf("event_consume: insert history alert event err:%v, event:%+v", err, event)
|
||||
}
|
||||
}
|
||||
|
||||
type AlertMsg struct {
|
||||
Event *models.AlertEvent `json:"event"`
|
||||
Rule *models.AlertRule `json:"rule"`
|
||||
Users []*models.User `json:"users"`
|
||||
}
|
||||
|
||||
func notify(alertMsg AlertMsg) {
|
||||
//增加并发控制
|
||||
bs, err := json.Marshal(alertMsg)
|
||||
if err != nil {
|
||||
logger.Errorf("notify: marshal alert %+v err:%v", alertMsg, err)
|
||||
}
|
||||
|
||||
fpath := config.Config.Alert.NotifyScriptPath
|
||||
cmd := exec.Command(fpath)
|
||||
cmd.Stdin = bytes.NewReader(bs)
|
||||
|
||||
// combine stdout and stderr
|
||||
var buf bytes.Buffer
|
||||
cmd.Stdout = &buf
|
||||
cmd.Stderr = &buf
|
||||
|
||||
err = cmd.Start()
|
||||
if err != nil {
|
||||
logger.Errorf("notify: run cmd err:%v", err)
|
||||
return
|
||||
}
|
||||
|
||||
err, isTimeout := sys.WrapTimeout(cmd, time.Duration(10)*time.Second)
|
||||
|
||||
if isTimeout {
|
||||
if err == nil {
|
||||
logger.Errorf("notify: timeout and killed process %s", fpath)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("notify: kill process %s occur error %v", fpath, err)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("notify: exec script %s occur error: %v, output: %s", fpath, err, buf.String())
|
||||
return
|
||||
}
|
||||
|
||||
logger.Infof("notify: exec %s output: %s", fpath, buf.String())
|
||||
}
|
||||
|
||||
func callback(event *models.AlertEvent, alertRule *models.AlertRule) {
|
||||
urls := strings.Fields(alertRule.Callbacks)
|
||||
for _, url := range urls {
|
||||
if url == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if !(strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://")) {
|
||||
url = "http://" + url
|
||||
}
|
||||
|
||||
resp, code, err := httplib.PostJSON(url, 5*time.Second, event, map[string]string{})
|
||||
if err != nil {
|
||||
logger.Errorf("callback[%s] fail, callback content: %+v, resp: %s, err: %v, code:%d", url, event, string(resp), err, code)
|
||||
} else {
|
||||
logger.Infof("callback[%s] succ, callback content: %+v, resp: %s, code:%d", url, event, string(resp), code)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func isNoneffective(event *models.AlertEvent, alertRule *models.AlertRule) bool {
|
||||
// 生效时间过滤
|
||||
if alertRule.Status == models.ALERT_RULE_DISABLED {
|
||||
logger.Debugf("event:%+v alert rule:%+v disable", event, alertRule)
|
||||
return true
|
||||
}
|
||||
|
||||
tm := time.Unix(event.TriggerTime, 0)
|
||||
triggerTime := tm.Format("15:04")
|
||||
triggerWeek := strconv.Itoa(int(tm.Weekday()))
|
||||
|
||||
if alertRule.EnableStime <= alertRule.EnableEtime {
|
||||
if triggerTime < alertRule.EnableStime || triggerTime > alertRule.EnableEtime {
|
||||
logger.Debugf("event:%+v alert rule:%+v triggerTime Noneffective", event, alertRule)
|
||||
return true
|
||||
}
|
||||
} else {
|
||||
if triggerTime < alertRule.EnableStime && triggerTime > alertRule.EnableEtime {
|
||||
logger.Debugf("event:%+v alert rule:%+v triggerTime Noneffective", event, alertRule)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
alertRule.EnableDaysOfWeek = strings.Replace(alertRule.EnableDaysOfWeek, "7", "0", 1)
|
||||
|
||||
if !strings.Contains(alertRule.EnableDaysOfWeek, triggerWeek) {
|
||||
logger.Debugf("event:%+v alert rule:%+v triggerWeek Noneffective", event, alertRule)
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// 事件的tags有多种tags组成:ident作为一个tag,数据本身的tags(前期已经把res的tags也附到数据tags里了)、规则的tags
|
||||
func enrichTag(event *models.AlertEvent, alertRule *models.AlertRule) {
|
||||
if event.ResIdent != "" {
|
||||
event.TagMap["ident"] = event.ResIdent
|
||||
}
|
||||
|
||||
if alertRule.AppendTags != "" {
|
||||
appendTags := strings.Fields(alertRule.AppendTags)
|
||||
for _, tag := range appendTags {
|
||||
arr := strings.Split(tag, "=")
|
||||
if len(arr) != 2 {
|
||||
logger.Warningf("alertRule AppendTags:%+v illagel", alertRule.AppendTags)
|
||||
continue
|
||||
}
|
||||
event.TagMap[arr[0]] = arr[1]
|
||||
}
|
||||
}
|
||||
|
||||
var tagList []string
|
||||
for key, value := range event.TagMap {
|
||||
tagList = append(tagList, fmt.Sprintf("%s=%s", key, value))
|
||||
}
|
||||
sort.Strings(tagList)
|
||||
event.Tags = strings.Join(tagList, " ")
|
||||
}
|
||||
|
||||
func ToHistoryAlertEvent(ae *models.AlertEvent) *models.HistoryAlertEvent {
|
||||
var obj models.HistoryAlertEvent
|
||||
obj.RuleId = ae.RuleId
|
||||
obj.RuleName = ae.RuleName
|
||||
obj.RuleNote = ae.RuleNote
|
||||
obj.HashId = ae.HashId
|
||||
obj.IsPromePull = ae.IsPromePull
|
||||
obj.ResClasspaths = ae.ResClasspaths
|
||||
obj.ResIdent = ae.ResIdent
|
||||
obj.Priority = ae.Priority
|
||||
obj.Status = ae.Status
|
||||
obj.IsRecovery = ae.IsRecovery
|
||||
obj.HistoryPoints = ae.HistoryPoints
|
||||
obj.TriggerTime = ae.TriggerTime
|
||||
obj.Values = ae.Values
|
||||
obj.NotifyChannels = ae.NotifyChannels
|
||||
obj.NotifyGroups = ae.NotifyGroups
|
||||
obj.NotifyUsers = ae.NotifyUsers
|
||||
obj.RunbookUrl = ae.RunbookUrl
|
||||
obj.ReadableExpression = ae.ReadableExpression
|
||||
obj.Tags = ae.Tags
|
||||
obj.NotifyGroupObjs = ae.NotifyGroupObjs
|
||||
obj.NotifyUserObjs = ae.NotifyUserObjs
|
||||
return &obj
|
||||
}
|
|
@ -1,89 +0,0 @@
|
|||
package alert
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/didi/nightingale/v5/cache"
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
func isEventMute(event *models.AlertEvent) bool {
|
||||
historyPoints, err := event.GetHistoryPoints()
|
||||
if err != nil {
|
||||
logger.Errorf("get event HistoryPoints:%+v failed, err: %v", event.HistoryPoints, err)
|
||||
return false
|
||||
}
|
||||
|
||||
// 先去匹配一下metric为空的mute
|
||||
if matchMute("", event.ResIdent, event.TagMap, event.ResClasspaths) {
|
||||
return true
|
||||
}
|
||||
|
||||
// 如果是与条件,就会有多个metric,任一个匹配了屏蔽规则都算被屏蔽
|
||||
for i := 0; i < len(historyPoints); i++ {
|
||||
if matchMute(historyPoints[i].Metric, event.ResIdent, event.TagMap, event.ResClasspaths) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
resAndTags, exists := cache.ResTags.Get(event.ResIdent)
|
||||
if exists {
|
||||
if event.TriggerTime > resAndTags.Resource.MuteBtime && event.TriggerTime < resAndTags.Resource.MuteEtime {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func matchMute(metric, ident string, tags map[string]string, classpaths string) bool {
|
||||
filters, exists := cache.AlertMute.GetByKey(metric)
|
||||
if !exists {
|
||||
// 没有屏蔽规则跟这个事件相关
|
||||
return false
|
||||
}
|
||||
|
||||
// 只要有一个屏蔽规则命中,那这个事件就是被屏蔽了
|
||||
for _, filter := range filters {
|
||||
if matchMuteOnce(filter, ident, tags, classpaths) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func matchMuteOnce(filter cache.Filter, ident string, tags map[string]string, classpaths string) bool {
|
||||
if len(filter.ClasspathPrefix) > 0 && !strings.HasPrefix(classpaths, filter.ClasspathPrefix) && !strings.Contains(classpaths, " "+filter.ClasspathPrefix) {
|
||||
// 没配置分组屏蔽就不做后续比较
|
||||
// 比如事件的资源calsspath为“n9e.mon n9e.rdb ccp.web”,配置屏蔽为n9e.rdb
|
||||
// 只要字符串前缀为n9e.rdb或者字符串包含“ n9e.rdb”即可判断所有alsspath中是否有前缀为n9e.rdb的
|
||||
// 只要有任一点不满足,那这个屏蔽规则也没有继续验证下去的必要
|
||||
return false
|
||||
}
|
||||
|
||||
if filter.ResReg != nil && !filter.ResReg.MatchString(ident) {
|
||||
// 比如屏蔽规则配置的是:c3-ceph.*
|
||||
// 当前事件的资源标识是:c4-ceph01.bj
|
||||
// 只要有任一点不满足,那这个屏蔽规则也没有继续验证下去的必要
|
||||
return false
|
||||
}
|
||||
|
||||
// 每个mute中的tags都得出现在event.tags,否则就是不匹配
|
||||
return mapContains(tags, filter.TagsMap)
|
||||
}
|
||||
|
||||
// mapContains reports whether every key/value pair of small is also present,
// with the same value, in big. An empty or nil small is always contained.
func mapContains(big, small map[string]string) bool {
	for key, want := range small {
		if got, ok := big[key]; !ok || got != want {
			return false
		}
	}
	return true
}
|
|
@ -1,89 +0,0 @@
|
|||
package backend
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/prometheus/prometheus/promql"
|
||||
|
||||
"github.com/didi/nightingale/v5/vos"
|
||||
"github.com/toolkits/pkg/container/list"
|
||||
|
||||
pp "github.com/didi/nightingale/v5/backend/prome"
|
||||
)
|
||||
|
||||
type BackendSection struct {
|
||||
DataSource string `yaml:"datasource"`
|
||||
Prometheus pp.PromeSection `yaml:"prometheus"`
|
||||
}
|
||||
|
||||
type DataSource interface {
|
||||
PushEndpoint
|
||||
|
||||
QueryData(inputs vos.DataQueryParam) []*vos.DataQueryResp // 查询一段时间
|
||||
QueryDataInstant(ql string) []*vos.DataQueryInstanceResp // 查询一个时间点数据 等同于prometheus instant_query
|
||||
QueryTagKeys(recv vos.CommonTagQueryParam) *vos.TagKeyQueryResp // 获取标签的names
|
||||
QueryTagValues(recv vos.CommonTagQueryParam) *vos.TagValueQueryResp // 根据一个label_name获取 values
|
||||
QueryTagPairs(recv vos.CommonTagQueryParam) *vos.TagPairQueryResp // 根据匹配拿到所有 series 上面三个使用统一的结构体
|
||||
QueryMetrics(recv vos.MetricQueryParam) *vos.MetricQueryResp // 根据标签查 metric_names
|
||||
QueryVector(ql string) promql.Vector // prometheus pull alert 所用,其他数据源留空即可
|
||||
CleanUp() // 数据源退出时需要做的清理工作
|
||||
}
|
||||
|
||||
type PushEndpoint interface {
|
||||
Push2Queue(items []*vos.MetricPoint)
|
||||
}
|
||||
|
||||
var (
|
||||
defaultDataSource string
|
||||
registryDataSources = make(map[string]DataSource)
|
||||
registryPushEndpoints = make(map[string]PushEndpoint)
|
||||
)
|
||||
|
||||
func Init(cfg BackendSection) {
|
||||
defaultDataSource = cfg.DataSource
|
||||
|
||||
// init prometheus
|
||||
if cfg.Prometheus.Enable {
|
||||
promeDs := &pp.PromeDataSource{
|
||||
Section: cfg.Prometheus,
|
||||
PushQueue: list.NewSafeListLimited(10240000),
|
||||
}
|
||||
promeDs.Init()
|
||||
RegisterDataSource(cfg.Prometheus.Name, promeDs)
|
||||
}
|
||||
}
|
||||
|
||||
// get backend datasource
|
||||
// (pluginId == "" for default datasource)
|
||||
func GetDataSourceFor(pluginId string) (DataSource, error) {
|
||||
if pluginId == "" {
|
||||
pluginId = defaultDataSource
|
||||
}
|
||||
if source, exists := registryDataSources[pluginId]; exists {
|
||||
return source, nil
|
||||
}
|
||||
return nil, fmt.Errorf("could not find datasource for plugin: %s", pluginId)
|
||||
}
|
||||
|
||||
func DatasourceCleanUp() {
|
||||
for _, ds := range registryDataSources {
|
||||
ds.CleanUp()
|
||||
}
|
||||
}
|
||||
|
||||
// get all push endpoints
|
||||
func GetPushEndpoints() ([]PushEndpoint, error) {
|
||||
if len(registryPushEndpoints) > 0 {
|
||||
items := make([]PushEndpoint, 0, len(registryPushEndpoints))
|
||||
for _, value := range registryPushEndpoints {
|
||||
items = append(items, value)
|
||||
}
|
||||
return items, nil
|
||||
}
|
||||
return nil, fmt.Errorf("could not find any pushendpoint")
|
||||
}
|
||||
|
||||
func RegisterDataSource(pluginId string, datasource DataSource) {
|
||||
registryDataSources[pluginId] = datasource
|
||||
registryPushEndpoints[pluginId] = datasource
|
||||
}
|
|
@ -1,183 +0,0 @@
|
|||
package backend
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"time"
|
||||
|
||||
"github.com/gogo/protobuf/proto"
|
||||
"github.com/golang/snappy"
|
||||
"github.com/opentracing-contrib/go-stdlib/nethttp"
|
||||
"github.com/opentracing/opentracing-go"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/prometheus/prometheus/pkg/labels"
|
||||
"github.com/prometheus/prometheus/prompb"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
|
||||
"github.com/didi/nightingale/v5/vos"
|
||||
)
|
||||
|
||||
// MetricNameRE matches valid metric names: a letter, underscore or colon
// followed by any number of letters, digits, underscores or colons.
var MetricNameRE = regexp.MustCompile(`^[a-zA-Z_:][a-zA-Z0-9_:]*$`)
|
||||
|
||||
type sample struct {
|
||||
labels labels.Labels
|
||||
t int64
|
||||
v float64
|
||||
}
|
||||
|
||||
func labelsToLabelsProto(labels labels.Labels, buf []prompb.Label) []prompb.Label {
|
||||
result := buf[:0]
|
||||
if cap(buf) < len(labels) {
|
||||
result = make([]prompb.Label, 0, len(labels))
|
||||
}
|
||||
for _, l := range labels {
|
||||
result = append(result, prompb.Label{
|
||||
Name: l.Name,
|
||||
Value: l.Value,
|
||||
})
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (pd *PromeDataSource) convertOne(item *vos.MetricPoint) (prompb.TimeSeries, error) {
|
||||
pt := prompb.TimeSeries{}
|
||||
pt.Samples = []prompb.Sample{{}}
|
||||
s := sample{}
|
||||
s.t = item.Time
|
||||
s.v = item.Value
|
||||
// name
|
||||
if !MetricNameRE.MatchString(item.Metric) {
|
||||
return pt, errors.New("invalid metrics name")
|
||||
}
|
||||
nameLs := labels.Label{
|
||||
Name: LABEL_NAME,
|
||||
Value: item.Metric,
|
||||
}
|
||||
s.labels = append(s.labels, nameLs)
|
||||
if item.Ident != "" {
|
||||
identLs := labels.Label{
|
||||
Name: LABEL_IDENT,
|
||||
Value: item.Ident,
|
||||
}
|
||||
s.labels = append(s.labels, identLs)
|
||||
}
|
||||
|
||||
for k, v := range item.TagsMap {
|
||||
if model.LabelNameRE.MatchString(k) {
|
||||
ls := labels.Label{
|
||||
Name: k,
|
||||
Value: v,
|
||||
}
|
||||
s.labels = append(s.labels, ls)
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
pt.Labels = labelsToLabelsProto(s.labels, pt.Labels)
|
||||
// 时间赋值问题,使用毫秒时间戳
|
||||
tsMs := time.Unix(s.t, 0).UnixNano() / 1e6
|
||||
pt.Samples[0].Timestamp = tsMs
|
||||
pt.Samples[0].Value = s.v
|
||||
return pt, nil
|
||||
}
|
||||
|
||||
// RecoverableError wraps an error returned by remoteWritePost to mark it as
// worth retrying (transport failures and HTTP 5xx responses).
type RecoverableError struct {
	error
}
|
||||
|
||||
func remoteWritePost(c *HttpClient, req []byte) error {
|
||||
httpReq, err := http.NewRequest("POST", c.url.String(), bytes.NewReader(req))
|
||||
if err != nil {
|
||||
// Errors from NewRequest are from unparsable URLs, so are not
|
||||
// recoverable.
|
||||
return err
|
||||
}
|
||||
|
||||
httpReq.Header.Add("Content-Encoding", "snappy")
|
||||
httpReq.Header.Set("Content-Type", "application/x-protobuf")
|
||||
httpReq.Header.Set("User-Agent", "n9e-v5")
|
||||
httpReq.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
|
||||
ctx, cancel := context.WithTimeout(context.Background(), c.timeout)
|
||||
defer cancel()
|
||||
|
||||
httpReq = httpReq.WithContext(ctx)
|
||||
|
||||
if parentSpan := opentracing.SpanFromContext(ctx); parentSpan != nil {
|
||||
var ht *nethttp.Tracer
|
||||
httpReq, ht = nethttp.TraceRequest(
|
||||
parentSpan.Tracer(),
|
||||
httpReq,
|
||||
nethttp.OperationName("Remote Store"),
|
||||
nethttp.ClientTrace(false),
|
||||
)
|
||||
defer ht.Finish()
|
||||
}
|
||||
|
||||
httpResp, err := c.Client.Do(httpReq)
|
||||
if err != nil {
|
||||
// Errors from Client.Do are from (for example) network errors, so are
|
||||
// recoverable.
|
||||
return RecoverableError{err}
|
||||
}
|
||||
defer func() {
|
||||
io.Copy(ioutil.Discard, httpResp.Body)
|
||||
httpResp.Body.Close()
|
||||
}()
|
||||
|
||||
if httpResp.StatusCode/100 != 2 {
|
||||
scanner := bufio.NewScanner(io.LimitReader(httpResp.Body, 512))
|
||||
line := ""
|
||||
if scanner.Scan() {
|
||||
line = scanner.Text()
|
||||
}
|
||||
|
||||
if httpResp.StatusCode == 400 {
|
||||
//400的错误是客户端的问题,不返回给上层,输出到debug日志中
|
||||
logger.Debugf("server returned HTTP status %s: %s req:%v", httpResp.Status, line, getSamples(req))
|
||||
} else {
|
||||
err = errors.Errorf("server returned HTTP status %s: %s", httpResp.Status, line)
|
||||
}
|
||||
}
|
||||
|
||||
if httpResp.StatusCode/100 == 5 {
|
||||
return RecoverableError{err}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (pd *PromeDataSource) buildWriteRequest(samples []prompb.TimeSeries) ([]byte, error) {
|
||||
|
||||
req := &prompb.WriteRequest{
|
||||
Timeseries: samples,
|
||||
Metadata: nil,
|
||||
}
|
||||
|
||||
data, err := proto.Marshal(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
compressed := snappy.Encode(nil, data)
|
||||
return compressed, nil
|
||||
}
|
||||
|
||||
func getSamples(compressed []byte) []prompb.TimeSeries {
|
||||
var samples []prompb.TimeSeries
|
||||
req := &prompb.WriteRequest{
|
||||
Timeseries: samples,
|
||||
Metadata: nil,
|
||||
}
|
||||
|
||||
d, _ := snappy.Decode(nil, compressed)
|
||||
proto.Unmarshal(d, req)
|
||||
|
||||
return req.Timeseries
|
||||
}
|
|
@ -1,257 +0,0 @@
|
|||
package backend
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/go-kit/kit/log"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
config_util "github.com/prometheus/common/config"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/prometheus/common/promlog"
|
||||
pc "github.com/prometheus/prometheus/config"
|
||||
"github.com/prometheus/prometheus/prompb"
|
||||
"github.com/prometheus/prometheus/promql"
|
||||
"github.com/prometheus/prometheus/storage"
|
||||
"github.com/prometheus/prometheus/storage/remote"
|
||||
"github.com/toolkits/pkg/container/list"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"go.uber.org/atomic"
|
||||
|
||||
"github.com/didi/nightingale/v5/vos"
|
||||
)
|
||||
|
||||
// DefaultPopNum is the batch size used by remoteWrite when the configured
// batch is not positive.
const DefaultPopNum = 1000
|
||||
|
||||
type PromeSection struct {
|
||||
Enable bool `yaml:"enable"`
|
||||
Name string `yaml:"name"`
|
||||
Batch int `yaml:"batch"`
|
||||
MaxRetry int `yaml:"maxRetry"`
|
||||
LookbackDeltaMinute int `yaml:"lookbackDeltaMinute"`
|
||||
MaxConcurrentQuery int `yaml:"maxConcurrentQuery"`
|
||||
MaxSamples int `yaml:"maxSamples"`
|
||||
MaxFetchAllSeriesLimitMinute int64 `yaml:"maxFetchAllSeriesLimitMinute"`
|
||||
SlowLogRecordSecond float64 `yaml:"slowLogRecordSecond"`
|
||||
DefaultFetchSeriesQl string `yaml:"defaultFetchSeriesQl"`
|
||||
RemoteWrite []RemoteConfig `yaml:"remoteWrite"`
|
||||
RemoteRead []RemoteConfig `yaml:"remoteRead"`
|
||||
}
|
||||
|
||||
// RemoteConfig describes one remote read or remote write endpoint.
type RemoteConfig struct {
	// Name identifies the endpoint.
	Name string `yaml:"name"`
	// Url is the endpoint address; it is parsed with url.Parse in Init.
	Url string `yaml:"url"`
	// RemoteTimeoutSecond is the per-request timeout in seconds.
	RemoteTimeoutSecond int `yaml:"remoteTimeoutSecond"`
}
|
||||
|
||||
type PromeDataSource struct {
|
||||
Section PromeSection
|
||||
LocalTmpDir string
|
||||
// 除了promql的查询,需要后端存储
|
||||
Queryable storage.SampleAndChunkQueryable
|
||||
// promql相关查询
|
||||
QueryEngine *promql.Engine
|
||||
PushQueue *list.SafeListLimited
|
||||
WriteTargets []*HttpClient
|
||||
}
|
||||
type safePromQLNoStepSubqueryInterval struct {
|
||||
value atomic.Int64
|
||||
}
|
||||
|
||||
// HttpClient bundles everything needed to post to one remote-write target.
type HttpClient struct {
	// remoteName is used to differentiate clients in metrics.
	remoteName string
	// url is the remote-write endpoint.
	url *url.URL
	// Client performs the HTTP requests.
	Client *http.Client
	// timeout bounds each request (applied through the request context).
	timeout time.Duration
}
|
||||
|
||||
func durationToInt64Millis(d time.Duration) int64 {
|
||||
return int64(d / time.Millisecond)
|
||||
}
|
||||
func (i *safePromQLNoStepSubqueryInterval) Set(ev model.Duration) {
|
||||
i.value.Store(durationToInt64Millis(time.Duration(ev)))
|
||||
}
|
||||
func (i *safePromQLNoStepSubqueryInterval) Get(int64) int64 {
|
||||
return i.value.Load()
|
||||
}
|
||||
func (pd *PromeDataSource) CleanUp() {
|
||||
err := os.RemoveAll(pd.LocalTmpDir)
|
||||
logger.Infof("[remove_prome_tmp_dir_err][dir:%+v][err: %v]", pd.LocalTmpDir, err)
|
||||
|
||||
}
|
||||
func (pd *PromeDataSource) Init() {
|
||||
// 模拟创建本地存储目录
|
||||
dbDir, err := ioutil.TempDir("", "tsdb-api-ready")
|
||||
if err != nil {
|
||||
logger.Errorf("[error_create_local_tsdb_dir][err: %v]", err)
|
||||
return
|
||||
}
|
||||
pd.LocalTmpDir = dbDir
|
||||
|
||||
promlogConfig := promlog.Config{}
|
||||
// 使用本地目录创建remote-storage
|
||||
remoteS := remote.NewStorage(promlog.New(&promlogConfig), prometheus.DefaultRegisterer, func() (int64, error) {
|
||||
return 0, nil
|
||||
}, dbDir, 1*time.Minute, nil)
|
||||
|
||||
// ApplyConfig 加载queryables
|
||||
remoteReadC := make([]*pc.RemoteReadConfig, 0)
|
||||
for _, u := range pd.Section.RemoteRead {
|
||||
|
||||
ur, err := url.Parse(u.Url)
|
||||
if err != nil {
|
||||
logger.Errorf("[prome_ds_init_error][parse_url_error][url:%+v][err:%+v]", u.Url, err)
|
||||
continue
|
||||
}
|
||||
|
||||
remoteReadC = append(remoteReadC,
|
||||
&pc.RemoteReadConfig{
|
||||
URL: &config_util.URL{URL: ur},
|
||||
RemoteTimeout: model.Duration(time.Duration(u.RemoteTimeoutSecond) * time.Second),
|
||||
ReadRecent: true,
|
||||
},
|
||||
)
|
||||
}
|
||||
if len(remoteReadC) == 0 {
|
||||
logger.Errorf("[prome_ds_error_got_zero_remote_read_storage]")
|
||||
return
|
||||
}
|
||||
err = remoteS.ApplyConfig(&pc.Config{RemoteReadConfigs: remoteReadC})
|
||||
if err != nil {
|
||||
logger.Errorf("[error_load_remote_read_config][err: %v]", err)
|
||||
return
|
||||
}
|
||||
pLogger := log.NewNopLogger()
|
||||
|
||||
noStepSubqueryInterval := &safePromQLNoStepSubqueryInterval{}
|
||||
|
||||
queryQueueDir, err := ioutil.TempDir(dbDir, "prom_query_concurrency")
|
||||
opts := promql.EngineOpts{
|
||||
Logger: log.With(pLogger, "component", "query engine"),
|
||||
Reg: prometheus.DefaultRegisterer,
|
||||
MaxSamples: pd.Section.MaxSamples,
|
||||
Timeout: 30 * time.Second,
|
||||
ActiveQueryTracker: promql.NewActiveQueryTracker(queryQueueDir, pd.Section.MaxConcurrentQuery, log.With(pLogger, "component", "activeQueryTracker")),
|
||||
LookbackDelta: time.Duration(pd.Section.LookbackDeltaMinute) * time.Minute,
|
||||
NoStepSubqueryIntervalFn: noStepSubqueryInterval.Get,
|
||||
EnableAtModifier: true,
|
||||
}
|
||||
|
||||
queryEngine := promql.NewEngine(opts)
|
||||
pd.QueryEngine = queryEngine
|
||||
pd.Queryable = remoteS
|
||||
|
||||
// 初始化writeClients
|
||||
if len(pd.Section.RemoteWrite) == 0 {
|
||||
logger.Warningf("[prome_ds_init_with_zero_RemoteWrite_target]")
|
||||
logger.Infof("[successfully_init_prometheus_datasource][remote_read_num:%+v][remote_write_num:%+v]",
|
||||
len(pd.Section.RemoteRead),
|
||||
len(pd.Section.RemoteWrite),
|
||||
)
|
||||
return
|
||||
}
|
||||
writeTs := make([]*HttpClient, 0)
|
||||
for _, u := range pd.Section.RemoteWrite {
|
||||
ur, err := url.Parse(u.Url)
|
||||
if err != nil {
|
||||
logger.Errorf("[prome_ds_init_error][parse_url_error][url:%+v][err:%+v]", u.Url, err)
|
||||
continue
|
||||
}
|
||||
writeTs = append(writeTs,
|
||||
&HttpClient{
|
||||
remoteName: u.Name,
|
||||
url: ur,
|
||||
Client: &http.Client{},
|
||||
timeout: time.Duration(u.RemoteTimeoutSecond) * time.Second,
|
||||
})
|
||||
}
|
||||
pd.WriteTargets = writeTs
|
||||
// 开启prometheus 队列消费协程
|
||||
go pd.remoteWrite()
|
||||
logger.Infof("[successfully_init_prometheus_datasource][remote_read_num:%+v][remote_write_num:%+v]",
|
||||
len(remoteReadC),
|
||||
len(writeTs),
|
||||
)
|
||||
}
|
||||
|
||||
func (pd *PromeDataSource) Push2Queue(points []*vos.MetricPoint) {
|
||||
for _, point := range points {
|
||||
pt, err := pd.convertOne(point)
|
||||
if err != nil {
|
||||
logger.Errorf("[prome_convertOne_error][point: %+v][err:%s]", point, err)
|
||||
continue
|
||||
}
|
||||
ok := pd.PushQueue.PushFront(pt)
|
||||
if !ok {
|
||||
logger.Errorf("[prome_push_queue_error][point: %+v] ", point)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (pd *PromeDataSource) remoteWrite() {
|
||||
batch := pd.Section.Batch // 一次发送,最多batch条数据
|
||||
if batch <= 0 {
|
||||
batch = DefaultPopNum
|
||||
}
|
||||
for {
|
||||
items := pd.PushQueue.PopBackBy(batch)
|
||||
count := len(items)
|
||||
if count == 0 {
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
continue
|
||||
}
|
||||
|
||||
pbItems := make([]prompb.TimeSeries, count)
|
||||
for i := 0; i < count; i++ {
|
||||
pbItems[i] = items[i].(prompb.TimeSeries)
|
||||
}
|
||||
payload, err := pd.buildWriteRequest(pbItems)
|
||||
if err != nil {
|
||||
logger.Errorf("[prome_remote_write_error][pb_marshal_error][items: %+v][pb.err: %v]: ", items, err)
|
||||
continue
|
||||
}
|
||||
pd.processWrite(payload)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
func (pd *PromeDataSource) processWrite(payload []byte) {
|
||||
|
||||
retry := pd.Section.MaxRetry
|
||||
|
||||
for _, c := range pd.WriteTargets {
|
||||
newC := c
|
||||
go func(cc *HttpClient, payload []byte) {
|
||||
sendOk := false
|
||||
var rec bool
|
||||
var finalErr error
|
||||
for i := 0; i < retry; i++ {
|
||||
err := remoteWritePost(cc, payload)
|
||||
if err == nil {
|
||||
sendOk = true
|
||||
break
|
||||
}
|
||||
|
||||
_, rec = err.(RecoverableError)
|
||||
|
||||
if !rec {
|
||||
finalErr = err
|
||||
break
|
||||
}
|
||||
logger.Warningf("[send prome fail recoverableError][retry: %d/%d][err:%v]", i+1, retry, err)
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
}
|
||||
if !sendOk {
|
||||
logger.Errorf("send prome finally fail: %v", finalErr)
|
||||
} else {
|
||||
logger.Debugf("send to prome %s ok", cc.url.String())
|
||||
}
|
||||
}(newC, payload)
|
||||
}
|
||||
|
||||
}
|
|
@ -1,754 +0,0 @@
|
|||
package backend
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/prometheus/pkg/labels"
|
||||
"github.com/prometheus/prometheus/promql"
|
||||
"github.com/prometheus/prometheus/promql/parser"
|
||||
"github.com/prometheus/prometheus/storage"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
|
||||
"github.com/didi/nightingale/v5/cache"
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
"github.com/didi/nightingale/v5/vos"
|
||||
)
|
||||
|
||||
// LABEL_IDENT is the label name whose value is copied into the Ident field
// of query responses.
const LABEL_IDENT = "ident"

// LABEL_NAME is the label that stores the metric name of a series.
const LABEL_NAME = "__name__"

// DEFAULT_STEP is the base step, in seconds, used to derive a range-query
// resolution when the caller passes a step of 0.
const DEFAULT_STEP = 15
|
||||
|
||||
type commonQueryObj struct {
|
||||
Idents []string
|
||||
TagPairs []*vos.TagPair
|
||||
Metric string
|
||||
Start int64
|
||||
End int64
|
||||
MetricNameExact bool // metric_name精确匹配,在查询看图的时候为true
|
||||
From string // 调用的来源
|
||||
}
|
||||
|
||||
// 为查询索引或标签相关的转换,大部分都是正则匹配
|
||||
func convertToPromql(recv *commonQueryObj) string {
|
||||
|
||||
qlStr := ""
|
||||
qlStrFinal := ""
|
||||
metricName := ""
|
||||
labelIdent := ""
|
||||
labelStrSlice := make([]string, 0)
|
||||
// 匹配metric_name __name__=~"xx.*"
|
||||
if recv.Metric != "" {
|
||||
if recv.MetricNameExact {
|
||||
metricName = fmt.Sprintf(`__name__="%s"`, recv.Metric)
|
||||
} else {
|
||||
metricName = fmt.Sprintf(`__name__=~".*%s.*"`, recv.Metric)
|
||||
}
|
||||
|
||||
labelStrSlice = append(labelStrSlice, metricName)
|
||||
|
||||
}
|
||||
// 匹配ident=~"k1|k2"
|
||||
labelIdent = strings.Join(recv.Idents, "|")
|
||||
if labelIdent != "" {
|
||||
labelStrSlice = append(labelStrSlice, fmt.Sprintf(`ident=~"%s"`, labelIdent))
|
||||
}
|
||||
// 匹配标签
|
||||
labelM := make(map[string]string)
|
||||
for _, i := range recv.TagPairs {
|
||||
if i.Key == "" {
|
||||
continue
|
||||
}
|
||||
lastStr, _ := labelM[i.Key]
|
||||
|
||||
lastStr += fmt.Sprintf(`.*%s.*|`, i.Value)
|
||||
labelM[i.Key] = lastStr
|
||||
}
|
||||
for k, v := range labelM {
|
||||
thisLabel := strings.TrimRight(v, "|")
|
||||
labelStrSlice = append(labelStrSlice, fmt.Sprintf(`%s=~"%s"`, k, thisLabel))
|
||||
|
||||
}
|
||||
|
||||
qlStr = strings.Join(labelStrSlice, ",")
|
||||
qlStrFinal = fmt.Sprintf(`{%s}`, qlStr)
|
||||
logger.Debugf("[convertToPromql][type=queryLabel][recv:%+v][qlStrFinal:%s]", recv, qlStrFinal)
|
||||
|
||||
return qlStrFinal
|
||||
}
|
||||
|
||||
// 查询数据的转换,metrics_name和标签都是精确匹配
|
||||
func convertToPromqlForQueryData(recv *commonQueryObj) string {
|
||||
|
||||
qlStr := ""
|
||||
qlStrFinal := ""
|
||||
metricName := ""
|
||||
labelIdent := ""
|
||||
labelStrSlice := make([]string, 0)
|
||||
// 匹配metric_name __name__=~"xx.*"
|
||||
if recv.Metric != "" {
|
||||
metricName = fmt.Sprintf(`__name__="%s"`, recv.Metric)
|
||||
|
||||
labelStrSlice = append(labelStrSlice, metricName)
|
||||
|
||||
}
|
||||
// 匹配ident=~"k1|k2"
|
||||
labelIdent = strings.Join(recv.Idents, "|")
|
||||
if labelIdent != "" {
|
||||
labelStrSlice = append(labelStrSlice, fmt.Sprintf(`ident=~"%s"`, labelIdent))
|
||||
}
|
||||
// 匹配标签
|
||||
labelM := make(map[string]string)
|
||||
for _, i := range recv.TagPairs {
|
||||
if i.Key == "" {
|
||||
continue
|
||||
}
|
||||
lastStr, _ := labelM[i.Key]
|
||||
|
||||
lastStr += fmt.Sprintf(`%s|`, i.Value)
|
||||
labelM[i.Key] = lastStr
|
||||
}
|
||||
for k, v := range labelM {
|
||||
thisLabel := strings.TrimRight(v, "|")
|
||||
labelStrSlice = append(labelStrSlice, fmt.Sprintf(`%s=~"%s"`, k, thisLabel))
|
||||
|
||||
}
|
||||
|
||||
qlStr = strings.Join(labelStrSlice, ",")
|
||||
qlStrFinal = fmt.Sprintf(`{%s}`, qlStr)
|
||||
logger.Debugf("[convertToPromql][type=queryData][recv:%+v][qlStrFinal:%s]", recv, qlStrFinal)
|
||||
|
||||
return qlStrFinal
|
||||
}
|
||||
|
||||
func parseMatchersParam(matchers []string) ([][]*labels.Matcher, error) {
|
||||
var matcherSets [][]*labels.Matcher
|
||||
for _, s := range matchers {
|
||||
matchers, err := parser.ParseMetricSelector(s)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
matcherSets = append(matcherSets, matchers)
|
||||
}
|
||||
|
||||
OUTER:
|
||||
for _, ms := range matcherSets {
|
||||
for _, lm := range ms {
|
||||
if lm != nil && !lm.Matches("") {
|
||||
continue OUTER
|
||||
}
|
||||
}
|
||||
return nil, errors.New("match[] must contain at least one non-empty matcher")
|
||||
}
|
||||
return matcherSets, nil
|
||||
}
|
||||
|
||||
func (pd *PromeDataSource) QueryData(inputs vos.DataQueryParam) []*vos.DataQueryResp {
|
||||
|
||||
respD := make([]*vos.DataQueryResp, 0)
|
||||
for _, input := range inputs.Params {
|
||||
var qlStrFinal string
|
||||
|
||||
if input.PromeQl != "" {
|
||||
qlStrFinal = input.PromeQl
|
||||
} else {
|
||||
if len(input.Idents) == 0 {
|
||||
for i := range input.TagPairs {
|
||||
if input.TagPairs[i].Key == "ident" {
|
||||
input.Idents = append(input.Idents, input.TagPairs[i].Value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(input.Idents) == 0 && input.ClasspathId != 0 {
|
||||
if input.ClasspathPrefix == 0 {
|
||||
classpathAndRes, exists := cache.ClasspathRes.Get(input.ClasspathId)
|
||||
if exists {
|
||||
input.Idents = classpathAndRes.Res
|
||||
}
|
||||
} else {
|
||||
classpath, err := models.ClasspathGet("id=?", input.ClasspathId)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
cps, _ := models.ClasspathGetsByPrefix(classpath.Path)
|
||||
for _, classpath := range cps {
|
||||
classpathAndRes, exists := cache.ClasspathRes.Get(classpath.Id)
|
||||
if exists {
|
||||
idents := classpathAndRes.Res
|
||||
input.Idents = append(input.Idents, idents...)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cj := &commonQueryObj{
|
||||
Idents: input.Idents,
|
||||
TagPairs: input.TagPairs,
|
||||
Metric: input.Metric,
|
||||
Start: inputs.Start,
|
||||
End: inputs.End,
|
||||
MetricNameExact: true,
|
||||
}
|
||||
qlStrFinal = convertToPromqlForQueryData(cj)
|
||||
|
||||
}
|
||||
|
||||
logger.Debugf("[input:%+v][qlStrFinal:%s]\n", input, qlStrFinal)
|
||||
// 转化为utc时间
|
||||
startT := tsToUtcTs(inputs.Start)
|
||||
endT := tsToUtcTs(inputs.End)
|
||||
|
||||
resolution := time.Second * time.Duration(inputs.Step)
|
||||
if inputs.Step == 0 {
|
||||
// step==0 说明要自己算 grafana和prometheus ui都是前端传入
|
||||
delta := (inputs.End - inputs.Start) / 3600
|
||||
if delta <= 0 {
|
||||
delta = 1
|
||||
}
|
||||
resolution = time.Second * time.Duration(delta*DEFAULT_STEP)
|
||||
}
|
||||
q, err := pd.QueryEngine.NewRangeQuery(pd.Queryable, qlStrFinal, startT, endT, resolution)
|
||||
if err != nil {
|
||||
logger.Errorf("[prome_query_error][QueryData_error_may_be_parse_ql_error][args:%+v][err:%+v]", input, err)
|
||||
continue
|
||||
}
|
||||
ctx, _ := context.WithTimeout(context.Background(), time.Second*30)
|
||||
res := q.Exec(ctx)
|
||||
if res.Err != nil {
|
||||
logger.Errorf("[prome_query_error][rangeQuery_exec_error][args:%+v][err:%+v]", input, res.Err)
|
||||
q.Close()
|
||||
continue
|
||||
}
|
||||
mat, ok := res.Value.(promql.Matrix)
|
||||
if !ok {
|
||||
logger.Errorf("[promql.Engine.exec: invalid expression type %q]", res.Value.Type())
|
||||
q.Close()
|
||||
continue
|
||||
}
|
||||
if res.Err != nil {
|
||||
logger.Errorf("[prome_query_error][res.Matrix_error][args:%+v][err:%+v]", input, res.Err)
|
||||
q.Close()
|
||||
continue
|
||||
}
|
||||
for index, m := range mat {
|
||||
if inputs.Limit > 0 && index+1 > inputs.Limit {
|
||||
continue
|
||||
}
|
||||
tagStr := ""
|
||||
oneResp := &vos.DataQueryResp{}
|
||||
|
||||
ident := m.Metric.Get(LABEL_IDENT)
|
||||
name := m.Metric.Get(LABEL_NAME)
|
||||
oneResp.Metric = name
|
||||
oneResp.Ident = ident
|
||||
pNum := len(m.Points)
|
||||
interval := int64(resolution / time.Second)
|
||||
pNumExpect := int((inputs.End - inputs.Start) / interval)
|
||||
|
||||
remotePIndex := 0
|
||||
for i := 0; i <= pNumExpect; i++ {
|
||||
|
||||
// 先准备好null的point
|
||||
tsLocal := inputs.Start + interval*int64(i)
|
||||
tmpP := &vos.Point{
|
||||
Timestamp: tsLocal,
|
||||
Value: vos.JsonFloat(math.NaN()),
|
||||
}
|
||||
//说明points数组还没越界
|
||||
//去m.Points获取一个
|
||||
if remotePIndex < pNum {
|
||||
pointOne := m.Points[remotePIndex]
|
||||
tsRemote := pointOne.T / 1e3
|
||||
// 判断时间戳 ,前后相差1秒认为时间戳对齐了
|
||||
if math.Abs(float64(tsRemote-tsLocal)) <= 1 {
|
||||
tmpP.Timestamp = tsRemote
|
||||
tmpP.Value = vos.JsonFloat(pointOne.V)
|
||||
// 说明远端的这个索引的值已经被pop了,移动索引
|
||||
remotePIndex++
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
oneResp.Values = append(oneResp.Values, tmpP)
|
||||
|
||||
}
|
||||
|
||||
for _, x := range m.Metric {
|
||||
if x.Name == LABEL_NAME {
|
||||
continue
|
||||
}
|
||||
tagStr += fmt.Sprintf("%s=%s,", x.Name, x.Value)
|
||||
}
|
||||
tagStr = strings.TrimRight(tagStr, ",")
|
||||
oneResp.Tags = tagStr
|
||||
oneResp.Resolution = interval
|
||||
oneResp.PNum = pNum
|
||||
respD = append(respD, oneResp)
|
||||
|
||||
}
|
||||
q.Close()
|
||||
|
||||
}
|
||||
return respD
|
||||
}
|
||||
|
||||
// tsToUtcTs converts a Unix timestamp in seconds to a time.Time in UTC.
func tsToUtcTs(s int64) time.Time {
	instant := time.Unix(s, 0)
	return instant.In(time.UTC)
}
|
||||
// timeParse converts a Unix timestamp in seconds to a time.Time in UTC.
//
// The input is an integer, so it has no fractional part to split off. It is
// converted directly rather than through float64, which would round
// timestamps whose magnitude exceeds 2^53.
func timeParse(ts int64) time.Time {
	return time.Unix(ts, 0).UTC()
}
|
||||
|
||||
// millisecondTs returns t as a Unix timestamp in milliseconds: the whole
// seconds scaled by 1000 plus the sub-second part truncated to milliseconds.
func millisecondTs(t time.Time) int64 {
	wholeSeconds := t.Unix() * 1000
	subSecond := int64(t.Nanosecond()) / int64(time.Millisecond)
	return wholeSeconds + subSecond
}
|
||||
// tsToStr formats a Unix timestamp in seconds as "YYYY-MM-DD hh:mm:ss" in the
// process's local time zone.
func tsToStr(timestamp int64) string {
	const layout = "2006-01-02 15:04:05"
	return time.Unix(timestamp, 0).Format(layout)
}
|
||||
|
||||
func (pd *PromeDataSource) CommonQuerySeries(cj *commonQueryObj) storage.SeriesSet {
|
||||
qlStrFinal := convertToPromql(cj)
|
||||
|
||||
if qlStrFinal == "{}" {
|
||||
qlStrFinal = pd.Section.DefaultFetchSeriesQl
|
||||
reqMinute := (cj.End - cj.Start) / 60
|
||||
// 如果前端啥都没传,要限制下查询series的时间范围,防止高基础查询
|
||||
if reqMinute > pd.Section.MaxFetchAllSeriesLimitMinute {
|
||||
// 时间超长,用配置文件中的限制一下
|
||||
now := time.Now().Unix()
|
||||
cj.End = now
|
||||
cj.Start = now - pd.Section.MaxFetchAllSeriesLimitMinute*60
|
||||
logger.Debugf("[CommonQuerySeries.FetchAllSeries.LimitQueryTimeRange][start:%v][end:%v]", cj.Start, cj.End)
|
||||
}
|
||||
}
|
||||
|
||||
matcherSets, err := parseMatchersParam([]string{qlStrFinal})
|
||||
if err != nil {
|
||||
logger.Errorf("[prome_query_error][parse_label_match_error][err:%+v]", err)
|
||||
return nil
|
||||
}
|
||||
now := time.Now().Unix()
|
||||
if cj.Start == 0 {
|
||||
cj.Start = now - 60*pd.Section.MaxFetchAllSeriesLimitMinute
|
||||
}
|
||||
if cj.End == 0 {
|
||||
cj.End = now
|
||||
}
|
||||
|
||||
startT := millisecondTs(timeParse(cj.Start))
|
||||
endT := millisecondTs(timeParse(cj.End))
|
||||
|
||||
ctx, _ := context.WithTimeout(context.Background(), time.Second*30)
|
||||
q, err := pd.Queryable.Querier(ctx, startT, endT)
|
||||
if err != nil {
|
||||
|
||||
logger.Errorf("[prome_query_error][get_querier_errro]")
|
||||
return nil
|
||||
}
|
||||
logger.Debugf("[CommonQuerySeries.Result][from:%s][cj.start_ts:%+v cj.start_str:%+v SelectHints.startT:%+v][cj.end_ts:%+v cj.end_str:%+v SelectHints.endT:%+v][qlStrFinal:%s][cj:%+v]",
|
||||
cj.From,
|
||||
cj.Start,
|
||||
tsToStr(cj.Start),
|
||||
startT,
|
||||
cj.End,
|
||||
tsToStr(cj.End),
|
||||
endT,
|
||||
qlStrFinal,
|
||||
cj,
|
||||
)
|
||||
|
||||
defer q.Close()
|
||||
|
||||
hints := &storage.SelectHints{
|
||||
Start: startT,
|
||||
End: endT,
|
||||
Func: "series", // There is no series function, this token is used for lookups that don't need samples.
|
||||
}
|
||||
|
||||
// Get all series which match matchers.
|
||||
startTs := time.Now()
|
||||
s := q.Select(true, hints, matcherSets[0]...)
|
||||
timeTookSecond := time.Since(startTs).Seconds()
|
||||
if timeTookSecond > pd.Section.SlowLogRecordSecond {
|
||||
logger.Warningf("[prome_remote_read_show_slow_log_CommonQuerySeries_select][threshold:%v][timeTookSecond:%v][from:%v][args:%+v][promql:%v]",
|
||||
pd.Section.SlowLogRecordSecond,
|
||||
timeTookSecond,
|
||||
cj.From,
|
||||
cj,
|
||||
qlStrFinal,
|
||||
)
|
||||
}
|
||||
|
||||
return s
|
||||
|
||||
}
|
||||
|
||||
// 全部转化为 {__name__="a",label_a!="b",label_b=~"d|c",label_c!~"d"}
|
||||
// 对应prometheus 中的 /api/v1/labels
|
||||
// TODO 等待prometheus官方对 remote_read label_values 的支持
|
||||
// Implement: https://github.com/prometheus/prometheus/issues/3351
|
||||
func (pd *PromeDataSource) QueryTagKeys(recv vos.CommonTagQueryParam) *vos.TagKeyQueryResp {
|
||||
startTs := time.Now()
|
||||
respD := &vos.TagKeyQueryResp{
|
||||
Keys: make([]string, 0),
|
||||
}
|
||||
|
||||
labelNamesSet := make(map[string]struct{})
|
||||
if len(recv.Params) == 0 {
|
||||
recv.Params = append(recv.Params, vos.TagPairQueryParamOne{
|
||||
Idents: []string{},
|
||||
Metric: "",
|
||||
})
|
||||
}
|
||||
resultSeries := ""
|
||||
for _, x := range recv.Params {
|
||||
cj := &commonQueryObj{
|
||||
Idents: x.Idents,
|
||||
TagPairs: recv.TagPairs,
|
||||
Metric: x.Metric,
|
||||
Start: recv.Start,
|
||||
End: recv.End,
|
||||
From: "QueryTagKeys",
|
||||
}
|
||||
|
||||
s := pd.CommonQuerySeries(cj)
|
||||
if s.Warnings() != nil {
|
||||
logger.Warningf("[prome_query_error][series_set_iter_error][warning:%+v]", s.Warnings())
|
||||
|
||||
}
|
||||
|
||||
if err := s.Err(); err != nil {
|
||||
logger.Errorf("[prome_query_error][series_set_iter_error][err:%+v]", err)
|
||||
continue
|
||||
}
|
||||
thisSeriesNum := 0
|
||||
for s.Next() {
|
||||
series := s.At()
|
||||
thisSeriesNum++
|
||||
for _, lb := range series.Labels() {
|
||||
if lb.Name == LABEL_NAME {
|
||||
continue
|
||||
|
||||
}
|
||||
if recv.TagKey != "" {
|
||||
if !strings.Contains(lb.Name, recv.TagKey) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
labelNamesSet[lb.Name] = struct{}{}
|
||||
}
|
||||
}
|
||||
resultSeries += fmt.Sprintf(" %d ", thisSeriesNum)
|
||||
|
||||
}
|
||||
names := make([]string, len(labelNamesSet))
|
||||
i := 0
|
||||
for key := range labelNamesSet {
|
||||
names[i] = key
|
||||
i++
|
||||
}
|
||||
sort.Strings(names)
|
||||
// 因为map中的key是无序的,必须这样才能稳定输出
|
||||
if recv.Limit > 0 && len(names) > recv.Limit {
|
||||
names = names[:recv.Limit]
|
||||
}
|
||||
|
||||
respD.Keys = names
|
||||
timeTookSecond := time.Since(startTs).Seconds()
|
||||
if timeTookSecond > pd.Section.SlowLogRecordSecond {
|
||||
logger.Warningf("[prome_remote_read_show_slow_log][threshold:%v][timeTookSecond:%v][func:QueryTagKeys][args:%+v][resultSeries:%v]", pd.Section.SlowLogRecordSecond, timeTookSecond, recv, resultSeries)
|
||||
}
|
||||
return respD
|
||||
|
||||
}
|
||||
|
||||
// 对应prometheus 中的 /api/v1/label/<label_name>/values
|
||||
func (pd *PromeDataSource) QueryTagValues(recv vos.CommonTagQueryParam) *vos.TagValueQueryResp {
|
||||
startTs := time.Now()
|
||||
labelValuesSet := make(map[string]struct{})
|
||||
|
||||
if len(recv.Params) == 0 {
|
||||
recv.Params = append(recv.Params, vos.TagPairQueryParamOne{
|
||||
Idents: []string{},
|
||||
Metric: "",
|
||||
})
|
||||
}
|
||||
resultSeries := ""
|
||||
for _, x := range recv.Params {
|
||||
cj := &commonQueryObj{
|
||||
Idents: x.Idents,
|
||||
Metric: x.Metric,
|
||||
TagPairs: recv.TagPairs,
|
||||
Start: recv.Start,
|
||||
End: recv.End,
|
||||
From: "QueryTagValues",
|
||||
}
|
||||
|
||||
s := pd.CommonQuerySeries(cj)
|
||||
if s.Warnings() != nil {
|
||||
logger.Warningf("[prome_query_error][series_set_iter_error][warning:%+v]", s.Warnings())
|
||||
|
||||
}
|
||||
|
||||
if err := s.Err(); err != nil {
|
||||
logger.Errorf("[prome_query_error][series_set_iter_error][err:%+v]", err)
|
||||
continue
|
||||
}
|
||||
thisSeriesNum := 0
|
||||
for s.Next() {
|
||||
series := s.At()
|
||||
thisSeriesNum++
|
||||
for _, lb := range series.Labels() {
|
||||
if lb.Name == recv.TagKey {
|
||||
if recv.TagValue != "" {
|
||||
if !strings.Contains(lb.Value, recv.TagValue) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
labelValuesSet[lb.Value] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
resultSeries += fmt.Sprintf(" %d ", thisSeriesNum)
|
||||
}
|
||||
vals := make([]string, len(labelValuesSet))
|
||||
i := 0
|
||||
for val := range labelValuesSet {
|
||||
vals[i] = val
|
||||
i++
|
||||
}
|
||||
sort.Strings(vals)
|
||||
if recv.Limit > 0 && len(vals) > recv.Limit {
|
||||
vals = vals[:recv.Limit]
|
||||
}
|
||||
respD := &vos.TagValueQueryResp{}
|
||||
respD.Values = vals
|
||||
timeTookSecond := time.Since(startTs).Seconds()
|
||||
if timeTookSecond > pd.Section.SlowLogRecordSecond {
|
||||
logger.Warningf("[prome_remote_read_show_slow_log][threshold:%v][timeTookSecond:%v][func:QueryTagValues][args:%+v][resultSeries:%v]", pd.Section.SlowLogRecordSecond, timeTookSecond, recv, resultSeries)
|
||||
}
|
||||
return respD
|
||||
|
||||
}
|
||||
|
||||
// 对应prometheus 中的 /api/v1/label/<label_name>/values label_name == __name__
|
||||
func (pd *PromeDataSource) QueryMetrics(recv vos.MetricQueryParam) *vos.MetricQueryResp {
|
||||
startTs := time.Now()
|
||||
cj := &commonQueryObj{
|
||||
Idents: recv.Idents,
|
||||
Metric: recv.Metric,
|
||||
TagPairs: recv.TagPairs,
|
||||
Start: recv.Start,
|
||||
End: recv.End,
|
||||
From: "QueryMetrics",
|
||||
}
|
||||
|
||||
respD := &vos.MetricQueryResp{}
|
||||
respD.Metrics = make([]string, 0)
|
||||
s := pd.CommonQuerySeries(cj)
|
||||
if s.Warnings() != nil {
|
||||
logger.Warningf("[prome_query_error][series_set_iter_error][warning:%+v]", s.Warnings())
|
||||
|
||||
}
|
||||
|
||||
if err := s.Err(); err != nil {
|
||||
logger.Errorf("[prome_query_error][series_set_iter_error][err:%+v]", err)
|
||||
return respD
|
||||
}
|
||||
|
||||
var sets []storage.SeriesSet
|
||||
sets = append(sets, s)
|
||||
set := storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge)
|
||||
labelValuesSet := make(map[string]struct{})
|
||||
resultSeries := ""
|
||||
thisSeriesNum := 0
|
||||
for set.Next() {
|
||||
series := set.At()
|
||||
thisSeriesNum++
|
||||
for _, lb := range series.Labels() {
|
||||
if lb.Name == LABEL_NAME {
|
||||
labelValuesSet[lb.Value] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
resultSeries += fmt.Sprintf(" %d ", thisSeriesNum)
|
||||
vals := make([]string, len(labelValuesSet))
|
||||
i := 0
|
||||
for val := range labelValuesSet {
|
||||
vals[i] = val
|
||||
i++
|
||||
}
|
||||
|
||||
sort.Strings(vals)
|
||||
|
||||
if recv.Limit > 0 && len(vals) > recv.Limit {
|
||||
vals = vals[:recv.Limit]
|
||||
}
|
||||
respD.Metrics = vals
|
||||
timeTookSecond := time.Since(startTs).Seconds()
|
||||
if timeTookSecond > pd.Section.SlowLogRecordSecond {
|
||||
logger.Warningf("[prome_remote_read_show_slow_log][threshold:%v][timeTookSecond:%v][func:QueryMetrics][args:%+v][resultSeries:%v]", pd.Section.SlowLogRecordSecond, timeTookSecond, recv, resultSeries)
|
||||
}
|
||||
return respD
|
||||
}
|
||||
|
||||
// 对应prometheus 中的 /api/v1/series
|
||||
func (pd *PromeDataSource) QueryTagPairs(recv vos.CommonTagQueryParam) *vos.TagPairQueryResp {
|
||||
startTs := time.Now()
|
||||
respD := &vos.TagPairQueryResp{
|
||||
TagPairs: make([]string, 0),
|
||||
Idents: make([]string, 0),
|
||||
}
|
||||
tps := make(map[string]struct{})
|
||||
if len(recv.Params) == 0 {
|
||||
recv.Params = append(recv.Params, vos.TagPairQueryParamOne{
|
||||
Idents: []string{},
|
||||
Metric: "",
|
||||
})
|
||||
}
|
||||
resultSeries := ""
|
||||
for _, x := range recv.Params {
|
||||
cj := &commonQueryObj{
|
||||
Idents: x.Idents,
|
||||
TagPairs: recv.TagPairs,
|
||||
Metric: x.Metric,
|
||||
Start: recv.Start,
|
||||
End: recv.End,
|
||||
From: "QueryTagPairs",
|
||||
}
|
||||
|
||||
s := pd.CommonQuerySeries(cj)
|
||||
if s.Warnings() != nil {
|
||||
logger.Warningf("[prome_query_error][series_set_iter_error][warning:%+v]", s.Warnings())
|
||||
|
||||
}
|
||||
|
||||
if err := s.Err(); err != nil {
|
||||
logger.Errorf("[prome_query_error][series_set_iter_error][err:%+v]", err)
|
||||
continue
|
||||
}
|
||||
|
||||
var sets []storage.SeriesSet
|
||||
sets = append(sets, s)
|
||||
set := storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge)
|
||||
|
||||
labelIdents := make([]string, 0)
|
||||
thisSeriesNum := 0
|
||||
for set.Next() {
|
||||
series := s.At()
|
||||
thisSeriesNum++
|
||||
labelsS := series.Labels()
|
||||
for _, i := range labelsS {
|
||||
|
||||
if i.Name == LABEL_NAME {
|
||||
continue
|
||||
}
|
||||
if i.Name == LABEL_IDENT {
|
||||
labelIdents = append(labelIdents, i.Value)
|
||||
}
|
||||
if recv.Search != "" {
|
||||
// 如果配置了搜索字符串,则key value中任意匹配到即可
|
||||
if strings.Contains(i.Name, recv.Search) || strings.Contains(i.Value, recv.Search) {
|
||||
tps[fmt.Sprintf("%s=%s", i.Name, i.Value)] = struct{}{}
|
||||
}
|
||||
} else {
|
||||
tps[fmt.Sprintf("%s=%s", i.Name, i.Value)] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
resultSeries += fmt.Sprintf(" %d ", thisSeriesNum)
|
||||
|
||||
}
|
||||
|
||||
newTags := make([]string, len(tps))
|
||||
i := 0
|
||||
for k := range tps {
|
||||
newTags[i] = k
|
||||
i++
|
||||
}
|
||||
|
||||
sort.Strings(newTags)
|
||||
if recv.Limit > 0 && len(newTags) > recv.Limit {
|
||||
newTags = newTags[:recv.Limit]
|
||||
}
|
||||
|
||||
respD.TagPairs = newTags
|
||||
timeTookSecond := time.Since(startTs).Seconds()
|
||||
if timeTookSecond > pd.Section.SlowLogRecordSecond {
|
||||
logger.Warningf("[prome_remote_read_show_slow_log][threshold:%v][timeTookSecond:%v][func:QueryTagPairs][args:%+v][resultSeries:%v]", pd.Section.SlowLogRecordSecond, timeTookSecond, recv, resultSeries)
|
||||
}
|
||||
return respD
|
||||
}
|
||||
|
||||
func (pd *PromeDataSource) QueryDataInstant(ql string) []*vos.DataQueryInstanceResp {
|
||||
respD := make([]*vos.DataQueryInstanceResp, 0)
|
||||
pv := pd.QueryVector(ql)
|
||||
if pv == nil {
|
||||
|
||||
return respD
|
||||
}
|
||||
|
||||
for _, s := range pv {
|
||||
metricOne := make(map[string]interface{})
|
||||
valueOne := make([]float64, 0)
|
||||
|
||||
for _, l := range s.Metric {
|
||||
if l.Name == LABEL_NAME {
|
||||
continue
|
||||
}
|
||||
metricOne[l.Name] = l.Value
|
||||
}
|
||||
// 毫秒时间时间戳转 秒时间戳
|
||||
valueOne = append(valueOne, float64(s.Point.T)/1e3)
|
||||
valueOne = append(valueOne, s.Point.V)
|
||||
respD = append(respD, &vos.DataQueryInstanceResp{
|
||||
Metric: metricOne,
|
||||
Value: valueOne,
|
||||
})
|
||||
|
||||
}
|
||||
return respD
|
||||
}
|
||||
|
||||
func (pd *PromeDataSource) QueryVector(ql string) promql.Vector {
|
||||
t := time.Now()
|
||||
q, err := pd.QueryEngine.NewInstantQuery(pd.Queryable, ql, t)
|
||||
if err != nil {
|
||||
logger.Errorf("[prome_query_error][new_insQuery_error][err:%+v][ql:%+v]", err, ql)
|
||||
return nil
|
||||
}
|
||||
ctx := context.Background()
|
||||
res := q.Exec(ctx)
|
||||
if res.Err != nil {
|
||||
logger.Errorf("[prome_query_error][insQuery_exec_error][err:%+v][ql:%+v]", err, ql)
|
||||
return nil
|
||||
}
|
||||
defer q.Close()
|
||||
switch v := res.Value.(type) {
|
||||
case promql.Vector:
|
||||
return v
|
||||
case promql.Scalar:
|
||||
return promql.Vector{promql.Sample{
|
||||
Point: promql.Point(v),
|
||||
Metric: labels.Labels{},
|
||||
}}
|
||||
default:
|
||||
logger.Errorf("[prome_query_error][insQuery_res_error rule result is not a vector or scalar][err:%+v][ql:%+v]", err, ql)
|
||||
return nil
|
||||
}
|
||||
|
||||
}
|
9
build.sh
9
build.sh
|
@ -1,9 +0,0 @@
|
|||
#!/bin/bash

# Release version stamped into the binary at link time.
version=5.0.0-rc7-1

# Optional environment overrides, disabled by default.
#export GO111MODULE=on
#export GOPROXY=https://goproxy.cn

# Linker flags: set the config.Version variable to the release version.
ldflags="-X github.com/didi/nightingale/v5/config.Version=${version}"

# Build main.go into the n9e-server binary in the current directory.
go build -ldflags "${ldflags}" -o n9e-server main.go
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
package cache
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// AlertMuteMap is a lock-protected map from a string key to a list of
// Filters.
type AlertMuteMap struct {
	sync.RWMutex
	Data map[string][]Filter
}

// Filter groups a classpath prefix, a compiled resource regular expression
// and a tag map.
type Filter struct {
	ClasspathPrefix string
	ResReg          *regexp.Regexp
	TagsMap         map[string]string
}

// AlertMute is the package-level AlertMuteMap instance, initially empty.
var AlertMute = &AlertMuteMap{Data: map[string][]Filter{}}

// SetAll replaces the whole map with m under the write lock.
func (a *AlertMuteMap) SetAll(m map[string][]Filter) {
	a.Lock()
	a.Data = m
	a.Unlock()
}

// GetByKey returns the filters stored under key and whether the key exists.
func (a *AlertMuteMap) GetByKey(key string) ([]Filter, bool) {
	a.RLock()
	filters, found := a.Data[key]
	a.RUnlock()

	return filters, found
}
|
|
@ -1,75 +0,0 @@
|
|||
package cache
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
type AlertRulesByMetricCache struct {
|
||||
sync.RWMutex
|
||||
Data map[string][]*models.AlertRule // key是metric,便于后续检索
|
||||
MaxUpdateTs int64 // 从数据库拿到的最大update_at
|
||||
RuleNum int64 // 从数据库中统计到的行数
|
||||
LastSync int64 // 保存上次全量同步时间
|
||||
}
|
||||
|
||||
var (
|
||||
AlertRulesByMetric = &AlertRulesByMetricCache{Data: make(map[string][]*models.AlertRule)}
|
||||
)
|
||||
|
||||
func (a *AlertRulesByMetricCache) GetBy(instance string) []*models.AlertRule {
|
||||
a.RLock()
|
||||
defer a.RUnlock()
|
||||
|
||||
return a.Data[instance]
|
||||
}
|
||||
|
||||
func (a *AlertRulesByMetricCache) SetAll(alertRulesMap map[string][]*models.AlertRule, lastUpdateTs, ruleNum, lastSync int64) {
|
||||
a.Lock()
|
||||
defer a.Unlock()
|
||||
|
||||
a.Data = alertRulesMap
|
||||
a.MaxUpdateTs = lastUpdateTs
|
||||
a.RuleNum = ruleNum
|
||||
a.LastSync = lastSync
|
||||
}
|
||||
|
||||
type AlertRulesTotalCache struct {
|
||||
sync.RWMutex
|
||||
Data map[int64]*models.AlertRule
|
||||
}
|
||||
|
||||
var AlertRules = &AlertRulesTotalCache{Data: make(map[int64]*models.AlertRule)}
|
||||
|
||||
func (a *AlertRulesTotalCache) Get(id int64) (*models.AlertRule, bool) {
|
||||
a.RLock()
|
||||
defer a.RUnlock()
|
||||
|
||||
alertRule, exists := a.Data[id]
|
||||
return alertRule, exists
|
||||
}
|
||||
|
||||
func (a *AlertRulesTotalCache) SetAll(alertRulesMap map[int64]*models.AlertRule) {
|
||||
a.Lock()
|
||||
defer a.Unlock()
|
||||
|
||||
a.Data = alertRulesMap
|
||||
}
|
||||
|
||||
// 获取所有PULL型规则的列表
|
||||
func (a *AlertRulesTotalCache) Pulls() []*models.AlertRule {
|
||||
a.RLock()
|
||||
defer a.RUnlock()
|
||||
|
||||
cnt := len(a.Data)
|
||||
ret := make([]*models.AlertRule, 0, cnt)
|
||||
|
||||
for _, rule := range a.Data {
|
||||
if rule.Type == models.PULL {
|
||||
ret = append(ret, rule)
|
||||
}
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
|
@ -1,7 +0,0 @@
|
|||
package cache
|
||||
|
||||
import (
|
||||
cmap "github.com/orcaman/concurrent-map"
|
||||
)
|
||||
|
||||
// MetricDescMapper is a package-level concurrent map created with cmap.New().
// NOTE(review): presumably maps a metric name to its description — confirm
// against the code that fills it.
var MetricDescMapper = cmap.New()
|
|
@ -1,27 +0,0 @@
|
|||
package cache
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// ClasspathPrefixMap is a lock-protected map from an int64 id to a list of
// int64 ids.
type ClasspathPrefixMap struct {
	sync.RWMutex
	Data map[int64][]int64
}

// ClasspathPrefix is the package-level ClasspathPrefixMap instance, initially
// empty.
var ClasspathPrefix = &ClasspathPrefixMap{Data: map[int64][]int64{}}

// Get returns the ids stored under id and whether the key exists.
func (c *ClasspathPrefixMap) Get(id int64) ([]int64, bool) {
	c.RLock()
	found, ok := c.Data[id]
	c.RUnlock()

	return found, ok
}

// SetAll replaces the whole map with data under the write lock.
func (c *ClasspathPrefixMap) SetAll(data map[int64][]int64) {
	c.Lock()
	c.Data = data
	c.Unlock()
}
|
|
@ -1,33 +0,0 @@
|
|||
package cache
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
type ClasspathResMap struct {
|
||||
sync.RWMutex
|
||||
Data map[int64]*ClasspathAndRes
|
||||
}
|
||||
|
||||
type ClasspathAndRes struct {
|
||||
Res []string
|
||||
Classpath *models.Classpath
|
||||
}
|
||||
|
||||
// classpath_id -> classpath & res_idents
|
||||
var ClasspathRes = &ClasspathResMap{Data: make(map[int64]*ClasspathAndRes)}
|
||||
|
||||
func (c *ClasspathResMap) Get(id int64) (*ClasspathAndRes, bool) {
|
||||
c.RLock()
|
||||
defer c.RUnlock()
|
||||
resources, exists := c.Data[id]
|
||||
return resources, exists
|
||||
}
|
||||
|
||||
func (c *ClasspathResMap) SetAll(collectRulesMap map[int64]*ClasspathAndRes) {
|
||||
c.Lock()
|
||||
defer c.Unlock()
|
||||
c.Data = collectRulesMap
|
||||
}
|
|
@ -1,32 +0,0 @@
|
|||
package cache
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
type CollectRuleOfIdentMap struct {
|
||||
sync.RWMutex
|
||||
Data map[string][]*models.CollectRule
|
||||
}
|
||||
|
||||
var CollectRulesOfIdent = &CollectRuleOfIdentMap{Data: make(map[string][]*models.CollectRule)}
|
||||
|
||||
func (c *CollectRuleOfIdentMap) GetBy(ident string) []*models.CollectRule {
|
||||
c.RLock()
|
||||
defer c.RUnlock()
|
||||
return c.Data[ident]
|
||||
}
|
||||
|
||||
func (c *CollectRuleOfIdentMap) Set(node string, collectRules []*models.CollectRule) {
|
||||
c.Lock()
|
||||
defer c.Unlock()
|
||||
c.Data[node] = collectRules
|
||||
}
|
||||
|
||||
func (c *CollectRuleOfIdentMap) SetAll(collectRulesMap map[string][]*models.CollectRule) {
|
||||
c.Lock()
|
||||
defer c.Unlock()
|
||||
c.Data = collectRulesMap
|
||||
}
|
|
@ -1,76 +0,0 @@
|
|||
package cache
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// SafeDoubleMap is a lock-protected two-level string set: each key maps to a
// set of values.
type SafeDoubleMap struct {
	sync.RWMutex
	M map[string]map[string]struct{}
}

// ResClasspath maps res_ident -> classpath_path -> struct{}{}. It starts
// empty.
var ResClasspath = &SafeDoubleMap{M: map[string]map[string]struct{}{}}

// GetKeys returns all first-level keys in unspecified order. The result is
// never nil.
func (s *SafeDoubleMap) GetKeys() []string {
	s.RLock()
	defer s.RUnlock()

	out := make([]string, 0, len(s.M))
	for k := range s.M {
		out = append(out, k)
	}

	return out
}

// GetValues returns the values stored under key in unspecified order. An
// unknown key yields an empty, non-nil slice.
func (s *SafeDoubleMap) GetValues(key string) []string {
	s.RLock()
	defer s.RUnlock()

	inner, found := s.M[key]
	if !found {
		return []string{}
	}

	out := make([]string, 0, len(inner))
	for v := range inner {
		out = append(out, v)
	}

	return out
}

// Exists reports whether value is stored under key.
func (s *SafeDoubleMap) Exists(key string, value string) bool {
	s.RLock()
	defer s.RUnlock()

	// Indexing a missing (nil) inner map simply reports absence.
	_, found := s.M[key][value]
	return found
}

// Set adds value to the set stored under key, creating the set on first use.
func (s *SafeDoubleMap) Set(key string, value string) {
	s.Lock()
	defer s.Unlock()

	inner, found := s.M[key]
	if !found {
		inner = make(map[string]struct{})
		s.M[key] = inner
	}
	inner[value] = struct{}{}
}

// SetAll replaces the whole two-level map with data under the write lock.
func (s *SafeDoubleMap) SetAll(data map[string]map[string]struct{}) {
	s.Lock()
	s.M = data
	s.Unlock()
}
|
|
@ -1,36 +0,0 @@
|
|||
package cache
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
// resource_ident -> tags_map
|
||||
// 监控数据上报的时候,要把资源的tags附到指标数据上
|
||||
type ResTagsMap struct {
|
||||
sync.RWMutex
|
||||
Data map[string]ResourceAndTags
|
||||
}
|
||||
|
||||
type ResourceAndTags struct {
|
||||
Tags map[string]string
|
||||
Resource models.Resource
|
||||
}
|
||||
|
||||
var ResTags = &ResTagsMap{Data: make(map[string]ResourceAndTags)}
|
||||
|
||||
func (r *ResTagsMap) SetAll(m map[string]ResourceAndTags) {
|
||||
r.Lock()
|
||||
defer r.Unlock()
|
||||
r.Data = m
|
||||
}
|
||||
|
||||
func (r *ResTagsMap) Get(key string) (ResourceAndTags, bool) {
|
||||
r.RLock()
|
||||
defer r.RUnlock()
|
||||
|
||||
value, exists := r.Data[key]
|
||||
|
||||
return value, exists
|
||||
}
|
|
@ -1,48 +0,0 @@
|
|||
package cache
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
type UserMap struct {
|
||||
sync.RWMutex
|
||||
Data map[int64]*models.User
|
||||
}
|
||||
|
||||
var UserCache = &UserMap{Data: make(map[int64]*models.User)}
|
||||
|
||||
func (s *UserMap) GetBy(id int64) *models.User {
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
|
||||
return s.Data[id]
|
||||
}
|
||||
|
||||
func (s *UserMap) GetByIds(ids []int64) []*models.User {
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
var users []*models.User
|
||||
for _, id := range ids {
|
||||
if s.Data[id] == nil {
|
||||
continue
|
||||
}
|
||||
users = append(users, s.Data[id])
|
||||
}
|
||||
|
||||
return users
|
||||
}
|
||||
|
||||
func (s *UserMap) GetById(id int64) *models.User {
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
|
||||
return s.Data[id]
|
||||
}
|
||||
|
||||
func (s *UserMap) SetAll(users map[int64]*models.User) {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
s.Data = users
|
||||
}
|
|
@ -1,41 +0,0 @@
|
|||
package cache
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
type UserGroupMap struct {
|
||||
sync.RWMutex
|
||||
Data map[int64]*models.UserGroup
|
||||
}
|
||||
|
||||
var UserGroupCache = &UserGroupMap{Data: make(map[int64]*models.UserGroup)}
|
||||
|
||||
func (s *UserGroupMap) GetBy(id int64) *models.UserGroup {
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
|
||||
return s.Data[id]
|
||||
}
|
||||
|
||||
func (s *UserGroupMap) GetByIds(ids []int64) []*models.UserGroup {
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
var userGroups []*models.UserGroup
|
||||
for _, id := range ids {
|
||||
if s.Data[id] == nil {
|
||||
continue
|
||||
}
|
||||
userGroups = append(userGroups, s.Data[id])
|
||||
}
|
||||
|
||||
return userGroups
|
||||
}
|
||||
|
||||
func (s *UserGroupMap) SetAll(userGroups map[int64]*models.UserGroup) {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
s.Data = userGroups
|
||||
}
|
|
@ -1,38 +0,0 @@
|
|||
package cache
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// UserGroupMemberMap is a lock-guarded membership table:
// groupid -> set of userid.
type UserGroupMemberMap struct {
	sync.RWMutex
	Data map[int64]map[int64]struct{}
}

// UserGroupMember is the package-level table (groupid -> userid), created
// with an empty map.
var UserGroupMember = &UserGroupMemberMap{Data: make(map[int64]map[int64]struct{})}

// Get returns the user-id set stored for group id and whether the group has
// an entry. The returned set is the stored map itself, not a copy.
func (m *UserGroupMemberMap) Get(id int64) (map[int64]struct{}, bool) {
	m.RLock()
	members, found := m.Data[id]
	m.RUnlock()

	return members, found
}

// Exists reports whether uid is recorded as a member of group gid.
func (m *UserGroupMemberMap) Exists(gid, uid int64) bool {
	m.RLock()
	defer m.RUnlock()

	// Indexing a missing (nil) inner set simply yields "not found".
	_, found := m.Data[gid][uid]
	return found
}

// SetAll replaces the whole table with data under the write lock. The map is
// stored as given, not copied.
func (m *UserGroupMemberMap) SetAll(data map[int64]map[int64]struct{}) {
	m.Lock()
	m.Data = data
	m.Unlock()
}
|
186
changelog
186
changelog
|
@ -1,186 +0,0 @@
|
|||
3.1.1
|
||||
影响模块:n9e-job
|
||||
更新内容:job模块之前给监控用的callback地址method误设置为了get,是不对的,改成了post
|
||||
|
||||
3.1.2
|
||||
影响模块:n9e-rdb
|
||||
更新内容:子节点修改的时候,不允许修改为租户节点
|
||||
|
||||
3.1.3
|
||||
影响模块:n9e-monapi
|
||||
更新内容:对于P2、P3的告警会发送重复的两条
|
||||
|
||||
3.1.4
|
||||
影响模块:n9e-index n9e-judge n9e-monapi n9e-rdb n9e-transfer n9e-tsdb
|
||||
更新内容:把hbs的逻辑从monapi挪到rdb,拆分监控的权限点
|
||||
|
||||
3.1.5
|
||||
影响模块:n9e-monapi
|
||||
更新内容:清理策略的时候会空指针,node删除了,策略还在,此时会复现
|
||||
|
||||
3.1.6
|
||||
影响模块:n9e-ams etc/gop.yml
|
||||
更新内容:主机设备增加了扩展字段的管理,用于维护一些位置信息、过保信息,增加了新的sql:sql/n9e_ams_3.1.6.sql
|
||||
|
||||
3.2.0
|
||||
影响模块:n9e-agent etc/agent.yml
|
||||
更新内容:agent支持metrics指标采集能力,这个版本是为商业版本服务的,开源用户无需更新
|
||||
|
||||
3.3.0
|
||||
影响模块:n9e-rdb n9e-transfer n9e-judge n9e-ams n9e-monapi sql/n9e_rdb_3.3.0.sql etc/*.tpl
|
||||
更新内容:增强安全性:密码复杂度、cookie处理优化等;支持M3DB作为存储后端(如果要尝试M3需要修改transfer、monapi配置文件);修复告警引擎与条件串数的问题;为主机设备增加自定义字段的能力
|
||||
|
||||
3.3.1
|
||||
影响模块:n9e-job n9e-rdb n9e-agent n9e-ams n9e-judge
|
||||
更新内容:修复job模块的一个调度bug;rdb支持根据org搜索user;agent在fields变化时及时感知,fields和host扩展字段联动;解决上个版本引入的judge处理nodata的问题
|
||||
|
||||
3.4.0
|
||||
升级内容:
|
||||
- 增强了安全性,引入了session机制,写入cookie的内容从user.uuid变更为随机session.id
|
||||
- 修复部分sql注入漏洞
|
||||
- 告警引擎函数优化,all、c_avg_rate_abs等
|
||||
- 告警消息内容优化,可以展示设备名称和设备备注,感谢冯骐的PR
|
||||
- 增加了大盘导入导出功能
|
||||
升级方法:
|
||||
- 除了agent、tsdb、index的二进制不用升级,其他所有模块的二进制都要升级
|
||||
- job ams monapi rdb 四个模块的配置文件中的cookieName全部换成ecmc-sid
|
||||
- rdb的配置文件发生了较大变化,需要对照升级
|
||||
- sql目录下有几个3.4.0的sql,需要导入
|
||||
|
||||
3.4.1
|
||||
升级内容:
|
||||
- 修复日志监控采集策略配置了tag但是无法编辑的问题
|
||||
升级方法:
|
||||
- 更新monapi的二进制即可
|
||||
|
||||
3.5.0
|
||||
升级内容:
|
||||
- 引入了组件监控模块prober,内置了mysql、redis、mongo监控采集能力
|
||||
- 引入了内置监控大盘和内置告警策略,可以在任意节点一键导入内置大盘和策略
|
||||
升级方法:
|
||||
- n9e-monapi n9e-rdb n9e-transfer n9e-ams n9e-job 的二进制要升级
|
||||
- n9e-agent也可以升级,解决了进程监控的性能问题,如果不在意可以不升级
|
||||
- n9e-prober 模块需要新部署
|
||||
- sql目录下有个3.5.0的sql patch文件,需要导入
|
||||
- etc目录下新增了screen、alert两个目录,需要拷贝到生产环境
|
||||
- etc目录下新增了plugins目录,需要随着prober模块走
|
||||
- etc/address.yml里增加prober的配置
|
||||
|
||||
3.5.1
|
||||
升级内容:
|
||||
- monapi里的alarmEnabled默认值设置为true
|
||||
- agent进程采集,忽略EOF日志
|
||||
- agent增加一个接口获取endpoint
|
||||
- agent日志监控支持一种新的日志时间格式
|
||||
- 修复组件监控调整采集频率不生效的问题
|
||||
升级方法:
|
||||
- 替换n9e-monapi n9e-prober n9e-agent二进制,升级pub下的前端资源文件
|
||||
|
||||
3.5.2
|
||||
升级内容:
|
||||
- prober模板支持匿名结构体,结构体嵌套
|
||||
- prober插件添加了对TLS的支持
|
||||
- 修复prober上报没有port的问题
|
||||
升级方法:
|
||||
- 替换n9e-prober n9e-monapi二进制,升级pub下的前端资源文件
|
||||
|
||||
3.6.0
|
||||
升级内容:
|
||||
- prober模块支持nginx、elasticsearch、prometheus的监控采集,prometheus转换时姑且干掉了 Histogram 和 Summary
|
||||
- 告警消息中节点挂载关系做了去重处理
|
||||
升级方法:
|
||||
- 替换n9e-prober n9e-monapi二进制
|
||||
|
||||
3.7.0
|
||||
升级内容:
|
||||
- 调整session清理频率
|
||||
- 新增zookeeper、tengine、rabbitmq、haproxy、ping、telnet相关采集工具
|
||||
- bugfix:集群部署的时候,多个redis实例,judge只能识别最后一个实例的问题
|
||||
升级方法:
|
||||
- sql/n9e_rdb-v3.7.0.sql 有个新的表结构,需要导入一下
|
||||
- 替换n9e-rdb n9e-prober n9e-judge n9e-monapi二进制,前端没有升级
|
||||
- 将etc/plugins里zookeeper.yml,tengine.yml等新增的yml文件复制到配置文件里
|
||||
|
||||
3.7.1
|
||||
升级内容:
|
||||
- prober采集增加dryrun测试方法,可以测试是否真的能采集到数据
|
||||
- 增加dns_query插件,对dns做监控
|
||||
- 内置大盘,增加n9e内置模块大盘
|
||||
- 如果存储使用m3,支持在transfer配置一次查询每条线最多返回的原始点数
|
||||
- 日志监控,可以把最后一条日志放到extra字段,报警的时候可以展示,需要升级n9e-agent n9e-monapi
|
||||
- 修复agent对进程监控采集的bug,进程cpu使用采集的不准确
|
||||
- 修改告警策略配置多个团队的时候不生效的问题
|
||||
- monapi支持一个新的timestamp格式
|
||||
升级方法:
|
||||
- sql/n9e_mon-v3.7.1.sql变更了表结构,需要执行一下
|
||||
- 将etc/plugins里的dns_query.yml放到生产环境的etc/plugins目录下
|
||||
- 将etc/screen/n9e_modules放到生产环境的etc/screen目录下
|
||||
- 替换n9e-rdb n9e-prober n9e-monapi n9e-transfer n9e-agent二进制
|
||||
|
||||
3.8.0
|
||||
升级内容:
|
||||
- monapi优化告警策略中用户信息补全逻辑
|
||||
- rdb新增接口,查询项目下用户拥有的资源权限点
|
||||
- transfer查询索引接口支持指定时间范围
|
||||
- prober去掉组件采集默认的白名单设置
|
||||
升级方法:
|
||||
- 替换n9e-rdb n9e-prober n9e-monapi n9e-transfer二进制
|
||||
- 将etc/password-changed-email.tpl放到生产环境的etc目录下
|
||||
|
||||
4.0.0
|
||||
升级内容:
|
||||
- 服务端模块合并为一个模块
|
||||
- agentd和server的调用全部走rpc
|
||||
|
||||
重新安装:见 https://n9e.didiyun.com/v4/docs/install/
|
||||
|
||||
升级方法:
|
||||
- 使用新的etc替换掉原来的etc
|
||||
- 使用etc/nginx.conf替换原来的nginx.conf
|
||||
- n9e-prober替换旧的n9e-prober
|
||||
- n9e-agentd替换n9e-agent
|
||||
- n9e-server替换n9e-rdb、n9e-ams、n9e-job、n9e-monapi、n9e-transfer、n9e-judge
|
||||
|
||||
4.0.1
|
||||
升级内容:
|
||||
- 修复消息通知的问题
|
||||
|
||||
重新安装:见 https://n9e.didiyun.com/v4/docs/install/
|
||||
|
||||
升级方法:
|
||||
- 将 *.tpl 文件放到 etc/tpl 下
|
||||
- 替换etc/server.yml
|
||||
- 替换n9e-server
|
||||
|
||||
4.0.2
|
||||
升级内容:
|
||||
- 优化告警接收人补全逻辑
|
||||
- 增加postgresql监控插件
|
||||
|
||||
重新安装:见 https://n9e.didiyun.com/v4/docs/install/
|
||||
|
||||
升级方法:
|
||||
- 替换n9e-server n9e-prober
|
||||
|
||||
4.0.3
|
||||
升级内容:
|
||||
- 修复nodata恢复告警重复问题
|
||||
|
||||
升级方法:
|
||||
- 替换n9e-server
|
||||
|
||||
5.0.0-rc1
|
||||
升级内容:
|
||||
- 发布v5预览版
|
||||
|
||||
部署方式:
|
||||
- 见文档 https://n9e.didiyun.com/docs/install/
|
||||
|
||||
5.0.0-rc2
|
||||
升级内容:
|
||||
- 修复若干问题
|
||||
- 新增告警策略,监控大盘导入、导出和内置模板功能
|
||||
- 新增概览页面
|
||||
|
||||
部署方式:
|
||||
- 见文档 https://n9e.didiyun.com/docs/install/
|
176
config/config.go
176
config/config.go
|
@ -1,176 +0,0 @@
|
|||
package config
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/viper"
|
||||
"github.com/toolkits/pkg/file"
|
||||
|
||||
"github.com/didi/nightingale/v5/backend"
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
"github.com/didi/nightingale/v5/pkg/i18n"
|
||||
"github.com/didi/nightingale/v5/pkg/iconf"
|
||||
"github.com/didi/nightingale/v5/pkg/ilog"
|
||||
)
|
||||
|
||||
type ConfigStruct struct {
|
||||
Logger ilog.Config `yaml:"logger"`
|
||||
HTTP httpSection `yaml:"http"`
|
||||
RPC rpcSection `yaml:"rpc"`
|
||||
LDAP models.LdapSection `yaml:"ldap"`
|
||||
MySQL models.MysqlSection `yaml:"mysql"`
|
||||
Heartbeat heartbeatSection `yaml:"heartbeat"`
|
||||
I18N i18n.Config `yaml:"i18n"`
|
||||
Judge judgeSection `yaml:"judge"`
|
||||
Alert alertSection `yaml:"alert"`
|
||||
Trans transSection `yaml:"trans"`
|
||||
ContactKeys []contactKey `yaml:"contactKeys"`
|
||||
NotifyChannels []string `yaml:"notifyChannels"`
|
||||
Tpl tplSection `yaml:"tpl"`
|
||||
}
|
||||
|
||||
type tplSection struct {
|
||||
AlertRulePath string `yaml:"alertRulePath"`
|
||||
DashboardPath string `yaml:"dashboardPath"`
|
||||
}
|
||||
|
||||
type alertSection struct {
|
||||
NotifyScriptPath string `yaml:"notifyScriptPath"`
|
||||
NotifyScriptConcurrency int `yaml:"notifyScriptConcurrency"`
|
||||
MutedAlertPersist bool `yaml:"mutedAlertPersist"`
|
||||
}
|
||||
|
||||
type transSection struct {
|
||||
Enable bool `yaml:"enable"`
|
||||
Backend backend.BackendSection `yaml:"backend"`
|
||||
}
|
||||
|
||||
type judgeSection struct {
|
||||
ReadBatch int `yaml:"readBatch"`
|
||||
ConnTimeout int `yaml:"connTimeout"`
|
||||
CallTimeout int `yaml:"callTimeout"`
|
||||
WriterNum int `yaml:"writerNum"`
|
||||
ConnMax int `yaml:"connMax"`
|
||||
ConnIdle int `yaml:"connIdle"`
|
||||
}
|
||||
|
||||
type heartbeatSection struct {
|
||||
IP string `yaml:"ip"`
|
||||
LocalAddr string `yaml:"-"`
|
||||
Interval int64 `yaml:"interval"`
|
||||
}
|
||||
|
||||
type httpSection struct {
|
||||
Mode string `yaml:"mode"`
|
||||
Access bool `yaml:"access"`
|
||||
Listen string `yaml:"listen"`
|
||||
Pprof bool `yaml:"pprof"`
|
||||
CookieName string `yaml:"cookieName"`
|
||||
CookieDomain string `yaml:"cookieDomain"`
|
||||
CookieSecure bool `yaml:"cookieSecure"`
|
||||
CookieHttpOnly bool `yaml:"cookieHttpOnly"`
|
||||
CookieMaxAge int `yaml:"cookieMaxAge"`
|
||||
CookieSecret string `yaml:"cookieSecret"`
|
||||
CsrfSecret string `yaml:"csrfSecret"`
|
||||
}
|
||||
|
||||
type rpcSection struct {
|
||||
Listen string `yaml:"listen"`
|
||||
}
|
||||
|
||||
type contactKey struct {
|
||||
Label string `yaml:"label" json:"label"`
|
||||
Key string `yaml:"key" json:"key"`
|
||||
}
|
||||
|
||||
var Config *ConfigStruct
|
||||
|
||||
func Parse() error {
|
||||
ymlFile := iconf.GetYmlFile("server")
|
||||
if ymlFile == "" {
|
||||
return fmt.Errorf("configuration file of server not found")
|
||||
}
|
||||
|
||||
bs, err := file.ReadBytes(ymlFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read yml[%s]: %v", ymlFile, err)
|
||||
}
|
||||
|
||||
viper.SetConfigType("yaml")
|
||||
err = viper.ReadConfig(bytes.NewBuffer(bs))
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read yml[%s]: %v", ymlFile, err)
|
||||
}
|
||||
|
||||
// default value settings
|
||||
viper.SetDefault("i18n.lang", "zh")
|
||||
viper.SetDefault("heartbeat.interval", 1000)
|
||||
viper.SetDefault("judge.readBatch", 2000)
|
||||
viper.SetDefault("judge.connTimeout", 2000)
|
||||
viper.SetDefault("judge.callTimeout", 5000)
|
||||
viper.SetDefault("judge.writerNum", 256)
|
||||
viper.SetDefault("judge.connMax", 2560)
|
||||
viper.SetDefault("judge.connIdle", 256)
|
||||
viper.SetDefault("alert.notifyScriptPath", "./etc/script/notify.py")
|
||||
viper.SetDefault("alert.notifyScriptConcurrency", 200)
|
||||
viper.SetDefault("alert.mutedAlertPersist", true)
|
||||
viper.SetDefault("trans.backend.prometheus.lookbackDeltaMinute", 2)
|
||||
viper.SetDefault("trans.backend.prometheus.maxConcurrentQuery", 30)
|
||||
viper.SetDefault("trans.backend.prometheus.maxSamples", 50000000)
|
||||
viper.SetDefault("trans.backend.prometheus.maxFetchAllSeriesLimitMinute", 5)
|
||||
viper.SetDefault("trans.backend.prometheus.slowLogRecordSecond", 3)
|
||||
viper.SetDefault("trans.backend.prometheus.defaultFetchSeriesQl", `{__name__=~"system.*"}`)
|
||||
viper.SetDefault("tpl.alertRulePath", "./etc/alert_rule")
|
||||
viper.SetDefault("tpl.dashboardPath", "./etc/dashboard")
|
||||
|
||||
err = viper.Unmarshal(&Config)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read yml[%s]: %v", ymlFile, err)
|
||||
}
|
||||
|
||||
fmt.Println("config.file:", ymlFile)
|
||||
|
||||
if Config.Heartbeat.IP == "" {
|
||||
// auto detect
|
||||
Config.Heartbeat.IP = fmt.Sprint(GetOutboundIP())
|
||||
|
||||
if Config.Heartbeat.IP == "" {
|
||||
fmt.Println("heartbeat ip auto got is blank")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
}
|
||||
// 用户在配置文件中指定了heartbeat.ip ,用于本机没有网络,下面的报错,那么需要将Config.Heartbeat.LocalAddr设置一下
|
||||
// auto get outbound ip fail: dial udp 8.8.8.8:80: connect: network is unreachable
|
||||
|
||||
port := strings.Split(Config.RPC.Listen, ":")[1]
|
||||
Config.Heartbeat.LocalAddr = Config.Heartbeat.IP + ":" + port
|
||||
|
||||
// 正常情况肯定不是127.0.0.1,但是,如果就是单机部署,并且这个机器没有网络,比如本地调试并且本机没网的时候
|
||||
// if Config.Heartbeat.IP == "127.0.0.1" {
|
||||
// fmt.Println("heartbeat ip is 127.0.0.1 and it is useless, so, exit")
|
||||
// os.Exit(1)
|
||||
// }
|
||||
|
||||
fmt.Println("heartbeat.ip:", Config.Heartbeat.IP)
|
||||
fmt.Printf("heartbeat.interval: %dms\n", Config.Heartbeat.Interval)
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetOutboundIP returns the preferred outbound IP of this machine: it dials
// udp 8.8.8.8:80 and reports the local address of the resulting connection.
// If the dial fails, it prints the error and exits the process with status 1.
func GetOutboundIP() net.IP {
	sock, dialErr := net.Dial("udp", "8.8.8.8:80")
	if dialErr != nil {
		fmt.Println("auto get outbound ip fail:", dialErr)
		os.Exit(1)
	}
	defer sock.Close()

	return sock.LocalAddr().(*net.UDPAddr).IP
}
|
|
@ -1,6 +0,0 @@
|
|||
package config
|
||||
|
||||
// EndpointName is the name the server gives itself when it periodically
// heartbeats to the database.
|
||||
const EndpointName = "server_rpc"
|
||||
|
||||
// Version is the version string; it defaults to "not specified".
// NOTE(review): presumably overridden at build time via -ldflags — confirm.
var Version = "not specified"
|
|
@ -1,71 +0,0 @@
|
|||
package config
|
||||
|
||||
import "github.com/didi/nightingale/v5/pkg/i18n"
|
||||
|
||||
var (
	// dict maps the English message text used in the code (including its
	// format verbs) to the Chinese translation. Keys must match the source
	// messages exactly, and each translation must keep the same verbs.
	dict = map[string]string{
		"Login fail, check your username and password":           "登录失败,请检查您的用户名和密码",
		"Internal server error, try again later please":          "系统内部错误,请稍后再试",
		"Each user has at most two tokens":                       "每个用户至多创建两个密钥",
		"No such token":                                          "密钥不存在",
		"Username is blank":                                      "用户名不能为空",
		"Username has invalid characters":                        "用户名含有非法字符",
		"Nickname has invalid characters":                        "用户昵称含有非法字符",
		"Phone invalid":                                          "手机号格式有误",
		"Email invalid":                                          "邮箱格式有误",
		"Incorrect old password":                                 "旧密码错误",
		"Username %s already exists":                             "用户名(%s)已存在",
		"No such user":                                           "用户不存在",
		"UserGroup %s already exists":                            "用户组(%s)已存在",
		"Group name has invalid characters":                      "分组名称含有非法字符",
		"Group note has invalid characters":                      "分组备注含有非法字符",
		"No such user group":                                     "用户组不存在",
		"Classpath path has invalid characters":                  "机器分组路径含有非法字符",
		"Classpath note has invalid characters":                  "机器分组路径备注含有非法字符",
		"There are still resources under the classpath":          "机器分组路径下仍然挂有资源",
		"There are still collect rules under the classpath":      "机器分组路径下仍然存在采集策略",
		"No such classpath":                                      "机器分组路径不存在",
		"Classpath %s already exists":                            "机器分组路径(%s)已存在",
		"Preset classpath %s cannot delete":                      "内置机器分组(%s)不允许删除",
		"No such mute config":                                    "此屏蔽配置不存在",
		"DashboardGroup name has invalid characters":             "大盘分组名称含有非法字符",
		"DashboardGroup name is blank":                           "大盘分组名称为空",
		"DashboardGroup %s already exists":                       "大盘分组(%s)已存在",
		"No such dashboard group":                                "大盘分组不存在",
		"Dashboard name has invalid characters":                  "大盘名称含有非法字符",
		"Dashboard %s already exists":                            "监控大盘(%s)已存在",
		"ChartGroup name has invalid characters":                 "图表分组名称含有非法字符",
		"No such dashboard":                                      "监控大盘不存在",
		"No such chart group":                                    "图表分组不存在",
		"No such chart":                                          "图表不存在",
		"There are still dashboards under the group":             "分组下面仍然存在监控大盘,请先从组内移出",
		"AlertRuleGroup name has invalid characters":             "告警规则分组含有非法字符",
		"AlertRuleGroup %s already exists":                       "告警规则分组(%s)已存在",
		"There are still alert rules under the group":            "分组下面仍然存在告警规则",
		"AlertRule name has invalid characters":                  "告警规则含有非法字符",
		"No such alert rule":                                     "告警规则不存在",
		"No such alert rule group":                               "告警规则分组不存在",
		"No such alert event":                                    "告警事件不存在",
		"Alert rule %s already exists":                           "告警规则(%s)已存在",
		"No such collect rule":                                   "采集规则不存在",
		"Decoded metric description empty":                       "导入的指标释义列表为空",
		"User disabled":                                          "用户已被禁用",
		"Tags(%s) invalid":                                       "标签(%s)格式不合法",
		"Resource filter(Func:%s)'s param invalid":               "资源过滤条件(函数:%s)参数不合法(为空或包含空格都不合法)",
		"Tags filter(Func:%s)'s param invalid":                   "标签过滤条件(函数:%s)参数不合法(为空或包含空格都不合法)",
		"Regexp: %s cannot be compiled":                          "正则表达式(%s)不合法,无法编译",
		// "append" is 附加; the previous text said 附件 ("attachment").
		"AppendTags(%s) invalid":                                     "附加标签(%s)格式不合法",
		"Regexp %s matching failed":                                  "正则表达式 %s 匹配失败",
		"Regexp %s matched, but cannot get substring()":              "主正则 %s 匹配成功,但无法匹配到子串",
		"TagKey or TagValue contains illegal characters[:,/=\r\n\t]": "标签KEY或者标签值包含非法字符串[:,/=\r\n\t]",
		"Resource cannot delete in preset classpath":                 "预置分组不能删除资源",
		"No such resource %s":                                        "不存在该资源(%s)",
	}

	// langDict groups the dictionaries by language code; only "zh" is defined.
	langDict = map[string]map[string]string{
		"zh": dict,
	}
)
|
||||
|
||||
// init hands langDict to i18n.DictRegister when the package is initialised.
func init() {
|
||||
i18n.DictRegister(langDict)
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
ibexetc
|
||||
initsql
|
||||
mysqletc
|
||||
n9eetc
|
||||
prometc
|
||||
build.sh
|
||||
docker-compose.yaml
|
|
@ -0,0 +1,10 @@
|
|||
# Image for the n9e binary. The binary is expected next to this Dockerfile in
# the build context.
FROM ubuntu:21.04

WORKDIR /app

# COPY instead of ADD: n9e is a plain local file, so ADD's archive extraction
# and URL handling are neither needed nor wanted.
COPY n9e /app/
RUN chmod +x n9e

# 19000 and 18000 are the ports published for the nserver and nwebapi
# services in docker-compose.yaml.
EXPOSE 19000
EXPOSE 18000

# Default command only prints the help text; docker-compose.yaml overrides it
# with the "server" / "webapi" subcommands.
CMD ["/app/n9e", "-h"]
|
|
@ -0,0 +1,15 @@
|
|||
#!/bin/sh
#
# Build the nightingale image from the n9e binary one directory up, then tag
# it as ulric2019/nightingale:<tag> and push it.
#
# Usage: <script> <tag>

# Stop at the first failing command, so that a failed copy or build is never
# followed by tagging and pushing a stale image.
set -e

if [ $# -ne 1 ]; then
    # A usage error is a failure: report it on stderr with a non-zero status.
    echo "$0 <tag>" >&2
    exit 1
fi

tag=$1

echo "tag: ${tag}"

# Always rebuild from a fresh copy of the binary.
rm -rf n9e
cp ../n9e .
docker build -t "nightingale:${tag}" .

docker tag "nightingale:${tag}" "ulric2019/nightingale:${tag}"
docker push "ulric2019/nightingale:${tag}"
|
|
@ -0,0 +1,175 @@
|
|||
version: "3.7"
|
||||
|
||||
networks:
|
||||
nightingale:
|
||||
driver: bridge
|
||||
|
||||
services:
|
||||
mysql:
|
||||
image: "mysql:5.7"
|
||||
container_name: mysql
|
||||
hostname: mysql
|
||||
restart: always
|
||||
ports:
|
||||
- "3306:3306"
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
MYSQL_ROOT_PASSWORD: 1234
|
||||
volumes:
|
||||
- ./mysqldata:/var/lib/mysql/
|
||||
- ./initsql:/docker-entrypoint-initdb.d/
|
||||
- ./mysqletc/my.cnf:/etc/my.cnf
|
||||
networks:
|
||||
- nightingale
|
||||
|
||||
redis:
|
||||
image: "redis:6.2"
|
||||
container_name: redis
|
||||
hostname: redis
|
||||
restart: always
|
||||
ports:
|
||||
- "6379:6379"
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
networks:
|
||||
- nightingale
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus
|
||||
container_name: prometheus
|
||||
hostname: prometheus
|
||||
restart: always
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
volumes:
|
||||
- ./prometc:/etc/prometheus
|
||||
ports:
|
||||
- "9090:9090"
|
||||
networks:
|
||||
- nightingale
|
||||
command:
|
||||
- "--config.file=/etc/prometheus/prometheus.yml"
|
||||
- "--storage.tsdb.path=/prometheus"
|
||||
- "--web.console.libraries=/usr/share/prometheus/console_libraries"
|
||||
- "--web.console.templates=/usr/share/prometheus/consoles"
|
||||
- "--enable-feature=remote-write-receiver"
|
||||
- "--query.lookback-delta=2m"
|
||||
|
||||
ibex:
|
||||
image: ulric2019/ibex:0.2
|
||||
container_name: ibex
|
||||
hostname: ibex
|
||||
restart: always
|
||||
environment:
|
||||
GIN_MODE: release
|
||||
TZ: Asia/Shanghai
|
||||
ports:
|
||||
- "10090:10090"
|
||||
- "20090:20090"
|
||||
volumes:
|
||||
- ./ibexetc:/app/etc
|
||||
networks:
|
||||
- nightingale
|
||||
depends_on:
|
||||
- mysql
|
||||
links:
|
||||
- mysql:mysql
|
||||
command:
|
||||
- "/app/ibex"
|
||||
- "server"
|
||||
|
||||
nwebapi:
|
||||
image: ulric2019/nightingale:0.4
|
||||
container_name: nwebapi
|
||||
hostname: nwebapi
|
||||
restart: always
|
||||
environment:
|
||||
GIN_MODE: release
|
||||
TZ: Asia/Shanghai
|
||||
volumes:
|
||||
- ./n9eetc:/app/etc
|
||||
ports:
|
||||
- "18000:18000"
|
||||
networks:
|
||||
- nightingale
|
||||
depends_on:
|
||||
- mysql
|
||||
- redis
|
||||
- prometheus
|
||||
- ibex
|
||||
links:
|
||||
- mysql:mysql
|
||||
- redis:redis
|
||||
- prometheus:prometheus
|
||||
- ibex:ibex
|
||||
command:
|
||||
- "/app/n9e"
|
||||
- "webapi"
|
||||
|
||||
nserver:
|
||||
image: ulric2019/nightingale:0.4
|
||||
container_name: nserver
|
||||
hostname: nserver
|
||||
restart: always
|
||||
environment:
|
||||
GIN_MODE: release
|
||||
TZ: Asia/Shanghai
|
||||
volumes:
|
||||
- ./n9eetc:/app/etc
|
||||
ports:
|
||||
- "19000:19000"
|
||||
networks:
|
||||
- nightingale
|
||||
depends_on:
|
||||
- mysql
|
||||
- redis
|
||||
- prometheus
|
||||
- ibex
|
||||
links:
|
||||
- mysql:mysql
|
||||
- redis:redis
|
||||
- prometheus:prometheus
|
||||
- ibex:ibex
|
||||
command:
|
||||
- "/app/n9e"
|
||||
- "server"
|
||||
|
||||
telegraf:
|
||||
image: "telegraf:1.20.3"
|
||||
container_name: "telegraf"
|
||||
hostname: "telegraf01"
|
||||
restart: always
|
||||
environment:
|
||||
TZ: Asia/Shanghai
|
||||
volumes:
|
||||
- ./telegrafetc/telegraf.conf:/etc/telegraf/telegraf.conf
|
||||
ports:
|
||||
- "8125:8125/udp"
|
||||
- "8092:8092/udp"
|
||||
- "8094:8094/tcp"
|
||||
networks:
|
||||
- nightingale
|
||||
depends_on:
|
||||
- nserver
|
||||
links:
|
||||
- nserver:nserver
|
||||
|
||||
agentd:
|
||||
image: ulric2019/ibex:0.2
|
||||
container_name: agentd
|
||||
hostname: agentd
|
||||
restart: always
|
||||
environment:
|
||||
GIN_MODE: release
|
||||
TZ: Asia/Shanghai
|
||||
volumes:
|
||||
- ./ibexetc:/app/etc
|
||||
networks:
|
||||
- nightingale
|
||||
depends_on:
|
||||
- ibex
|
||||
links:
|
||||
- ibex:ibex
|
||||
command:
|
||||
- "/app/ibex"
|
||||
- "agentd"
|
|
@ -0,0 +1,38 @@
|
|||
# debug, release
|
||||
RunMode = "release"
|
||||
|
||||
# task meta storage dir
|
||||
MetaDir = "./meta"
|
||||
|
||||
[HTTP]
|
||||
Enable = true
|
||||
# http listening address
|
||||
Host = "0.0.0.0"
|
||||
# http listening port
|
||||
Port = 2090
|
||||
# https cert file path
|
||||
CertFile = ""
|
||||
# https key file path
|
||||
KeyFile = ""
|
||||
# whether print access log
|
||||
PrintAccessLog = true
|
||||
# whether enable pprof
|
||||
PProf = false
|
||||
# http graceful shutdown timeout, unit: s
|
||||
ShutdownTimeout = 30
|
||||
# max content length: 64M
|
||||
MaxContentLength = 67108864
|
||||
# http server read timeout, unit: s
|
||||
ReadTimeout = 20
|
||||
# http server write timeout, unit: s
|
||||
WriteTimeout = 40
|
||||
# http server idle timeout, unit: s
|
||||
IdleTimeout = 120
|
||||
|
||||
[Heartbeat]
|
||||
# unit: ms
|
||||
Interval = 1000
|
||||
# rpc servers
|
||||
Servers = ["ibex:20090"]
|
||||
# $ip or $hostname or specified string
|
||||
Host = "telegraf01"
|
|
@ -0,0 +1,97 @@
|
|||
# debug, release
|
||||
RunMode = "release"
|
||||
|
||||
[Log]
|
||||
# log write dir
|
||||
Dir = "logs-server"
|
||||
# log level: DEBUG INFO WARNING ERROR
|
||||
Level = "DEBUG"
|
||||
# stdout, stderr, file
|
||||
Output = "stdout"
|
||||
# # rotate by time
|
||||
# KeepHours = 4
|
||||
# # rotate by size
|
||||
# RotateNum = 3
|
||||
# # unit: MB
|
||||
# RotateSize = 256
|
||||
|
||||
[HTTP]
|
||||
Enable = true
|
||||
# http listening address
|
||||
Host = "0.0.0.0"
|
||||
# http listening port
|
||||
Port = 10090
|
||||
# https cert file path
|
||||
CertFile = ""
|
||||
# https key file path
|
||||
KeyFile = ""
|
||||
# whether print access log
|
||||
PrintAccessLog = true
|
||||
# whether enable pprof
|
||||
PProf = false
|
||||
# http graceful shutdown timeout, unit: s
|
||||
ShutdownTimeout = 30
|
||||
# max content length: 64M
|
||||
MaxContentLength = 67108864
|
||||
# http server read timeout, unit: s
|
||||
ReadTimeout = 20
|
||||
# http server write timeout, unit: s
|
||||
WriteTimeout = 40
|
||||
# http server idle timeout, unit: s
|
||||
IdleTimeout = 120
|
||||
|
||||
[BasicAuth]
|
||||
# using when call apis
|
||||
ibex = "ibex"
|
||||
|
||||
[RPC]
|
||||
Listen = "0.0.0.0:20090"
|
||||
|
||||
[Heartbeat]
|
||||
# auto detect if blank
|
||||
IP = ""
|
||||
# unit: ms
|
||||
Interval = 1000
|
||||
|
||||
[Output]
|
||||
# database | remote
|
||||
ComeFrom = "database"
|
||||
AgtdPort = 2090
|
||||
|
||||
[Gorm]
|
||||
# enable debug mode or not
|
||||
Debug = false
|
||||
# mysql postgres
|
||||
DBType = "mysql"
|
||||
# unit: s
|
||||
MaxLifetime = 7200
|
||||
# max open connections
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# table prefix
|
||||
TablePrefix = ""
|
||||
|
||||
[MySQL]
|
||||
# mysql address host:port
|
||||
Address = "mysql:3306"
|
||||
# mysql username
|
||||
User = "root"
|
||||
# mysql password
|
||||
Password = "1234"
|
||||
# database name
|
||||
DBName = "ibex"
|
||||
# connection params
|
||||
Parameters = "charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
|
||||
|
||||
[Postgres]
|
||||
# pg address host:port
|
||||
Address = "postgres:5432"
|
||||
# pg user
|
||||
User = "root"
|
||||
# pg password
|
||||
Password = "1234"
|
||||
# database name
|
||||
DBName = "ibex"
|
||||
# ssl mode
|
||||
SSLMode = "disable"
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,3 @@
|
|||
GRANT ALL ON *.* TO 'root'@'127.0.0.1' IDENTIFIED BY '1234';
|
||||
GRANT ALL ON *.* TO 'root'@'localhost' IDENTIFIED BY '1234';
|
||||
GRANT ALL ON *.* TO 'root'@'%' IDENTIFIED BY '1234';
|
|
@ -0,0 +1,372 @@
|
|||
set names utf8mb4;
|
||||
|
||||
drop database if exists n9e_v5;
|
||||
create database n9e_v5;
|
||||
use n9e_v5;
|
||||
|
||||
CREATE TABLE `user` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`username` varchar(64) not null comment 'login name, cannot rename',
|
||||
`nickname` varchar(64) not null comment 'display name, chinese name',
|
||||
`password` varchar(128) not null default '',
|
||||
`phone` varchar(16) not null default '',
|
||||
`email` varchar(64) not null default '',
|
||||
`portrait` varchar(255) not null default '' comment 'portrait image url',
|
||||
`roles` varchar(255) not null comment 'Admin | Standard | Guest, split by space',
|
||||
`contacts` varchar(1024) comment 'json e.g. {wecom:xx, dingtalk_robot_token:yy}',
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
`update_at` bigint not null default 0,
|
||||
`update_by` varchar(64) not null default '',
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY (`username`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
insert into `user`(id, username, nickname, password, roles, create_at, create_by, update_at, update_by) values(1, 'root', '超管', 'root.2020', 'Admin', unix_timestamp(now()), 'system', unix_timestamp(now()), 'system');
|
||||
|
||||
CREATE TABLE `user_group` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`name` varchar(128) not null default '',
|
||||
`note` varchar(255) not null default '',
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
`update_at` bigint not null default 0,
|
||||
`update_by` varchar(64) not null default '',
|
||||
PRIMARY KEY (`id`),
|
||||
KEY (`create_by`),
|
||||
KEY (`update_at`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
insert into user_group(id, name, create_at, create_by, update_at, update_by) values(1, 'demo-root-group', unix_timestamp(now()), 'root', unix_timestamp(now()), 'root');
|
||||
|
||||
CREATE TABLE `user_group_member` (
|
||||
`group_id` bigint unsigned not null,
|
||||
`user_id` bigint unsigned not null,
|
||||
KEY (`group_id`),
|
||||
KEY (`user_id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
insert into user_group_member(group_id, user_id) values(1, 1);
|
||||
|
||||
CREATE TABLE `configs` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`ckey` varchar(191) not null,
|
||||
`cval` varchar(1024) not null default '',
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY (`ckey`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `role` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`name` varchar(191) not null default '',
|
||||
`note` varchar(255) not null default '',
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY (`name`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
insert into `role`(name, note) values('Admin', 'Administrator role');
|
||||
insert into `role`(name, note) values('Standard', 'Ordinary user role');
|
||||
insert into `role`(name, note) values('Guest', 'Readonly user role');
|
||||
|
||||
CREATE TABLE `role_operation`(
|
||||
`role_name` varchar(128) not null,
|
||||
`operation` varchar(191) not null,
|
||||
KEY (`role_name`),
|
||||
KEY (`operation`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
-- The Admin role is special: it has no explicit operations listed here but is allowed to do anything.
|
||||
insert into `role_operation`(role_name, operation) values('Guest', 'menu_prom_dash');
|
||||
insert into `role_operation`(role_name, operation) values('Guest', 'menu_target_dash');
|
||||
insert into `role_operation`(role_name, operation) values('Guest', 'menu_dashboard');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'menu_prom_dash');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'menu_target_dash');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'menu_dashboard');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'menu_user');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'menu_user_group');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'menu_busi_group');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'menu_target');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'menu_alert_rule');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'menu_alert_mute');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'menu_alert_subscribe');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'menu_alert_cur_event');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'menu_alert_his_event');
|
||||
|
||||
-- for alert_rule | collect_rule | mute | dashboard grouping
|
||||
CREATE TABLE `busi_group` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`name` varchar(191) not null,
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
`update_at` bigint not null default 0,
|
||||
`update_by` varchar(64) not null default '',
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY (`name`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
insert into busi_group(id, name, create_at, create_by, update_at, update_by) values(1, 'Default Business Group', unix_timestamp(now()), 'root', unix_timestamp(now()), 'root');
|
||||
|
||||
CREATE TABLE `busi_group_member` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`busi_group_id` bigint not null comment 'busi group id',
|
||||
`user_group_id` bigint not null comment 'user group id',
|
||||
`perm_flag` char(2) not null comment 'ro | rw',
|
||||
PRIMARY KEY (`id`),
|
||||
KEY (`busi_group_id`),
|
||||
KEY (`user_group_id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
-- Grant the demo user group (id 1) read-write access to the default business group (id 1).
-- Single quotes are used for the string literal: double quotes denote an identifier when
-- the server runs with the ANSI_QUOTES sql_mode, and the rest of this file uses single quotes.
insert into busi_group_member(busi_group_id, user_group_id, perm_flag) values(1, 1, 'rw');
|
||||
|
||||
CREATE TABLE `dashboard` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`group_id` bigint not null default 0 comment 'busi group id',
|
||||
`name` varchar(191) not null,
|
||||
`tags` varchar(255) not null comment 'split by space',
|
||||
`configs` varchar(4096) comment 'dashboard variables',
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
`update_at` bigint not null default 0,
|
||||
`update_by` varchar(64) not null default '',
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY (`group_id`, `name`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
-- auto create the first subclass 'Default chart group' of dashboard
|
||||
CREATE TABLE `chart_group` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`dashboard_id` bigint unsigned not null,
|
||||
`name` varchar(255) not null,
|
||||
`weight` int not null default 0,
|
||||
PRIMARY KEY (`id`),
|
||||
KEY (`dashboard_id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `chart` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`group_id` bigint unsigned not null comment 'chart group id',
|
||||
`configs` varchar(8192),
|
||||
`weight` int not null default 0,
|
||||
PRIMARY KEY (`id`),
|
||||
KEY (`group_id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `chart_share` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`cluster` varchar(128) not null,
|
||||
`configs` varchar(8192),
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
primary key (`id`),
|
||||
key (`create_at`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `alert_rule` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`group_id` bigint not null default 0 comment 'busi group id',
|
||||
`cluster` varchar(128) not null,
|
||||
`name` varchar(255) not null,
|
||||
`note` varchar(255) not null,
|
||||
`severity` tinyint(1) not null comment '0:Emergency 1:Warning 2:Notice',
|
||||
`disabled` tinyint(1) not null comment '0:enabled 1:disabled',
|
||||
`prom_for_duration` int not null comment 'prometheus for, unit:s',
|
||||
`prom_ql` varchar(4096) not null comment 'promql',
|
||||
`prom_eval_interval` int not null comment 'evaluate interval',
|
||||
`enable_stime` char(5) not null default '00:00',
|
||||
`enable_etime` char(5) not null default '23:59',
|
||||
`enable_days_of_week` varchar(32) not null default '' comment 'split by space: 0 1 2 3 4 5 6',
|
||||
`notify_recovered` tinyint(1) not null comment 'whether notify when recovery',
|
||||
`notify_channels` varchar(255) not null default '' comment 'split by space: sms voice email dingtalk wecom',
|
||||
`notify_groups` varchar(255) not null default '' comment 'split by space: 233 43',
|
||||
`notify_repeat_step` int not null default 0 comment 'unit: min',
|
||||
`callbacks` varchar(255) not null default '' comment 'split by space: http://a.com/api/x http://a.com/api/y',
|
||||
`runbook_url` varchar(255),
|
||||
`append_tags` varchar(255) not null default '' comment 'split by space: service=n9e mod=api',
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
`update_at` bigint not null default 0,
|
||||
`update_by` varchar(64) not null default '',
|
||||
PRIMARY KEY (`id`),
|
||||
KEY (`group_id`),
|
||||
KEY (`update_at`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `alert_mute` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`group_id` bigint not null default 0 comment 'busi group id',
|
||||
`cluster` varchar(128) not null,
|
||||
`tags` varchar(2048) not null default '' comment 'json,map,tagkey->regexp|value',
|
||||
`cause` varchar(255) not null default '',
|
||||
`btime` bigint not null default 0 comment 'begin time',
|
||||
`etime` bigint not null default 0 comment 'end time',
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
PRIMARY KEY (`id`),
|
||||
KEY (`create_at`),
|
||||
KEY (`group_id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `alert_subscribe` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`group_id` bigint not null default 0 comment 'busi group id',
|
||||
`cluster` varchar(128) not null,
|
||||
`rule_id` bigint not null default 0,
|
||||
`tags` varchar(2048) not null default '' comment 'json,map,tagkey->regexp|value',
|
||||
`redefine_severity` tinyint(1) default 0 comment 'is redefine severity?',
|
||||
`new_severity` tinyint(1) not null comment '0:Emergency 1:Warning 2:Notice',
|
||||
`redefine_channels` tinyint(1) default 0 comment 'is redefine channels?',
|
||||
`new_channels` varchar(255) not null default '' comment 'split by space: sms voice email dingtalk wecom',
|
||||
`user_group_ids` varchar(250) not null comment 'split by space 1 34 5, notify cc to user_group_ids',
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
`update_at` bigint not null default 0,
|
||||
`update_by` varchar(64) not null default '',
|
||||
PRIMARY KEY (`id`),
|
||||
KEY (`update_at`),
|
||||
KEY (`group_id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `target` (
|
||||
`id` bigint unsigned not null auto_increment,
|
||||
`group_id` bigint not null default 0 comment 'busi group id',
|
||||
`cluster` varchar(128) not null comment 'append to alert event as field',
|
||||
`ident` varchar(191) not null comment 'target id',
|
||||
`note` varchar(255) not null default '' comment 'append to alert event as field',
|
||||
`tags` varchar(512) not null default '' comment 'append to series data as tags, split by space, append external space at suffix',
|
||||
`update_at` bigint not null default 0,
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY (`ident`),
|
||||
KEY (`group_id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
-- case1: target_idents; case2: target_tags
|
||||
-- CREATE TABLE `collect_rule` (
|
||||
-- `id` bigint unsigned not null auto_increment,
|
||||
-- `group_id` bigint not null default 0 comment 'busi group id',
|
||||
-- `cluster` varchar(128) not null,
|
||||
-- `target_idents` varchar(512) not null default '' comment 'ident list, split by space',
|
||||
-- `target_tags` varchar(512) not null default '' comment 'filter targets by tags, split by space',
|
||||
-- `name` varchar(191) not null default '',
|
||||
-- `note` varchar(255) not null default '',
|
||||
-- `step` int not null,
|
||||
-- `type` varchar(64) not null comment 'e.g. port proc log plugin',
|
||||
-- `data` text not null,
|
||||
-- `append_tags` varchar(255) not null default '' comment 'split by space: e.g. mod=n9e dept=cloud',
|
||||
-- `create_at` bigint not null default 0,
|
||||
-- `create_by` varchar(64) not null default '',
|
||||
-- `update_at` bigint not null default 0,
|
||||
-- `update_by` varchar(64) not null default '',
|
||||
-- PRIMARY KEY (`id`),
|
||||
-- KEY (`group_id`, `type`, `name`)
|
||||
-- ) ENGINE=InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `alert_cur_event` (
|
||||
`id` bigint unsigned not null comment 'use alert_his_event.id',
|
||||
`cluster` varchar(128) not null,
|
||||
`group_id` bigint unsigned not null comment 'busi group id of rule',
|
||||
`hash` varchar(64) not null comment 'rule_id + vector_pk',
|
||||
`rule_id` bigint unsigned not null,
|
||||
`rule_name` varchar(255) not null,
|
||||
`rule_note` varchar(512) not null default 'alert rule note',
|
||||
`severity` tinyint(1) not null comment '0:Emergency 1:Warning 2:Notice',
|
||||
`prom_for_duration` int not null comment 'prometheus for, unit:s',
|
||||
`prom_ql` varchar(4096) not null comment 'promql',
|
||||
`prom_eval_interval` int not null comment 'evaluate interval',
|
||||
`callbacks` varchar(255) not null default '' comment 'split by space: http://a.com/api/x http://a.com/api/y',
|
||||
`runbook_url` varchar(255),
|
||||
`notify_recovered` tinyint(1) not null comment 'whether notify when recovery',
|
||||
`notify_channels` varchar(255) not null default '' comment 'split by space: sms voice email dingtalk wecom',
|
||||
`notify_groups` varchar(255) not null default '' comment 'split by space: 233 43',
|
||||
`notify_repeat_next` bigint not null default 0 comment 'next timestamp to notify, get repeat settings from rule',
|
||||
`target_ident` varchar(191) not null default '' comment 'target ident, also in tags',
|
||||
`target_note` varchar(191) not null default '' comment 'target note',
|
||||
`trigger_time` bigint not null,
|
||||
`trigger_value` varchar(255) not null,
|
||||
`tags` varchar(1024) not null default '' comment 'merge data_tags rule_tags, split by ,,',
|
||||
PRIMARY KEY (`id`),
|
||||
KEY (`hash`),
|
||||
KEY (`rule_id`),
|
||||
KEY (`trigger_time`, `group_id`),
|
||||
KEY (`notify_repeat_next`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `alert_his_event` (
|
||||
`id` bigint unsigned not null AUTO_INCREMENT,
|
||||
`is_recovered` tinyint(1) not null,
|
||||
`cluster` varchar(128) not null,
|
||||
`group_id` bigint unsigned not null comment 'busi group id of rule',
|
||||
`hash` varchar(64) not null comment 'rule_id + vector_pk',
|
||||
`rule_id` bigint unsigned not null,
|
||||
`rule_name` varchar(255) not null,
|
||||
`rule_note` varchar(512) not null default 'alert rule note',
|
||||
`severity` tinyint(1) not null comment '0:Emergency 1:Warning 2:Notice',
|
||||
`prom_for_duration` int not null comment 'prometheus for, unit:s',
|
||||
`prom_ql` varchar(4096) not null comment 'promql',
|
||||
`prom_eval_interval` int not null comment 'evaluate interval',
|
||||
`callbacks` varchar(255) not null default '' comment 'split by space: http://a.com/api/x http://a.com/api/y',
|
||||
`runbook_url` varchar(255),
|
||||
`notify_recovered` tinyint(1) not null comment 'whether notify when recovery',
|
||||
`notify_channels` varchar(255) not null default '' comment 'split by space: sms voice email dingtalk wecom',
|
||||
`notify_groups` varchar(255) not null default '' comment 'split by space: 233 43',
|
||||
`target_ident` varchar(191) not null default '' comment 'target ident, also in tags',
|
||||
`target_note` varchar(191) not null default '' comment 'target note',
|
||||
`trigger_time` bigint not null,
|
||||
`trigger_value` varchar(255) not null,
|
||||
`tags` varchar(1024) not null default '' comment 'merge data_tags rule_tags, split by ,,',
|
||||
PRIMARY KEY (`id`),
|
||||
KEY (`hash`),
|
||||
KEY (`rule_id`),
|
||||
KEY (`trigger_time`, `group_id`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `task_tpl`
|
||||
(
|
||||
`id` int unsigned NOT NULL AUTO_INCREMENT,
|
||||
`group_id` int unsigned not null comment 'busi group id',
|
||||
`title` varchar(255) not null default '',
|
||||
`account` varchar(64) not null,
|
||||
`batch` int unsigned not null default 0,
|
||||
`tolerance` int unsigned not null default 0,
|
||||
`timeout` int unsigned not null default 0,
|
||||
`pause` varchar(255) not null default '',
|
||||
`script` text not null,
|
||||
`args` varchar(512) not null default '',
|
||||
`tags` varchar(255) not null default '' comment 'split by space',
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
`update_at` bigint not null default 0,
|
||||
`update_by` varchar(64) not null default '',
|
||||
PRIMARY KEY (`id`),
|
||||
KEY (`group_id`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `task_tpl_host`
|
||||
(
|
||||
`ii` int unsigned NOT NULL AUTO_INCREMENT,
|
||||
`id` int unsigned not null comment 'task tpl id',
|
||||
`host` varchar(128) not null comment 'ip or hostname',
|
||||
PRIMARY KEY (`ii`),
|
||||
KEY (`id`, `host`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
||||
|
||||
CREATE TABLE `task_record`
|
||||
(
|
||||
`id` bigint unsigned not null comment 'ibex task id',
|
||||
`group_id` bigint not null comment 'busi group id',
|
||||
`ibex_address` varchar(128) not null,
|
||||
`ibex_auth_user` varchar(128) not null default '',
|
||||
`ibex_auth_pass` varchar(128) not null default '',
|
||||
`title` varchar(255) not null default '',
|
||||
`account` varchar(64) not null,
|
||||
`batch` int unsigned not null default 0,
|
||||
`tolerance` int unsigned not null default 0,
|
||||
`timeout` int unsigned not null default 0,
|
||||
`pause` varchar(255) not null default '',
|
||||
`script` text not null,
|
||||
`args` varchar(512) not null default '',
|
||||
`create_at` bigint not null default 0,
|
||||
`create_by` varchar(64) not null default '',
|
||||
PRIMARY KEY (`id`),
|
||||
KEY (`create_at`, `group_id`),
|
||||
KEY (`create_by`)
|
||||
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
|
|
@ -0,0 +1,5 @@
|
|||
[mysqld]
|
||||
pid-file = /var/run/mysqld/mysqld.pid
|
||||
socket = /var/run/mysqld/mysqld.sock
|
||||
datadir = /var/lib/mysql
|
||||
bind-address = 0.0.0.0
|
Binary file not shown.
|
@ -0,0 +1 @@
|
|||
cpu_usage_idle: CPU空闲率(单位:%)
|
|
@ -0,0 +1,162 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: UTF-8 -*-
|
||||
import sys
|
||||
import json
|
||||
import urllib2
|
||||
import smtplib
|
||||
from email.mime.text import MIMEText
|
||||
|
||||
# Maps a notify channel name (as found in the event payload) to the suffix of
# the Sender.send_<suffix> classmethod that handles it.
notify_channel_funcs = dict(
    email="email",
    sms="sms",
    voice="voice",
    dingtalk="dingtalk",
    wecom="wecom",
)

# SMTP settings used by Sender.send_email (the connection is made with SMTP_SSL).
mail_host = "smtp.163.com"
mail_port = 994
mail_user = "ulricqin"
mail_pass = "password"
mail_from = "ulricqin@163.com"
|
||||
|
||||
class Sender(object):
    """Notification senders: one ``send_<channel>`` classmethod per channel.

    Every sender receives the decoded JSON payload and reads
    ``payload["event"]["notify_users_obj"]`` (the users to notify) and, where a
    message body is needed, ``payload["tpls"]`` (rendered templates keyed by
    template file name).

    The class depends on the module-level ``mail_*`` settings and on
    ``urllib2``, i.e. it targets Python 2.
    """

    @classmethod
    def _notify_users(cls, payload):
        """Return the users to notify; a missing or null list yields ``[]``."""
        return payload.get('event').get("notify_users_obj") or []

    @classmethod
    def _collect_phones(cls, payload):
        """Return a dict whose keys are the distinct non-empty user phones."""
        phones = {}
        for u in cls._notify_users(payload):
            if u.get("phone"):
                phones[u.get("phone")] = 1
        return phones

    @classmethod
    def _collect_tokens(cls, payload, contact_key):
        """Return a dict whose keys are the distinct non-empty ``contact_key`` values.

        ``contacts`` is treated as a mapping; a missing or null value is
        skipped instead of raising AttributeError.
        NOTE(review): assumes ``contacts`` arrives as a JSON object - confirm
        against the process that produces the payload.
        """
        tokens = {}
        for u in cls._notify_users(payload):
            contacts = u.get("contacts") or {}
            token = contacts.get(contact_key, "")
            if token:
                tokens[token] = 1
        return tokens

    @classmethod
    def _post_json(cls, url, body):
        """POST ``body`` as JSON to ``url`` and print the response body.

        An ``urllib2.HTTPError`` is printed rather than raised; any other
        exception propagates to the caller.
        """
        opener = urllib2.build_opener(urllib2.HTTPHandler())
        request = urllib2.Request(url, data=json.dumps(body))
        request.add_header("Content-Type", 'application/json;charset=utf-8')
        request.get_method = lambda: "POST"
        try:
            connection = opener.open(request)
            try:
                print(connection.read())
            finally:
                # release the socket even if reading the response fails
                connection.close()
        except urllib2.HTTPError as error:
            print(error)

    @classmethod
    def send_email(cls, payload):
        """Send the rendered ``mailbody.tpl`` as an HTML mail to every user email.

        Does nothing when no user has an email address. SMTP errors are
        printed, not raised.
        """
        emails = {}
        for u in cls._notify_users(payload):
            if u.get("email"):
                emails[u.get("email")] = 1

        if not emails:
            return

        recipients = list(emails)
        mail_body = payload.get('tpls').get("mailbody.tpl", "mailbody.tpl not found")
        message = MIMEText(mail_body, 'html', 'utf-8')
        message['From'] = mail_from
        message['To'] = ", ".join(recipients)
        message["Subject"] = payload.get('tpls').get("subject.tpl", "subject.tpl not found")

        try:
            smtp = smtplib.SMTP_SSL(mail_host, mail_port)
            try:
                smtp.login(mail_user, mail_pass)
                smtp.sendmail(mail_from, recipients, message.as_string())
            finally:
                # always drop the connection, also when login/sendmail fails
                smtp.close()
        except smtplib.SMTPException as error:
            print(error)

    @classmethod
    def send_wecom(cls, payload):
        """Post the rendered ``wecom.tpl`` as markdown to each distinct WeCom robot webhook."""
        tokens = cls._collect_tokens(payload, "wecom_robot_token")

        for t in tokens:
            url = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key={}".format(t)
            body = {
                "msgtype": "markdown",
                "markdown": {
                    "content": payload.get('tpls').get("wecom.tpl", "wecom.tpl not found")
                }
            }
            cls._post_json(url, body)

    @classmethod
    def send_dingtalk(cls, payload):
        """Post the rendered ``dingtalk.tpl`` as text to each distinct DingTalk robot webhook.

        The phones of all users to notify are passed as ``atMobiles``.
        """
        phones = cls._collect_phones(payload)
        tokens = cls._collect_tokens(payload, "dingtalk_robot_token")

        for t in tokens:
            url = "https://oapi.dingtalk.com/robot/send?access_token={}".format(t)
            body = {
                "msgtype": "text",
                "text": {
                    "content": payload.get('tpls').get("dingtalk.tpl", "dingtalk.tpl not found")
                },
                "at": {
                    "atMobiles": list(phones),
                    "isAtAll": False
                }
            }
            cls._post_json(url, body)

    @classmethod
    def send_sms(cls, payload):
        """Placeholder: only prints the phones that would receive an SMS."""
        phones = cls._collect_phones(payload)
        if phones:
            print("send_sms not implemented, phones: {}".format(list(phones)))

    @classmethod
    def send_voice(cls, payload):
        """Placeholder: only prints the phones that would receive a voice call."""
        phones = cls._collect_phones(payload)
        if phones:
            print("send_voice not implemented, phones: {}".format(list(phones)))
|
||||
|
||||
def main():
    """Read one JSON payload from stdin and dispatch it to each notify channel.

    The decoded payload is also written to ``.payload`` in the current working
    directory. For every entry of ``payload["event"]["notify_channels"]`` the
    matching ``Sender.send_<name>`` classmethod is looked up through
    ``notify_channel_funcs`` and called with the whole payload; channels
    without a sender are reported on stdout and skipped.
    """
    payload = json.load(sys.stdin)
    with open(".payload", 'w') as f:
        f.write(json.dumps(payload, indent=4))
    # NOTE(review): notify_channels is iterated as a sequence of channel-name
    # strings - confirm against the producer of the payload. A missing or null
    # value is treated as "no channels".
    for ch in payload.get('event').get('notify_channels') or []:
        send_func_name = "send_{}".format(notify_channel_funcs.get(ch.strip()))
        if not hasattr(Sender, send_func_name):
            # format the name into the message (it used to be passed to print as a second argument)
            print("function: {} not found".format(send_func_name))
            continue
        send_func = getattr(Sender, send_func_name)
        send_func(payload)
|
||||
|
||||
def hello():
    """Print a fixed greeting; invoked when the script is run with the ``hello`` argument."""
    greeting = "hello nightingale"
    print(greeting)
|
||||
|
||||
if __name__ == "__main__":
    # Without arguments: process a payload from stdin.
    # With "hello" as first argument: print the greeting only.
    argc = len(sys.argv)
    if argc == 1:
        main()
    else:
        if sys.argv[1] == "hello":
            hello()
        else:
            print("I am confused")
|
|
@ -0,0 +1,188 @@
|
|||
# debug, release
|
||||
RunMode = "release"
|
||||
|
||||
# my cluster name
|
||||
ClusterName = "Default"
|
||||
|
||||
[Log]
|
||||
# log write dir
|
||||
Dir = "logs"
|
||||
# log level: DEBUG INFO WARNING ERROR
|
||||
Level = "INFO"
|
||||
# stdout, stderr, file
|
||||
Output = "stdout"
|
||||
# # rotate by time
|
||||
# KeepHours = 4
|
||||
# # rotate by size
|
||||
# RotateNum = 3
|
||||
# # unit: MB
|
||||
# RotateSize = 256
|
||||
|
||||
[HTTP]
|
||||
# http listening address
|
||||
Host = "0.0.0.0"
|
||||
# http listening port
|
||||
Port = 19000
|
||||
# https cert file path
|
||||
CertFile = ""
|
||||
# https key file path
|
||||
KeyFile = ""
|
||||
# whether print access log
|
||||
PrintAccessLog = false
|
||||
# whether enable pprof
|
||||
PProf = false
|
||||
# http graceful shutdown timeout, unit: s
|
||||
ShutdownTimeout = 30
|
||||
# max content length: 64M
|
||||
MaxContentLength = 67108864
|
||||
# http server read timeout, unit: s
|
||||
ReadTimeout = 20
|
||||
# http server write timeout, unit: s
|
||||
WriteTimeout = 40
|
||||
# http server idle timeout, unit: s
|
||||
IdleTimeout = 120
|
||||
|
||||
# [BasicAuth]
|
||||
# user002 = "ccc26da7b9aba533cbb263a36c07dcc9"
|
||||
|
||||
[Heartbeat]
|
||||
# auto detect if blank
|
||||
IP = ""
|
||||
# unit: ms
|
||||
Interval = 1000
|
||||
|
||||
[Alerting]
|
||||
NotifyScriptPath = "./etc/script/notify.py"
|
||||
NotifyConcurrency = 100
|
||||
|
||||
[Alerting.RedisPub]
|
||||
Enable = false
|
||||
# complete redis key: ${ChannelPrefix} + ${Cluster}
|
||||
ChannelPrefix = "/alerts/"
|
||||
|
||||
[NoData]
|
||||
Metric = "target_up"
|
||||
# unit: second
|
||||
Interval = 15
|
||||
|
||||
[Ibex]
|
||||
# callback: ${ibex}/${tplid}/${host}
|
||||
Address = "ibex:10090"
|
||||
# basic auth
|
||||
BasicAuthUser = "ibex"
|
||||
BasicAuthPass = "ibex"
|
||||
# unit: ms
|
||||
Timeout = 3000
|
||||
|
||||
[Redis]
|
||||
# address, ip:port
|
||||
Address = "redis:6379"
|
||||
# requirepass
|
||||
Password = ""
|
||||
# # db
|
||||
# DB = 0
|
||||
|
||||
[Gorm]
|
||||
# enable debug mode or not
|
||||
Debug = false
|
||||
# mysql postgres
|
||||
DBType = "mysql"
|
||||
# unit: s
|
||||
MaxLifetime = 7200
|
||||
# max open connections
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# table prefix
|
||||
TablePrefix = ""
|
||||
# enable auto migrate or not
|
||||
EnableAutoMigrate = false
|
||||
|
||||
[MySQL]
|
||||
# mysql address host:port
|
||||
Address = "mysql:3306"
|
||||
# mysql username
|
||||
User = "root"
|
||||
# mysql password
|
||||
Password = "1234"
|
||||
# database name
|
||||
DBName = "n9e_v5"
|
||||
# connection params
|
||||
Parameters = "charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
|
||||
|
||||
[Postgres]
|
||||
# pg address host:port
|
||||
Address = "postgres:5432"
|
||||
# pg user
|
||||
User = "root"
|
||||
# pg password
|
||||
Password = "1234"
|
||||
# database name
|
||||
DBName = "n9e_v5"
|
||||
# ssl mode
|
||||
SSLMode = "disable"
|
||||
|
||||
[Reader]
|
||||
# prometheus base url
|
||||
Url = "http://prometheus:9090"
|
||||
# Basic auth username
|
||||
BasicAuthUser = ""
|
||||
# Basic auth password
|
||||
BasicAuthPass = ""
|
||||
# timeout settings, unit: ms
|
||||
Timeout = 30000
|
||||
DialTimeout = 10000
|
||||
TLSHandshakeTimeout = 30000
|
||||
ExpectContinueTimeout = 1000
|
||||
IdleConnTimeout = 90000
|
||||
# time duration, unit: ms
|
||||
KeepAlive = 30000
|
||||
MaxConnsPerHost = 0
|
||||
MaxIdleConns = 100
|
||||
MaxIdleConnsPerHost = 10
|
||||
|
||||
[WriterOpt]
|
||||
# queue max size
|
||||
QueueMaxSize = 10000000
|
||||
# once pop samples number from queue
|
||||
QueuePopSize = 2000
|
||||
# unit: ms
|
||||
SleepInterval = 50
|
||||
|
||||
[[Writers]]
|
||||
Name = "prom"
|
||||
Url = "http://prometheus:9090/api/v1/write"
|
||||
# Basic auth username
|
||||
BasicAuthUser = ""
|
||||
# Basic auth password
|
||||
BasicAuthPass = ""
|
||||
# timeout settings, unit: ms
|
||||
Timeout = 30000
|
||||
DialTimeout = 10000
|
||||
TLSHandshakeTimeout = 30000
|
||||
ExpectContinueTimeout = 1000
|
||||
IdleConnTimeout = 90000
|
||||
# time duration, unit: ms
|
||||
KeepAlive = 30000
|
||||
MaxConnsPerHost = 0
|
||||
MaxIdleConns = 100
|
||||
MaxIdleConnsPerHost = 100
|
||||
|
||||
# [[Writers]]
|
||||
# Name = "m3db"
|
||||
# Url = "http://m3db:7201/api/v1/prom/remote/write"
|
||||
# # Basic auth username
|
||||
# BasicAuthUser = ""
|
||||
# # Basic auth password
|
||||
# BasicAuthPass = ""
|
||||
# # timeout settings, unit: ms
|
||||
# Timeout = 30000
|
||||
# DialTimeout = 10000
|
||||
# TLSHandshakeTimeout = 30000
|
||||
# ExpectContinueTimeout = 1000
|
||||
# IdleConnTimeout = 90000
|
||||
# # time duration, unit: ms
|
||||
# KeepAlive = 30000
|
||||
# MaxConnsPerHost = 0
|
||||
# MaxIdleConns = 100
|
||||
# MaxIdleConnsPerHost = 100
|
|
@ -0,0 +1,6 @@
|
|||
级别状态: S{{.Severity}} {{if .IsRecovered}}Recovered{{else}}Triggered{{end}}
|
||||
规则名称: {{.RuleName}}{{if .RuleNote}}
|
||||
规则备注: {{.RuleNote}}{{end}}
|
||||
监控指标: {{.TagsJSON}}
|
||||
触发时间: {{timeformat .TriggerTime}}
|
||||
触发时值: {{.TriggerValue}}
|
|
@ -129,7 +129,7 @@
|
|||
<div class="wrapper">
|
||||
<div class="main">
|
||||
<header>
|
||||
<h3 class="title">{{Sname}}</h3>
|
||||
<h3 class="title">{{.RuleName}}</h3>
|
||||
<p class="sub-desc"></p>
|
||||
</header>
|
||||
|
||||
|
@ -138,61 +138,45 @@
|
|||
<div class="body">
|
||||
<table cellspacing="0" cellpadding="0" border="0">
|
||||
<tbody>
|
||||
% if IsAlert:
|
||||
<tr class="fail">
|
||||
<th>级别状态:</th>
|
||||
<td>{{Status}}</td>
|
||||
</tr>
|
||||
% else:
|
||||
{{if .IsRecovered}}
|
||||
<tr class="succ">
|
||||
<th>级别状态:</th>
|
||||
<td>{{Status}}</td>
|
||||
<td>S{{.Severity}} Recovered</td>
|
||||
</tr>
|
||||
% end
|
||||
{{else}}
|
||||
<tr class="fail">
|
||||
<th>级别状态:</th>
|
||||
<td>S{{.Severity}} Triggered</td>
|
||||
</tr>
|
||||
{{end}}
|
||||
|
||||
% if IsMachineDep:
|
||||
<tr>
|
||||
<th>告警设备:</th>
|
||||
<td>{{Ident}}</td>
|
||||
<th>策略备注:</th>
|
||||
<td>{{.RuleNote}}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>所属分组:</th>
|
||||
<td>
|
||||
{{Classpath}}<br />
|
||||
</td>
|
||||
<th>设备备注:</th>
|
||||
<td>{{.TargetNote}}</td>
|
||||
</tr>
|
||||
% end
|
||||
<tr>
|
||||
<th>监控指标:</th>
|
||||
<td>{{Metric}}</td>
|
||||
<td>{{.TagsJSON}}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>tags:</th>
|
||||
<td>{{Tags}}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>当前值:</th>
|
||||
<td>{{Value}}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>报警说明:</th>
|
||||
<td>
|
||||
{{ReadableExpression}}
|
||||
</td>
|
||||
<th>触发时值:</th>
|
||||
<td>{{.TriggerValue}}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>触发时间:</th>
|
||||
<td>
|
||||
{{TriggerTime}}
|
||||
{{timeformat .TriggerTime}}
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>报警详情:</th>
|
||||
<td>{{Elink}}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>报警策略:</th>
|
||||
<td>{{Slink}}</td>
|
||||
<th>PromQL:</th>
|
||||
<td>
|
||||
{{.PromQl}}
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
@ -200,11 +184,6 @@
|
|||
<hr>
|
||||
|
||||
<footer>
|
||||
<div class="footer-logo">
|
||||
<a href="https://n9e.didiyun.com">
|
||||
<img src="https://s3-gz01.didistatic.com/n9e-pub/image/n9e-logo-bg-white.png" class="footer-logo-image" alt="">
|
||||
</a>
|
||||
</div>
|
||||
<div class="copyright" style="font-style: italic">
|
||||
我们希望与您一起,将监控这个事情,做到极致!
|
||||
</div>
|
|
@ -0,0 +1 @@
|
|||
{{if .IsRecovered}}Recovered{{else}}Triggered{{end}}: {{.RuleName}} {{.TagsJSON}}
|
|
@ -0,0 +1,6 @@
|
|||
**级别状态**: {{if .IsRecovered}}<font color="info">S{{.Severity}} Recovered</font>{{else}}<font color="warning">S{{.Severity}} Triggered</font>{{end}}
|
||||
**规则标题**: {{.RuleName}}{{if .RuleNote}}
|
||||
**规则备注**: {{.RuleNote}}{{end}}
|
||||
**监控指标**: {{.TagsJSON}}
|
||||
**触发时间**: {{timeformat .TriggerTime}}
|
||||
**触发时值**: {{.TriggerValue}}
|
|
@ -0,0 +1,166 @@
|
|||
# debug, release
|
||||
RunMode = "release"
|
||||
|
||||
# # custom i18n dict config
|
||||
# I18N = "./etc/i18n.json"
|
||||
|
||||
# do not change
|
||||
AdminRole = "Admin"
|
||||
|
||||
# Linkage with notify.py script
|
||||
NotifyChannels = [ "email", "dingtalk", "wecom" ]
|
||||
|
||||
[[ContactKeys]]
|
||||
Label = "Wecom Robot Token"
|
||||
Key = "wecom_robot_token"
|
||||
|
||||
[[ContactKeys]]
|
||||
Label = "Dingtalk Robot Token"
|
||||
Key = "dingtalk_robot_token"
|
||||
|
||||
[Log]
|
||||
# log write dir
|
||||
Dir = "logs"
|
||||
# log level: DEBUG INFO WARNING ERROR
|
||||
Level = "DEBUG"
|
||||
# stdout, stderr, file
|
||||
Output = "stdout"
|
||||
# # rotate by time
|
||||
# KeepHours = 4
|
||||
# # rotate by size
|
||||
# RotateNum = 3
|
||||
# # unit: MB
|
||||
# RotateSize = 256
|
||||
|
||||
[HTTP]
|
||||
# http listening address
|
||||
Host = "0.0.0.0"
|
||||
# http listening port
|
||||
Port = 18000
|
||||
# https cert file path
|
||||
CertFile = ""
|
||||
# https key file path
|
||||
KeyFile = ""
|
||||
# whether to print the access log
|
||||
PrintAccessLog = true
|
||||
# whether to enable pprof
|
||||
PProf = false
|
||||
# http graceful shutdown timeout, unit: s
|
||||
ShutdownTimeout = 30
|
||||
# max content length: 64M
|
||||
MaxContentLength = 67108864
|
||||
# http server read timeout, unit: s
|
||||
ReadTimeout = 20
|
||||
# http server write timeout, unit: s
|
||||
WriteTimeout = 40
|
||||
# http server idle timeout, unit: s
|
||||
IdleTimeout = 120
|
||||
|
||||
[JWTAuth]
|
||||
# signing key
|
||||
SigningKey = "5b94a0fd640fe2765af826acfe42d151"
|
||||
# unit: min
|
||||
AccessExpired = 1500
|
||||
# unit: min
|
||||
RefreshExpired = 10080
|
||||
RedisKeyPrefix = "/jwt/"
|
||||
|
||||
[BasicAuth]
|
||||
user001 = "ccc26da7b9aba533cbb263a36c07dcc5"
|
||||
|
||||
[LDAP]
|
||||
Enable = false
|
||||
Host = "ldap.example.org"
|
||||
Port = 389
|
||||
BaseDn = "dc=example,dc=org"
|
||||
# AD: manager@example.org
|
||||
BindUser = "cn=manager,dc=example,dc=org"
|
||||
BindPass = "*******"
|
||||
# openldap format e.g. (&(uid=%s))
|
||||
# AD format e.g. (&(sAMAccountName=%s))
|
||||
AuthFilter = "(&(uid=%s))"
|
||||
CoverAttributes = true
|
||||
TLS = false
|
||||
StartTLS = true
|
||||
|
||||
[LDAP.Attributes]
|
||||
Nickname = "cn"
|
||||
Phone = "mobile"
|
||||
Email = "mail"
|
||||
|
||||
[Redis]
|
||||
# address, ip:port
|
||||
Address = "redis:6379"
|
||||
# requirepass
|
||||
Password = ""
|
||||
# # db
|
||||
# DB = 0
|
||||
|
||||
[Gorm]
|
||||
# enable debug mode or not
|
||||
Debug = true
|
||||
# database type: mysql or postgres
|
||||
DBType = "mysql"
|
||||
# unit: s
|
||||
MaxLifetime = 7200
|
||||
# max open connections
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# table prefix
|
||||
TablePrefix = ""
|
||||
# enable auto migrate or not
|
||||
EnableAutoMigrate = false
|
||||
|
||||
[MySQL]
|
||||
# mysql address host:port
|
||||
Address = "mysql:3306"
|
||||
# mysql username
|
||||
User = "root"
|
||||
# mysql password
|
||||
Password = "1234"
|
||||
# database name
|
||||
DBName = "n9e_v5"
|
||||
# connection params
|
||||
Parameters = "charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
|
||||
|
||||
[Postgres]
|
||||
# pg address host:port
|
||||
Address = "postgres:5432"
|
||||
# pg user
|
||||
User = "root"
|
||||
# pg password
|
||||
Password = "1234"
|
||||
# database name
|
||||
DBName = "n9e_v5"
|
||||
# ssl mode
|
||||
SSLMode = "disable"
|
||||
|
||||
[[Clusters]]
|
||||
# Prometheus cluster name
|
||||
Name = "Default"
|
||||
# Prometheus APIs base url
|
||||
Prom = "http://prometheus:9090"
|
||||
# Basic auth username
|
||||
BasicAuthUser = ""
|
||||
# Basic auth password
|
||||
BasicAuthPass = ""
|
||||
# timeout settings, unit: ms
|
||||
Timeout = 30000
|
||||
DialTimeout = 10000
|
||||
TLSHandshakeTimeout = 30000
|
||||
ExpectContinueTimeout = 1000
|
||||
IdleConnTimeout = 90000
|
||||
# time duration, unit: ms
|
||||
KeepAlive = 30000
|
||||
MaxConnsPerHost = 0
|
||||
MaxIdleConns = 100
|
||||
MaxIdleConnsPerHost = 100
|
||||
|
||||
[Ibex]
|
||||
Address = "http://ibex:10090"
|
||||
# basic auth
|
||||
BasicAuthUser = "ibex"
|
||||
BasicAuthPass = "ibex"
|
||||
# unit: ms
|
||||
Timeout = 3000
|
|
@ -0,0 +1,29 @@
|
|||
# my global config
|
||||
global:
|
||||
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
|
||||
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
|
||||
# scrape_timeout is set to the global default (10s).
|
||||
|
||||
# Alertmanager configuration
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets:
|
||||
# - alertmanager:9093
|
||||
|
||||
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
|
||||
rule_files:
|
||||
# - "first_rules.yml"
|
||||
# - "second_rules.yml"
|
||||
|
||||
# A scrape configuration containing exactly one endpoint to scrape:
|
||||
# Here it's Prometheus itself.
|
||||
scrape_configs:
|
||||
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||
- job_name: 'prometheus'
|
||||
|
||||
# metrics_path defaults to '/metrics'
|
||||
# scheme defaults to 'http'.
|
||||
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
File diff suppressed because it is too large
Load Diff
|
@ -1,11 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Nightingale</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Hello, Nightingale</h1>
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -1,33 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
now=$(date +%s)
|
||||
|
||||
echo '[
|
||||
{
|
||||
"metric": "plugin_example_gauge",
|
||||
"tags": {
|
||||
"type": "testcase",
|
||||
"author": "ulric"
|
||||
},
|
||||
"value": '${now}',
|
||||
"type": "gauge"
|
||||
},
|
||||
{
|
||||
"metric": "plugin_example_rate",
|
||||
"tags": {
|
||||
"type": "testcase",
|
||||
"author": "ulric"
|
||||
},
|
||||
"value": '${now}',
|
||||
"type": "rate"
|
||||
},
|
||||
{
|
||||
"metric": "plugin_example_increase",
|
||||
"tags": {
|
||||
"type": "testcase",
|
||||
"author": "ulric"
|
||||
},
|
||||
"value": '${now}',
|
||||
"type": "increase"
|
||||
}
|
||||
]'
|
|
@ -1,191 +0,0 @@
|
|||
[
|
||||
{
|
||||
"name": "dns解析时间超过2秒",
|
||||
"type": 1,
|
||||
"expression": {
|
||||
"evaluation_interval": 10,
|
||||
"promql": "probe_dns_lookup_time_seconds>2"
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 3,
|
||||
"notify_channels": "",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626935980,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
},
|
||||
{
|
||||
"name": "https证书过期时间小于7天",
|
||||
"type": 1,
|
||||
"expression": {
|
||||
"evaluation_interval": 10,
|
||||
"promql": "(probe_ssl_earliest_cert_expiry - time()) / 3600 / 24 <7"
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 1,
|
||||
"notify_channels": "",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626935909,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
},
|
||||
{
|
||||
"name": "http响应数据传输占比超过70%",
|
||||
"type": 1,
|
||||
"expression": {
|
||||
"evaluation_interval": 10,
|
||||
"promql": "100 * avg(probe_http_duration_seconds{phase=\"transfer\"})by(instance) / sum(probe_http_duration_seconds) by(instance) >70"
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 2,
|
||||
"notify_channels": "",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626936324,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
},
|
||||
{
|
||||
"name": "http接口探测失败",
|
||||
"type": 1,
|
||||
"expression": {
|
||||
"evaluation_interval": 10,
|
||||
"promql": "probe_success{job=~\".*http.*\"}==0"
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 1,
|
||||
"notify_channels": "",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626935627,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
},
|
||||
{
|
||||
"name": "http接口探测耗时超过3秒",
|
||||
"type": 1,
|
||||
"expression": {
|
||||
"evaluation_interval": 10,
|
||||
"promql": "sum(probe_http_duration_seconds) by (instance) >3\n"
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 3,
|
||||
"notify_channels": "",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626936059,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
},
|
||||
{
|
||||
"name": "http接口返回状态码4xx/5xx错误",
|
||||
"type": 1,
|
||||
"expression": {
|
||||
"evaluation_interval": 10,
|
||||
"promql": "probe_http_status_code >=400"
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 1,
|
||||
"notify_channels": "",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626936145,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
},
|
||||
{
|
||||
"name": "icmp探测失败",
|
||||
"type": 1,
|
||||
"expression": {
|
||||
"evaluation_interval": 10,
|
||||
"promql": "probe_success{job=~\".*icmp.*\"}==0"
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 1,
|
||||
"notify_channels": "",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626935855,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
},
|
||||
{
|
||||
"name": "tcp端口探测失败",
|
||||
"type": 1,
|
||||
"expression": {
|
||||
"evaluation_interval": 10,
|
||||
"promql": "probe_success{job=~\".*tcp.*\"}==0"
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 1,
|
||||
"notify_channels": "",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626935874,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
},
|
||||
{
|
||||
"name": "机器ssh探测失败",
|
||||
"type": 1,
|
||||
"expression": {
|
||||
"evaluation_interval": 10,
|
||||
"promql": "probe_success{job=~\".*ssh.*\"}==0\n"
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 1,
|
||||
"notify_channels": "",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626935827,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
}
|
||||
]
|
|
@ -1,271 +0,0 @@
|
|||
[
|
||||
{
|
||||
"name": "cpu使用率大于85%",
|
||||
"type": 0,
|
||||
"expression": {
|
||||
"together_or_any": 0,
|
||||
"trigger_conditions": [
|
||||
{
|
||||
"optr": ">",
|
||||
"func": "all",
|
||||
"metric": "system_cpu_util",
|
||||
"params": [],
|
||||
"threshold": 85
|
||||
}
|
||||
],
|
||||
"tags_filters": []
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 2,
|
||||
"notify_channels": "",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626517658,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
},
|
||||
{
|
||||
"name": "内存利用率大于75%",
|
||||
"type": 0,
|
||||
"expression": {
|
||||
"together_or_any": 0,
|
||||
"trigger_conditions": [
|
||||
{
|
||||
"func": "all",
|
||||
"metric": "system_mem_used_percent",
|
||||
"optr": ">",
|
||||
"params": [],
|
||||
"threshold": 75
|
||||
}
|
||||
],
|
||||
"tags_filters": []
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 2,
|
||||
"notify_channels": "sms email",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626517103,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
},
|
||||
{
|
||||
"name": "机器loadavg大于16",
|
||||
"type": 0,
|
||||
"expression": {
|
||||
"tags_filters": [],
|
||||
"trigger_conditions": [
|
||||
{
|
||||
"func": "all",
|
||||
"metric": "system_cpu_load1",
|
||||
"optr": ">",
|
||||
"params": [],
|
||||
"threshold": 16
|
||||
}
|
||||
]
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 1,
|
||||
"notify_channels": "sms email",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626517103,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
},
|
||||
{
|
||||
"name": "磁盘利用率达到85%",
|
||||
"type": 0,
|
||||
"expression": {
|
||||
"tags_filters": [],
|
||||
"trigger_conditions": [
|
||||
{
|
||||
"func": "all",
|
||||
"metric": "system_disk_used_percent",
|
||||
"optr": ">",
|
||||
"params": [],
|
||||
"threshold": 85
|
||||
}
|
||||
]
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 3,
|
||||
"notify_channels": "email",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626517103,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
},
|
||||
{
|
||||
"name": "磁盘利用率达到88%",
|
||||
"type": 0,
|
||||
"expression": {
|
||||
"tags_filters": [],
|
||||
"trigger_conditions": [
|
||||
{
|
||||
"func": "all",
|
||||
"metric": "system_disk_used_percent",
|
||||
"optr": ">",
|
||||
"params": [],
|
||||
"threshold": 88
|
||||
}
|
||||
]
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 2,
|
||||
"notify_channels": "email sms",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626517103,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
},
|
||||
{
|
||||
"name": "磁盘利用率达到92%",
|
||||
"type": 0,
|
||||
"expression": {
|
||||
"tags_filters": [],
|
||||
"trigger_conditions": [
|
||||
{
|
||||
"func": "all",
|
||||
"metric": "system_disk_used_percent",
|
||||
"optr": ">",
|
||||
"params": [],
|
||||
"threshold": 88
|
||||
}
|
||||
]
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 1,
|
||||
"notify_channels": "email sms voice",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626517103,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
},
|
||||
{
|
||||
"name": "端口挂了",
|
||||
"type": 0,
|
||||
"expression": {
|
||||
"tags_filters": [],
|
||||
"trigger_conditions": [
|
||||
{
|
||||
"func": "all",
|
||||
"metric": "proc_port_listen",
|
||||
"optr": "<",
|
||||
"params": [],
|
||||
"threshold": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 2,
|
||||
"notify_channels": "sms email",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626517103,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
},
|
||||
{
|
||||
"name": "网卡入方向错包",
|
||||
"type": 0,
|
||||
"expression": {
|
||||
"together_or_any": 0,
|
||||
"trigger_conditions": [
|
||||
{
|
||||
"optr": ">",
|
||||
"func": "all",
|
||||
"metric": "system_net_packets_in_error",
|
||||
"params": [
|
||||
1
|
||||
],
|
||||
"threshold": 3
|
||||
}
|
||||
],
|
||||
"tags_filters": []
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 2,
|
||||
"notify_channels": "",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626517809,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
},
|
||||
{
|
||||
"name": "网卡出方向错包",
|
||||
"type": 0,
|
||||
"expression": {
|
||||
"together_or_any": 0,
|
||||
"trigger_conditions": [
|
||||
{
|
||||
"optr": ">",
|
||||
"func": "all",
|
||||
"metric": "system_net_packets_out_error",
|
||||
"params": [
|
||||
1
|
||||
],
|
||||
"threshold": 3
|
||||
}
|
||||
],
|
||||
"tags_filters": []
|
||||
},
|
||||
"status": 0,
|
||||
"enable_stime": "00:00",
|
||||
"enable_etime": "23:59",
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"recovery_notify": 0,
|
||||
"priority": 2,
|
||||
"notify_channels": "",
|
||||
"runbook_url": "",
|
||||
"note": "",
|
||||
"create_at": 1626517838,
|
||||
"alert_duration": 60,
|
||||
"notify_users_detail": null,
|
||||
"notify_groups_detail": null
|
||||
}
|
||||
]
|
|
@ -1,226 +0,0 @@
|
|||
[
|
||||
{
|
||||
"id": 0,
|
||||
"name": "blackbox_exporter",
|
||||
"tags": "",
|
||||
"configs": "{\"tags\":[{\"tagName\":\"http_probe_job\",\"key\":\"job\",\"value\":\"blackbox-http\",\"prefix\":false},{\"tagName\":\"http_probe_instance\",\"key\":\"instance\",\"value\":\"*\",\"prefix\":false}]}",
|
||||
"chart_groups": [
|
||||
{
|
||||
"id": 0,
|
||||
"dashboard_id": 0,
|
||||
"name": "http接口探测",
|
||||
"weight": 0,
|
||||
"charts": [
|
||||
{
|
||||
"id": 440,
|
||||
"group_id": 109,
|
||||
"configs": "{\"name\":\"https的探测\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_http_ssl==1\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 441,
|
||||
"group_id": 109,
|
||||
"configs": "{\"name\":\"http的探测\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_http_ssl==0\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 442,
|
||||
"group_id": 109,
|
||||
"configs": "{\"name\":\"https探测目标个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_http_ssl==1)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 443,
|
||||
"group_id": 109,
|
||||
"configs": "{\"name\":\"http探测目标个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_http_ssl==0)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 446,
|
||||
"group_id": 109,
|
||||
"configs": "{\"name\":\"http探测成功个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_success{job=~\\\".*http.*\\\"}==1)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":2,\"i\":\"4\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 447,
|
||||
"group_id": 109,
|
||||
"configs": "{\"name\":\"http探测失败列表\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_success{job=~\\\".*http.*\\\"}==0\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":2,\"i\":\"5\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 448,
|
||||
"group_id": 109,
|
||||
"configs": "{\"name\":\"http探测失败个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_success{job=~\\\".*http.*\\\"}==0)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":2,\"i\":\"6\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 449,
|
||||
"group_id": 109,
|
||||
"configs": "{\"name\":\"http探测总耗时 单位秒\",\"mode\":\"promethues\",\"prome_ql\":[\"sum(probe_http_duration_seconds) by (instance)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":2,\"i\":\"7\"}}",
|
||||
"weight": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 0,
|
||||
"dashboard_id": 0,
|
||||
"name": "https接口探测汇总",
|
||||
"weight": 1,
|
||||
"charts": [
|
||||
{
|
||||
"id": 444,
|
||||
"group_id": 110,
|
||||
"configs": "{\"name\":\"tls版本信息\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_tls_version_info\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 445,
|
||||
"group_id": 110,
|
||||
"configs": "{\"name\":\"tls证书过期时间 单位:天\",\"mode\":\"promethues\",\"prome_ql\":[\"(probe_ssl_earliest_cert_expiry - time()) / 3600 / 24\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}",
|
||||
"weight": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 0,
|
||||
"dashboard_id": 0,
|
||||
"name": "http接口各阶段耗时详情",
|
||||
"weight": 2,
|
||||
"charts": [
|
||||
{
|
||||
"id": 450,
|
||||
"group_id": 111,
|
||||
"configs": "{\"name\":\"单个目标的各阶段耗时\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_http_duration_seconds{instance=~\\\"$instance\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 451,
|
||||
"group_id": 111,
|
||||
"configs": "{\"name\":\"[阶段1] dns解析时间\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_http_duration_seconds{instance=~\\\"$instance\\\",phase=\\\"resolve\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 452,
|
||||
"group_id": 111,
|
||||
"configs": "{\"name\":\"[可无]tls握手时间\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_http_duration_seconds{instance=~\\\"$instance\\\",phase=\\\"tls\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 453,
|
||||
"group_id": 111,
|
||||
"configs": "{\"name\":\"[阶段2] tcp连接耗时\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_http_duration_seconds{instance=~\\\"$instance\\\",phase=\\\"connect\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 454,
|
||||
"group_id": 111,
|
||||
"configs": "{\"name\":\"[阶段3] 服务端处理耗时\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_http_duration_seconds{instance=~\\\"$instance\\\",phase=\\\"processing\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":2,\"i\":\"4\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 455,
|
||||
"group_id": 111,
|
||||
"configs": "{\"name\":\"[阶段4] 传输响应耗时\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_http_duration_seconds{instance=~\\\"$instance\\\",phase=\\\"transfer\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":2,\"i\":\"5\"}}",
|
||||
"weight": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 0,
|
||||
"dashboard_id": 0,
|
||||
"name": "ssh存活探测(配置了ssh探测job才有)",
|
||||
"weight": 3,
|
||||
"charts": [
|
||||
{
|
||||
"id": 456,
|
||||
"group_id": 112,
|
||||
"configs": "{\"name\":\"ssh探测成功个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_success{job=~\\\".*ssh.*\\\"}==1)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 457,
|
||||
"group_id": 112,
|
||||
"configs": "{\"name\":\"ssh探测失败个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_success{job=~\\\".*ssh.*\\\"}==0)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 458,
|
||||
"group_id": 112,
|
||||
"configs": "{\"name\":\"ssh探测失败详情\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_success{job=~\\\".*ssh.*\\\"}==0\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 459,
|
||||
"group_id": 112,
|
||||
"configs": "{\"name\":\"ssh探测耗时\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_duration_seconds{job=~\\\".*ssh.*\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}",
|
||||
"weight": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 0,
|
||||
"dashboard_id": 0,
|
||||
"name": "icmp探测(配置了icmp探测job才有)",
|
||||
"weight": 4,
|
||||
"charts": [
|
||||
{
|
||||
"id": 460,
|
||||
"group_id": 113,
|
||||
"configs": "{\"name\":\"icmp探测成功个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_success{job=~\\\".*icmp.*\\\"}==1)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 461,
|
||||
"group_id": 113,
|
||||
"configs": "{\"name\":\"icmp探测失败个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_success{job=~\\\".*icmp.*\\\"}==0)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 462,
|
||||
"group_id": 113,
|
||||
"configs": "{\"name\":\"icmp探测失败详情\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_success{job=~\\\".*icmp.*\\\"}==0\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 463,
|
||||
"group_id": 113,
|
||||
"configs": "{\"name\":\"icmp探测总耗时\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_duration_seconds{job=~\\\".*icmp.*\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}",
|
||||
"weight": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 0,
|
||||
"dashboard_id": 0,
|
||||
"name": "tcp端口探测(配置了tcp探测job才有)",
|
||||
"weight": 5,
|
||||
"charts": [
|
||||
{
|
||||
"id": 464,
|
||||
"group_id": 114,
|
||||
"configs": "{\"name\":\"tcp端口探测成功个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_success{job=~\\\".*tcp.*\\\"}==1)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 465,
|
||||
"group_id": 114,
|
||||
"configs": "{\"name\":\"tcp端口探测失败个数\",\"mode\":\"promethues\",\"prome_ql\":[\"count(probe_success{job=~\\\".*tcp.*\\\"}==0)\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 466,
|
||||
"group_id": 114,
|
||||
"configs": "{\"name\":\"tcp端口探测失败列表\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_success{job=~\\\".*tcp.*\\\"}==0\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 467,
|
||||
"group_id": 114,
|
||||
"configs": "{\"name\":\"tcp端口探测耗时\",\"mode\":\"promethues\",\"prome_ql\":[\"probe_duration_seconds{job=~\\\".*tcp.*\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}",
|
||||
"weight": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
|
@ -1,306 +0,0 @@
|
|||
[
|
||||
{
|
||||
"id": 0,
|
||||
"name": "jmx_exporter",
|
||||
"tags": "",
|
||||
"configs": "{\"tags\":[{\"tagName\":\"java_app\",\"key\":\"java_app\",\"value\":\"*\",\"prefix\":false}]}",
|
||||
"chart_groups": [
|
||||
{
|
||||
"id": 0,
|
||||
"dashboard_id": 0,
|
||||
"name": "jvm统计",
|
||||
"weight": 1,
|
||||
"charts": [
|
||||
{
|
||||
"id": 278,
|
||||
"group_id": 75,
|
||||
"configs": "{\"name\":\"jvm版本信息\",\"mode\":\"promethues\",\"prome_ql\":[\"avg(jvm_info{java_app=~\\\"$java_app\\\"}) without (runtime,vendor)\"],\"layout\":{\"h\":2,\"w\":12,\"x\":0,\"y\":0,\"i\":\"0\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 309,
|
||||
"group_id": 75,
|
||||
"configs": "{\"name\":\"java进程启动时间 单位:小时\",\"mode\":\"promethues\",\"prome_ql\":[\"(time() - process_start_time_seconds{java_app=~\\\"$java_app\\\"})/3600\"],\"layout\":{\"h\":2,\"w\":12,\"x\":12,\"y\":0,\"i\":\"1\"}}",
|
||||
"weight": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 0,
|
||||
"dashboard_id": 0,
|
||||
"name": "jvm内存使用",
|
||||
"weight": 2,
|
||||
"charts": [
|
||||
{
|
||||
"id": 279,
|
||||
"group_id": 76,
|
||||
"configs": "{\"name\":\"jvm内存使用 - nonheap 非堆区\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_memory_bytes_used{java_app=~\\\"$java_app\\\",area=\\\"nonheap\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 280,
|
||||
"group_id": 76,
|
||||
"configs": "{\"name\":\"jvm内存使用 - heap堆区\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_memory_bytes_used{java_app=~\\\"$java_app\\\",area=\\\"heap\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 281,
|
||||
"group_id": 76,
|
||||
"configs": "{\"name\":\"提交给 Java虚拟机使用的内存量 heap 堆区\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_memory_bytes_committed{java_app=~\\\"$java_app\\\",area=\\\"heap\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 282,
|
||||
"group_id": 76,
|
||||
"configs": "{\"name\":\"提交给 Java虚拟机使用的内存量 nonheap 非堆区\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_memory_bytes_committed{java_app=~\\\"$java_app\\\",area=\\\"nonheap\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 283,
|
||||
"group_id": 76,
|
||||
"configs": "{\"name\":\"jvm最大内存 \",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_memory_bytes_max{java_app=~\\\"$java_app\\\",area=\\\"heap\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":2,\"i\":\"4\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 285,
|
||||
"group_id": 76,
|
||||
"configs": "{\"name\":\"jvm 初始化内存\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_memory_bytes_init{java_app=~\\\"$java_app\\\",area=\\\"heap\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":2,\"i\":\"5\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 286,
|
||||
"group_id": 76,
|
||||
"configs": "{\"name\":\"jvm内存使用百分比% heap堆区 \",\"mode\":\"promethues\",\"prome_ql\":[\"100 * jvm_memory_bytes_used{java_app=~\\\"$java_app\\\",area=\\\"heap\\\"}/jvm_memory_bytes_max{java_app=~\\\"$java_app\\\",area=\\\"heap\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":2,\"i\":\"6\"}}",
|
||||
"weight": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 0,
|
||||
"dashboard_id": 0,
|
||||
"name": "jvm内存池",
|
||||
"weight": 3,
|
||||
"charts": [
|
||||
{
|
||||
"id": 287,
|
||||
"group_id": 77,
|
||||
"configs": "{\"name\":\"jvm内存池分pool展示\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_memory_pool_bytes_max{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":24,\"x\":0,\"y\":0,\"i\":\"0\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 316,
|
||||
"group_id": 77,
|
||||
"configs": "{\"name\":\" JVM 缓冲池使用缓存大小\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_buffer_pool_used_bytes{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":2,\"i\":\"1\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 317,
|
||||
"group_id": 77,
|
||||
"configs": "{\"name\":\"JVM 缓冲池的字节容量\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_buffer_pool_capacity_bytes{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":2,\"i\":\"2\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 318,
|
||||
"group_id": 77,
|
||||
"configs": "{\"name\":\"JVM 缓冲池使用的字节大小\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_buffer_pool_used_bytes{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":2,\"i\":\"3\"}}",
|
||||
"weight": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 0,
|
||||
"dashboard_id": 0,
|
||||
"name": "jvm gc情况",
|
||||
"weight": 4,
|
||||
"charts": [
|
||||
{
|
||||
"id": 288,
|
||||
"group_id": 78,
|
||||
"configs": "{\"name\":\"新生代gc耗时 1分钟\",\"mode\":\"promethues\",\"prome_ql\":[\"increase(jvm_gc_collection_seconds_sum{java_app=~\\\"$java_app\\\",gc=\\\"G1 Young Generation\\\" }[1m])\"],\"layout\":{\"h\":2,\"w\":8,\"x\":0,\"y\":0,\"i\":\"0\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 289,
|
||||
"group_id": 78,
|
||||
"configs": "{\"name\":\"老生代gc耗时 1分钟\",\"mode\":\"promethues\",\"prome_ql\":[\"increase(jvm_gc_collection_seconds_sum{java_app=~\\\"$java_app\\\",gc=\\\"G1 Old Generation\\\" }[1m])\"],\"layout\":{\"h\":2,\"w\":8,\"x\":8,\"y\":0,\"i\":\"1\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 290,
|
||||
"group_id": 78,
|
||||
"configs": "{\"name\":\"新生代gc次数 1分钟\",\"mode\":\"promethues\",\"prome_ql\":[\"increase(jvm_gc_collection_seconds_count{java_app=~\\\"$java_app\\\",gc=\\\"G1 Young Generation\\\" }[1m])\"],\"layout\":{\"h\":2,\"w\":8,\"x\":16,\"y\":0,\"i\":\"2\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 291,
|
||||
"group_id": 78,
|
||||
"configs": "{\"name\":\"老生代gc次数 1分钟\",\"mode\":\"promethues\",\"prome_ql\":[\"increase(jvm_gc_collection_seconds_count{java_app=~\\\"$java_app\\\",gc=\\\"G1 Old Generation\\\" }[1m])\"],\"layout\":{\"h\":2,\"w\":8,\"x\":0,\"y\":2,\"i\":\"3\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 292,
|
||||
"group_id": 78,
|
||||
"configs": "{\"name\":\"新生代平均gc耗时 秒\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_gc_collection_seconds_sum{java_app=~\\\"$java_app\\\",gc=\\\"G1 Young Generation\\\" }/jvm_gc_collection_seconds_count{java_app=~\\\"$java_app\\\",gc=\\\"G1 Young Generation\\\" }\"],\"layout\":{\"h\":2,\"w\":8,\"x\":8,\"y\":2,\"i\":\"4\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 293,
|
||||
"group_id": 78,
|
||||
"configs": "{\"name\":\"老生代平均gc耗时\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_gc_collection_seconds_sum{java_app=~\\\"$java_app\\\",gc=\\\"G1 Old Generation\\\"}/jvm_gc_collection_seconds_count{java_app=~\\\"$java_app\\\",gc=\\\"G1 Old Generation\\\" }\"],\"layout\":{\"h\":2,\"w\":8,\"x\":16,\"y\":2,\"i\":\"5\"}}",
|
||||
"weight": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 0,
|
||||
"dashboard_id": 0,
|
||||
"name": "jvm线程情况",
|
||||
"weight": 5,
|
||||
"charts": [
|
||||
{
|
||||
"id": 294,
|
||||
"group_id": 79,
|
||||
"configs": "{\"name\":\"当前线程数\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_threads_current{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 295,
|
||||
"group_id": 79,
|
||||
"configs": "{\"name\":\"守护线程数\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_threads_daemon{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 296,
|
||||
"group_id": 79,
|
||||
"configs": "{\"name\":\"死锁线程数\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_threads_deadlocked{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 297,
|
||||
"group_id": 79,
|
||||
"configs": "{\"name\":\"活动线程峰值\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_threads_peak{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 298,
|
||||
"group_id": 79,
|
||||
"configs": "{\"name\":\"自JVM启动后,启动的线程总量(包括daemon,non-daemon和终止了的)\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_threads_started_total{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":2,\"i\":\"4\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 299,
|
||||
"group_id": 79,
|
||||
"configs": "{\"name\":\"当前TERMINATED线程个数\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_threads_state{java_app=~\\\"$java_app\\\",state=\\\"TERMINATED\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":4,\"i\":\"5\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 300,
|
||||
"group_id": 79,
|
||||
"configs": "{\"name\":\"当前RUNNABLE线程个数\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_threads_state{java_app=~\\\"$java_app\\\",state=\\\"RUNNABLE\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":2,\"i\":\"6\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 301,
|
||||
"group_id": 79,
|
||||
"configs": "{\"name\":\"当前NEW线程个数\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_threads_state{java_app=~\\\"$java_app\\\",state=\\\"NEW\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":2,\"i\":\"7\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 302,
|
||||
"group_id": 79,
|
||||
"configs": "{\"name\":\"当前TIMED_WAITING线程个数\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_threads_state{java_app=~\\\"$java_app\\\",state=\\\"TIMED_WAITING\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":4,\"i\":\"8\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 303,
|
||||
"group_id": 79,
|
||||
"configs": "{\"name\":\"当前BLOCKED线程个数\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_threads_state{java_app=~\\\"$java_app\\\",state=\\\"BLOCKED\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":4,\"i\":\"9\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 304,
|
||||
"group_id": 79,
|
||||
"configs": "{\"name\":\"当前WAITING线程个数\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_threads_state{java_app=~\\\"$java_app\\\",state=\\\"WAITING\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":4,\"i\":\"10\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 305,
|
||||
"group_id": 79,
|
||||
"configs": "{\"name\":\"当前线程状态汇总\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_threads_state{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":2,\"i\":\"11\"}}",
|
||||
"weight": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 0,
|
||||
"dashboard_id": 0,
|
||||
"name": "加载类情况",
|
||||
"weight": 6,
|
||||
"charts": [
|
||||
{
|
||||
"id": 306,
|
||||
"group_id": 80,
|
||||
"configs": "{\"name\":\"jvm 当前加载的类个数 \",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_classes_loaded{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":8,\"x\":0,\"y\":0,\"i\":\"0\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 307,
|
||||
"group_id": 80,
|
||||
"configs": "{\"name\":\"jvm启动以来加载的类总个数\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_classes_loaded_total{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":8,\"x\":8,\"y\":0,\"i\":\"1\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 308,
|
||||
"group_id": 80,
|
||||
"configs": "{\"name\":\"jvm启动以来卸载的类总个数\",\"mode\":\"promethues\",\"prome_ql\":[\"jvm_classes_unloaded_total{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":8,\"x\":16,\"y\":0,\"i\":\"2\"}}",
|
||||
"weight": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 0,
|
||||
"dashboard_id": 0,
|
||||
"name": "机器指标(配置了java.lang才有)",
|
||||
"weight": 7,
|
||||
"charts": [
|
||||
{
|
||||
"id": 311,
|
||||
"group_id": 81,
|
||||
"configs": "{\"name\":\"java进程打开fd数\",\"mode\":\"promethues\",\"prome_ql\":[\"os_open_file_descriptor_count{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":8,\"x\":0,\"y\":0,\"i\":\"0\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 312,
|
||||
"group_id": 81,
|
||||
"configs": "{\"name\":\"机器总内存\",\"mode\":\"promethues\",\"prome_ql\":[\"os_total_memory_size{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":8,\"x\":8,\"y\":0,\"i\":\"1\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 313,
|
||||
"group_id": 81,
|
||||
"configs": "{\"name\":\"机器可用内存数\",\"mode\":\"promethues\",\"prome_ql\":[\"os_free_memory_size{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":8,\"x\":16,\"y\":0,\"i\":\"2\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 314,
|
||||
"group_id": 81,
|
||||
"configs": "{\"name\":\"机器近期cpu使用率\",\"mode\":\"promethues\",\"link\":\"https://docs.oracle.com/javase/7/docs/jre/api/management/extension/com/sun/management/OperatingSystemMXBean.html#getSystemCpuLoad()\",\"prome_ql\":[\"100 * os_system_cpu_load{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":8,\"x\":0,\"y\":2,\"i\":\"3\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 315,
|
||||
"group_id": 81,
|
||||
"configs": "{\"name\":\"java进程cpu使用\",\"mode\":\"promethues\",\"link\":\"https://docs.oracle.com/javase/7/docs/jre/api/management/extension/com/sun/management/OperatingSystemMXBean.html#getProcessCpuLoad()\",\"prome_ql\":[\"os_process_cpu_load{java_app=~\\\"$java_app\\\"}\"],\"layout\":{\"h\":2,\"w\":8,\"x\":8,\"y\":2,\"i\":\"4\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 319,
|
||||
"group_id": 81,
|
||||
"configs": "{\"name\":\"jvm cpu百分比\",\"mode\":\"promethues\",\"prome_ql\":[\"100 *(os_process_cpu_load{java_app=~\\\"$java_app\\\"}/os_system_cpu_load{java_app=~\\\"$java_app\\\"})\"],\"layout\":{\"h\":2,\"w\":8,\"x\":16,\"y\":2,\"i\":\"5\"}}",
|
||||
"weight": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
|
@ -1,42 +0,0 @@
|
|||
[
|
||||
{
|
||||
"id": 0,
|
||||
"name": "linux_host",
|
||||
"tags": "",
|
||||
"configs": "{\"tags\":[{\"tagName\":\"ident\",\"key\":\"ident\",\"value\":\"*\",\"prefix\":false}]}",
|
||||
"chart_groups": [
|
||||
{
|
||||
"id": 0,
|
||||
"dashboard_id": 0,
|
||||
"name": "Default chart group",
|
||||
"weight": 0,
|
||||
"charts": [
|
||||
{
|
||||
"id": 1,
|
||||
"group_id": 1,
|
||||
"configs": "{\"name\":\"CPU使用率\",\"mode\":\"nightingale\",\"metric\":[\"system_cpu_util\"],\"tags\":{},\"layout\":{\"h\":2,\"w\":6,\"x\":0,\"y\":0,\"i\":\"0\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"group_id": 1,
|
||||
"configs": "{\"name\":\"硬盘使用率\",\"mode\":\"nightingale\",\"metric\":[\"system_disk_used_percent\"],\"tags\":{},\"layout\":{\"h\":2,\"w\":6,\"x\":6,\"y\":0,\"i\":\"1\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"group_id": 1,
|
||||
"configs": "{\"name\":\"内存使用率\",\"mode\":\"nightingale\",\"metric\":[\"system_mem_used_percent\"],\"tags\":{},\"layout\":{\"h\":2,\"w\":6,\"x\":12,\"y\":0,\"i\":\"2\"}}",
|
||||
"weight": 0
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"group_id": 1,
|
||||
"configs": "{\"name\":\"IO使用率\",\"mode\":\"nightingale\",\"metric\":[\"system_io_util\"],\"tags\":{},\"layout\":{\"h\":2,\"w\":6,\"x\":18,\"y\":0,\"i\":\"3\"}}",
|
||||
"weight": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
|
@ -0,0 +1 @@
|
|||
cpu_usage_idle: CPU空闲率(单位:%)
|
|
@ -1,36 +1,12 @@
|
|||
#!/usr/bin/python
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
#
|
||||
# n9e-server把告警事件通过stdin的方式传入notify.py,notify.py从事件中解析出接收人信息、拼出通知内容,发送通知
|
||||
# 脚本的灵活性高,要接入短信、电话、jira、飞书等,都非常容易,只要有接口,notify.py去调用即可
|
||||
#
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
import urllib2
|
||||
import smtplib
|
||||
import time
|
||||
import requests
|
||||
from email.mime.text import MIMEText
|
||||
from email.header import Header
|
||||
from bottle import template
|
||||
|
||||
reload(sys) # reload 才能调用 setdefaultencoding 方法
|
||||
sys.setdefaultencoding('utf-8') # 设置 'utf-8'
|
||||
|
||||
################################
|
||||
## 邮件告警,修改下面的配置 ##
|
||||
################################
|
||||
mail_host = "smtp.163.com"
|
||||
mail_port = 994
|
||||
mail_user = "ulricqin"
|
||||
mail_pass = "password"
|
||||
mail_from = "ulricqin@163.com"
|
||||
|
||||
# 本地告警event json存储目录
|
||||
LOCAL_EVENT_FILE_DIR = ".alerts"
|
||||
NOTIFY_CHANNELS_SPLIT_STR = " "
|
||||
|
||||
NOTIFY_CHANNEL_DICT = {
|
||||
notify_channel_funcs = {
|
||||
"email":"email",
|
||||
"sms":"sms",
|
||||
"voice":"voice",
|
||||
|
@ -38,290 +14,149 @@ NOTIFY_CHANNEL_DICT = {
|
|||
"wecom":"wecom"
|
||||
}
|
||||
|
||||
# stdin 告警json实例
|
||||
TEST_ALERT_JSON = {
|
||||
"event": {
|
||||
"alert_duration": 10,
|
||||
"notify_channels": "dingtalk",
|
||||
"res_classpaths": "all",
|
||||
"id": 4,
|
||||
"notify_group_objs": None,
|
||||
"rule_note": "",
|
||||
"history_points": [
|
||||
{
|
||||
"metric": "go_goroutines",
|
||||
"points": [
|
||||
{
|
||||
"t": 1625213114,
|
||||
"v": 33.0
|
||||
}
|
||||
],
|
||||
"tags": {
|
||||
"instance": "localhost:9090",
|
||||
"job": "prometheus"
|
||||
}
|
||||
}
|
||||
],
|
||||
"priority": 1,
|
||||
"last_sent": True,
|
||||
"tag_map": {
|
||||
"instance": "localhost:9090",
|
||||
"job": "prometheus"
|
||||
},
|
||||
"hash_id": "ecb258d2ca03454ee390a352913c461b",
|
||||
"status": 0,
|
||||
"tags": "instance=localhost:9090 job=prometheus",
|
||||
"trigger_time": 1625213114,
|
||||
"res_ident": "ident1",
|
||||
"rule_name": "alert_test",
|
||||
"is_prome_pull": 1,
|
||||
"notify_users": "1",
|
||||
"notify_groups": "",
|
||||
"runbook_url": "",
|
||||
"values": "[vector={__name__=\"go_goroutines\", instance=\"localhost:9090\", job=\"prometheus\"}]: [value=33.000000]",
|
||||
"readable_expression": "go_goroutines>0",
|
||||
"notify_user_objs": None,
|
||||
"is_recovery": 1,
|
||||
"rule_id": 1
|
||||
},
|
||||
"rule": {
|
||||
"alert_duration": 10,
|
||||
"notify_channels": "dingtalk",
|
||||
"enable_stime": "00:00",
|
||||
"id": 1,
|
||||
"note": "",
|
||||
"create_by": "root",
|
||||
"append_tags": "",
|
||||
"priority": 1,
|
||||
"update_by": "root",
|
||||
"type": 1,
|
||||
"status": 0,
|
||||
"recovery_notify": 0,
|
||||
"enable_days_of_week": "1 2 3 4 5 6 7",
|
||||
"callbacks": "localhost:10000",
|
||||
"notify_users": "1",
|
||||
"notify_groups": "",
|
||||
"runbook_url": "",
|
||||
"name": "a",
|
||||
"update_at": 1625211576,
|
||||
"create_at": 1625211576,
|
||||
"enable_etime": "23:59",
|
||||
"group_id": 1,
|
||||
"expression": {
|
||||
"evaluation_interval": 4,
|
||||
"promql": "go_goroutines>0"
|
||||
}
|
||||
},
|
||||
"users": [
|
||||
{
|
||||
"username": "root",
|
||||
"status": 0,
|
||||
"contacts": {
|
||||
"dingtalk_robot_token": "xxxxxx"
|
||||
},
|
||||
"create_by": "system",
|
||||
"update_at": 1625211432,
|
||||
"create_at": 1624871926,
|
||||
"email": "",
|
||||
"phone": "",
|
||||
"role": "Admin",
|
||||
"update_by": "root",
|
||||
"portrait": "",
|
||||
"nickname": "\u8d85\u7ba1",
|
||||
"id": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
mail_host = "smtp.163.com"
|
||||
mail_port = 994
|
||||
mail_user = "ulricqin"
|
||||
mail_pass = "password"
|
||||
mail_from = "ulricqin@163.com"
|
||||
|
||||
|
||||
def main():
|
||||
payload = json.load(sys.stdin)
|
||||
trigger_time = payload['event']['trigger_time']
|
||||
event_id = payload['event']['id']
|
||||
rule_id = payload['rule']['id']
|
||||
notify_channels = payload['event'].get('notify_channels').strip().split(NOTIFY_CHANNELS_SPLIT_STR)
|
||||
if len(notify_channels) == 0:
|
||||
msg = "notify_channels_empty"
|
||||
print(msg)
|
||||
return
|
||||
# 持久化到本地json文件
|
||||
persist(payload, rule_id, event_id, trigger_time)
|
||||
# 生成告警内容
|
||||
alert_content = sms_content_gen(values_gen(payload))
|
||||
for ch in notify_channels:
|
||||
send_func_name = "send_{}".format(NOTIFY_CHANNEL_DICT.get(ch.strip()))
|
||||
has_func = hasattr(Send, send_func_name)
|
||||
|
||||
if not has_func:
|
||||
msg = "[send_func_name_err][func_not_found_in_Send_class:{}]".format(send_func_name)
|
||||
print(msg)
|
||||
continue
|
||||
send_func = getattr(Send, send_func_name)
|
||||
send_func(alert_content, payload)
|
||||
|
||||
def values_gen(payload):
|
||||
event_obj = payload.get("event")
|
||||
values = {
|
||||
"IsAlert": event_obj.get("is_recovery") == 0,
|
||||
"IsMachineDep": event_obj.get("res_classpaths") != "",
|
||||
"Status": status_gen(event_obj.get("priority"),event_obj.get("is_recovery")),
|
||||
"Sname": event_obj.get("rule_name"),
|
||||
"Ident": event_obj.get("res_ident"),
|
||||
"Classpath": event_obj.get("res_classpaths"),
|
||||
"Metric": metric_gen(event_obj.get("history_points")),
|
||||
"Tags": event_obj.get("tags"),
|
||||
"Value": event_obj.get("values"),
|
||||
"ReadableExpression": event_obj.get("readable_expression"),
|
||||
"TriggerTime": time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(event_obj.get("trigger_time"))),
|
||||
"Elink": "http://n9e.didiyun.com/strategy/edit/{}".format(event_obj.get("rule_id")),
|
||||
"Slink": "http://n9e.didiyun.com/event/{}".format(event_obj.get("id"))
|
||||
}
|
||||
|
||||
return values
|
||||
|
||||
def email_content_gen(values):
|
||||
return template('etc/script/tpl/mail.tpl', values)
|
||||
|
||||
def sms_content_gen(values):
|
||||
return template('etc/script/tpl/sms.tpl', values)
|
||||
|
||||
def status_gen(priority,is_recovery):
|
||||
is_recovery_str_m = {1: "恢复", 0: "告警"}
|
||||
status = "P{} {}".format(priority, is_recovery_str_m.get(is_recovery))
|
||||
return status
|
||||
|
||||
def subject_gen(priority,is_recovery,rule_name):
|
||||
is_recovery_str_m = {1: "恢复", 0: "告警"}
|
||||
subject = "P{} {} {}".format(priority, is_recovery_str_m.get(is_recovery), rule_name)
|
||||
return subject
|
||||
|
||||
def metric_gen(history_points):
|
||||
metrics = []
|
||||
for item in history_points:
|
||||
metrics.append(item.get("metric"))
|
||||
return ",".join(metrics)
|
||||
|
||||
def persist(payload, rule_id, event_id, trigger_time):
|
||||
if not os.path.exists(LOCAL_EVENT_FILE_DIR):
|
||||
os.makedirs(LOCAL_EVENT_FILE_DIR)
|
||||
|
||||
filename = '%d_%d_%d' % (rule_id, event_id, trigger_time)
|
||||
filepath = os.path.join(LOCAL_EVENT_FILE_DIR, filename)
|
||||
with open(filepath, 'w') as f:
|
||||
f.write(json.dumps(payload, indent=4))
|
||||
|
||||
|
||||
class Send(object):
|
||||
class Sender(object):
|
||||
@classmethod
|
||||
def send_email(cls, alert_content, payload):
|
||||
users = payload.get("users")
|
||||
emails = [x.get("email") for x in users]
|
||||
def send_email(cls, payload):
|
||||
users = payload.get('event').get("notify_users_obj")
|
||||
|
||||
emails = {}
|
||||
for u in users:
|
||||
if u.get("email"):
|
||||
emails[u.get("email")] = 1
|
||||
|
||||
if not emails:
|
||||
return
|
||||
|
||||
recipients = emails
|
||||
mail_body = email_content_gen(values_gen(payload))
|
||||
recipients = emails.keys()
|
||||
mail_body = payload.get('tpls').get("mailbody.tpl", "mailbody.tpl not found")
|
||||
message = MIMEText(mail_body, 'html', 'utf-8')
|
||||
message['From'] = mail_from
|
||||
message['To'] = ", ".join(recipients)
|
||||
message["Subject"] = subject_gen(payload.get("event").get("priority"),payload.get("event").get("is_recovery"),payload.get("event").get("rule_name"))
|
||||
message["Subject"] = payload.get('tpls').get("subject.tpl", "subject.tpl not found")
|
||||
|
||||
smtp = smtplib.SMTP_SSL(mail_host, mail_port)
|
||||
smtp.login(mail_user, mail_pass)
|
||||
smtp.sendmail(mail_from, recipients, message.as_string())
|
||||
smtp.close()
|
||||
|
||||
print("send_mail_success")
|
||||
try:
|
||||
smtp = smtplib.SMTP_SSL(mail_host, mail_port)
|
||||
smtp.login(mail_user, mail_pass)
|
||||
smtp.sendmail(mail_from, recipients, message.as_string())
|
||||
smtp.close()
|
||||
except smtplib.SMTPException, error:
|
||||
print(error)
|
||||
|
||||
@classmethod
|
||||
def send_wecom(cls, alert_content, payload):
|
||||
users = payload.get("users")
|
||||
def send_wecom(cls, payload):
|
||||
users = payload.get('event').get("notify_users_obj")
|
||||
|
||||
tokens = {}
|
||||
|
||||
for u in users:
|
||||
contacts = u.get("contacts")
|
||||
wecom_robot_token = contacts.get("wecom_robot_token", "")
|
||||
if contacts.get("wecom_robot_token", ""):
|
||||
tokens[contacts.get("wecom_robot_token", "")] = 1
|
||||
|
||||
if wecom_robot_token == "":
|
||||
continue
|
||||
opener = urllib2.build_opener(urllib2.HTTPHandler())
|
||||
method = "POST"
|
||||
|
||||
wecom_api_url = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key={}".format(wecom_robot_token)
|
||||
atMobiles = [u.get("phone")]
|
||||
headers = {'Content-Type': 'application/json;charset=utf-8'}
|
||||
payload = {
|
||||
"msgtype": "text",
|
||||
"text": {
|
||||
"content": alert_content
|
||||
},
|
||||
"at": {
|
||||
"atMobiles": atMobiles,
|
||||
"isAtAll": False
|
||||
for t in tokens:
|
||||
url = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key={}".format(t)
|
||||
body = {
|
||||
"msgtype": "markdown",
|
||||
"markdown": {
|
||||
"content": payload.get('tpls').get("wecom.tpl", "wecom.tpl not found")
|
||||
}
|
||||
}
|
||||
res = requests.post(wecom_api_url, json.dumps(payload), headers=headers)
|
||||
print(res.status_code)
|
||||
print(res.text)
|
||||
print("send_wecom")
|
||||
|
||||
request = urllib2.Request(url, data=json.dumps(body))
|
||||
request.add_header("Content-Type",'application/json;charset=utf-8')
|
||||
request.get_method = lambda: method
|
||||
try:
|
||||
connection = opener.open(request)
|
||||
print(connection.read())
|
||||
except urllib2.HTTPError, error:
|
||||
print(error)
|
||||
|
||||
@classmethod
|
||||
def send_dingtalk(cls, alert_content, payload):
|
||||
# 钉钉发群信息需要群的webhook机器人 token,这个信息可以在user的contacts map中
|
||||
def send_dingtalk(cls, payload):
|
||||
users = payload.get('event').get("notify_users_obj")
|
||||
|
||||
users = payload.get("users")
|
||||
tokens = {}
|
||||
phones = {}
|
||||
|
||||
for u in users:
|
||||
if u.get("phone"):
|
||||
phones[u.get("phone")] = 1
|
||||
|
||||
contacts = u.get("contacts")
|
||||
if contacts.get("dingtalk_robot_token", ""):
|
||||
tokens[contacts.get("dingtalk_robot_token", "")] = 1
|
||||
|
||||
dingtalk_robot_token = contacts.get("dingtalk_robot_token", "")
|
||||
opener = urllib2.build_opener(urllib2.HTTPHandler())
|
||||
method = "POST"
|
||||
|
||||
if dingtalk_robot_token == "":
|
||||
print("dingtalk_robot_token_not_found")
|
||||
continue
|
||||
|
||||
dingtalk_api_url = "https://oapi.dingtalk.com/robot/send?access_token={}".format(dingtalk_robot_token)
|
||||
atMobiles = [u.get("phone")]
|
||||
headers = {'Content-Type': 'application/json;charset=utf-8'}
|
||||
payload = {
|
||||
for t in tokens:
|
||||
url = "https://oapi.dingtalk.com/robot/send?access_token={}".format(t)
|
||||
body = {
|
||||
"msgtype": "text",
|
||||
"text": {
|
||||
"content": alert_content
|
||||
"content": payload.get('tpls').get("dingtalk.tpl", "dingtalk.tpl not found")
|
||||
},
|
||||
"at": {
|
||||
"atMobiles": atMobiles,
|
||||
"atMobiles": phones.keys(),
|
||||
"isAtAll": False
|
||||
}
|
||||
}
|
||||
res = requests.post(dingtalk_api_url, json.dumps(payload), headers=headers)
|
||||
print(res.status_code)
|
||||
print(res.text)
|
||||
request = urllib2.Request(url, data=json.dumps(body))
|
||||
request.add_header("Content-Type",'application/json;charset=utf-8')
|
||||
request.get_method = lambda: method
|
||||
try:
|
||||
connection = opener.open(request)
|
||||
print(connection.read())
|
||||
except urllib2.HTTPError, error:
|
||||
print(error)
|
||||
|
||||
print("send_dingtalk")
|
||||
@classmethod
|
||||
def send_sms(cls, payload):
|
||||
users = payload.get('event').get("notify_users_obj")
|
||||
phones = {}
|
||||
for u in users:
|
||||
if u.get("phone"):
|
||||
phones[u.get("phone")] = 1
|
||||
if phones:
|
||||
print("send_sms not implemented, phones: {}".format(phones.keys()))
|
||||
|
||||
@classmethod
|
||||
def send_voice(cls, payload):
|
||||
users = payload.get('event').get("notify_users_obj")
|
||||
phones = {}
|
||||
for u in users:
|
||||
if u.get("phone"):
|
||||
phones[u.get("phone")] = 1
|
||||
if phones:
|
||||
print("send_voice not implemented, phones: {}".format(phones.keys()))
|
||||
|
||||
def mail_test():
|
||||
print("mail_test_todo")
|
||||
def main():
    """Read one alert payload (JSON) from stdin and fan it out to the notify channels.

    The decoded payload is first written to the file ``.payload`` in the
    current working directory (pretty-printed), then every entry of
    ``payload['event']['notify_channels']`` is mapped through
    ``notify_channel_funcs`` to a ``send_<name>`` classmethod on ``Sender``.
    Channels without a matching sender are reported on stdout and skipped.
    """
    payload = json.load(sys.stdin)

    # Keep a copy of the payload on disk.
    with open(".payload", 'w') as f:
        f.write(json.dumps(payload, indent=4))

    # presumably a list of channel names -- verify against the caller.
    # ``or []`` keeps a missing/null value from raising TypeError.
    channels = payload.get('event').get('notify_channels') or []

    for ch in channels:
        send_func_name = "send_{}".format(notify_channel_funcs.get(ch.strip()))
        if not hasattr(Sender, send_func_name):
            # The original passed send_func_name as a second print argument,
            # so the "{}" placeholder was never filled in.
            print("function: {} not found".format(send_func_name))
            continue
        send_func = getattr(Sender, send_func_name)
        send_func(payload)
|
||||
|
||||
recipients = ["ulricqin@qq.com", "ulric@163.com"]
|
||||
|
||||
payload = json.loads(json.dumps(TEST_ALERT_JSON))
|
||||
mail_body = email_content_gen(values_gen(payload))
|
||||
message = MIMEText(mail_body, 'html', 'utf-8')
|
||||
message['From'] = mail_from
|
||||
message['To'] = ", ".join(recipients)
|
||||
message["Subject"] = subject_gen(payload.get("event").get("priority"),payload.get("event").get("is_recovery"),payload.get("event").get("rule_name"))
|
||||
|
||||
smtp = smtplib.SMTP_SSL(mail_host, mail_port)
|
||||
smtp.login(mail_user, mail_pass)
|
||||
smtp.sendmail(mail_from, recipients, message.as_string())
|
||||
smtp.close()
|
||||
|
||||
print("mail_test_done")
|
||||
def hello():
|
||||
print("hello nightingale")
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) == 1:
|
||||
main()
|
||||
elif sys.argv[1] == "mail":
|
||||
mail_test()
|
||||
elif sys.argv[1] == "hello":
|
||||
hello()
|
||||
else:
|
||||
print("I am confused")
|
||||
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
级别状态:{{Status}}
|
||||
策略名称:{{Sname}}
|
||||
% if IsMachineDep:
|
||||
告警设备:{{Ident}}
|
||||
挂载节点:{{Classpath}}
|
||||
% end
|
||||
监控指标:{{Metric}}
|
||||
指标标签:{{Tags}}
|
||||
当前值:{{!Value}}
|
||||
报警说明:{{!ReadableExpression}}
|
||||
触发时间:{{TriggerTime}}
|
||||
报警详情:{{Elink}}
|
||||
报警策略:{{Slink}}
|
||||
|
|
@ -0,0 +1,188 @@
|
|||
# debug, release
|
||||
RunMode = "release"
|
||||
|
||||
# my cluster name
|
||||
ClusterName = "Default"
|
||||
|
||||
[Log]
|
||||
# log write dir
|
||||
Dir = "logs"
|
||||
# log level: DEBUG INFO WARNING ERROR
|
||||
Level = "INFO"
|
||||
# stdout, stderr, file
|
||||
Output = "stdout"
|
||||
# # rotate by time
|
||||
# KeepHours = 4
|
||||
# # rotate by size
|
||||
# RotateNum = 3
|
||||
# # unit: MB
|
||||
# RotateSize = 256
|
||||
|
||||
[HTTP]
|
||||
# http listening address
|
||||
Host = "0.0.0.0"
|
||||
# http listening port
|
||||
Port = 19000
|
||||
# https cert file path
|
||||
CertFile = ""
|
||||
# https key file path
|
||||
KeyFile = ""
|
||||
# whether to print access log
|
||||
PrintAccessLog = false
|
||||
# whether to enable pprof
|
||||
PProf = false
|
||||
# http graceful shutdown timeout, unit: s
|
||||
ShutdownTimeout = 30
|
||||
# max content length: 64M
|
||||
MaxContentLength = 67108864
|
||||
# http server read timeout, unit: s
|
||||
ReadTimeout = 20
|
||||
# http server write timeout, unit: s
|
||||
WriteTimeout = 40
|
||||
# http server idle timeout, unit: s
|
||||
IdleTimeout = 120
|
||||
|
||||
# [BasicAuth]
|
||||
# user002 = "ccc26da7b9aba533cbb263a36c07dcc9"
|
||||
|
||||
[Heartbeat]
|
||||
# auto detect if blank
|
||||
IP = ""
|
||||
# unit ms
|
||||
Interval = 1000
|
||||
|
||||
[Alerting]
|
||||
NotifyScriptPath = "./etc/script/notify.py"
|
||||
NotifyConcurrency = 100
|
||||
|
||||
[Alerting.RedisPub]
|
||||
Enable = false
|
||||
# complete redis key: ${ChannelPrefix} + ${Cluster}
|
||||
ChannelPrefix = "/alerts/"
|
||||
|
||||
[NoData]
|
||||
Metric = "target_up"
|
||||
# unit: second
|
||||
Interval = 15
|
||||
|
||||
[Ibex]
|
||||
# callback: ${ibex}/${tplid}/${host}
|
||||
Address = "127.0.0.1:10090"
|
||||
# basic auth
|
||||
BasicAuthUser = "ibex"
|
||||
BasicAuthPass = "ibex"
|
||||
# unit: ms
|
||||
Timeout = 3000
|
||||
|
||||
[Redis]
|
||||
# address, ip:port
|
||||
Address = "127.0.0.1:6379"
|
||||
# requirepass
|
||||
Password = ""
|
||||
# # db
|
||||
# DB = 0
|
||||
|
||||
[Gorm]
|
||||
# enable debug mode or not
|
||||
Debug = false
|
||||
# mysql postgres
|
||||
DBType = "mysql"
|
||||
# unit: s
|
||||
MaxLifetime = 7200
|
||||
# max open connections
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# table prefix
|
||||
TablePrefix = ""
|
||||
# enable auto migrate or not
|
||||
EnableAutoMigrate = false
|
||||
|
||||
[MySQL]
|
||||
# mysql address host:port
|
||||
Address = "127.0.0.1:3306"
|
||||
# mysql username
|
||||
User = "root"
|
||||
# mysql password
|
||||
Password = "1234"
|
||||
# database name
|
||||
DBName = "n9e_v5"
|
||||
# connection params
|
||||
Parameters = "charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
|
||||
|
||||
[Postgres]
|
||||
# pg address host:port
|
||||
Address = "127.0.0.1:5432"
|
||||
# pg user
|
||||
User = "root"
|
||||
# pg password
|
||||
Password = "1234"
|
||||
# database name
|
||||
DBName = "n9e_v5"
|
||||
# ssl mode
|
||||
SSLMode = "disable"
|
||||
|
||||
[Reader]
|
||||
# prometheus base url
|
||||
Url = "http://127.0.0.1:9090"
|
||||
# Basic auth username
|
||||
BasicAuthUser = ""
|
||||
# Basic auth password
|
||||
BasicAuthPass = ""
|
||||
# timeout settings, unit: ms
|
||||
Timeout = 30000
|
||||
DialTimeout = 10000
|
||||
TLSHandshakeTimeout = 30000
|
||||
ExpectContinueTimeout = 1000
|
||||
IdleConnTimeout = 90000
|
||||
# time duration, unit: ms
|
||||
KeepAlive = 30000
|
||||
MaxConnsPerHost = 0
|
||||
MaxIdleConns = 100
|
||||
MaxIdleConnsPerHost = 10
|
||||
|
||||
[WriterOpt]
|
||||
# queue max size
|
||||
QueueMaxSize = 10000000
|
||||
# number of samples popped from the queue at once
|
||||
QueuePopSize = 2000
|
||||
# unit: ms
|
||||
SleepInterval = 50
|
||||
|
||||
[[Writers]]
|
||||
Name = "prom"
|
||||
Url = "http://127.0.0.1:9090/api/v1/write"
|
||||
# Basic auth username
|
||||
BasicAuthUser = ""
|
||||
# Basic auth password
|
||||
BasicAuthPass = ""
|
||||
# timeout settings, unit: ms
|
||||
Timeout = 30000
|
||||
DialTimeout = 10000
|
||||
TLSHandshakeTimeout = 30000
|
||||
ExpectContinueTimeout = 1000
|
||||
IdleConnTimeout = 90000
|
||||
# time duration, unit: ms
|
||||
KeepAlive = 30000
|
||||
MaxConnsPerHost = 0
|
||||
MaxIdleConns = 100
|
||||
MaxIdleConnsPerHost = 100
|
||||
|
||||
# [[Writers]]
|
||||
# Name = "m3db"
|
||||
# Url = "http://127.0.0.1:7201/api/v1/prom/remote/write"
|
||||
# # Basic auth username
|
||||
# BasicAuthUser = ""
|
||||
# # Basic auth password
|
||||
# BasicAuthPass = ""
|
||||
# # timeout settings, unit: ms
|
||||
# Timeout = 30000
|
||||
# DialTimeout = 10000
|
||||
# TLSHandshakeTimeout = 30000
|
||||
# ExpectContinueTimeout = 1000
|
||||
# IdleConnTimeout = 90000
|
||||
# # time duration, unit: ms
|
||||
# KeepAlive = 30000
|
||||
# MaxConnsPerHost = 0
|
||||
# MaxIdleConns = 100
|
||||
# MaxIdleConnsPerHost = 100
|
126
etc/server.yml
126
etc/server.yml
|
@ -1,126 +0,0 @@
|
|||
logger:
|
||||
dir: logs
|
||||
level: DEBUG
|
||||
# # rotate by time
|
||||
# keepHours: 4
|
||||
# rotate by size
|
||||
rotatenum: 3
|
||||
rotatesize: 256 # unit: MB
|
||||
|
||||
http:
|
||||
mode: release
|
||||
# whether print access log to DEBUG.log
|
||||
access: false
|
||||
listen: 0.0.0.0:8000
|
||||
pprof: false
|
||||
cookieName: n9e
|
||||
cookieDomain: ""
|
||||
cookieMaxAge: 86400
|
||||
cookieSecure: false
|
||||
cookieHttpOnly: true
|
||||
cookieSecret: 4696709ab8cc3ff2fea17b930158516b
|
||||
csrfSecret: 15b8ea164b5d3d9254677053c72a19f1
|
||||
|
||||
rpc:
|
||||
listen: 0.0.0.0:9000
|
||||
|
||||
mysql:
|
||||
addr: "root:1234@tcp(127.0.0.1:3306)/n9e?charset=utf8&parseTime=True&loc=Asia%2FShanghai"
|
||||
max: 128
|
||||
idle: 16
|
||||
debug: false
|
||||
|
||||
# i18n:
|
||||
# # zh | en
|
||||
# lang: zh
|
||||
# dictPath: etc/i18n.json
|
||||
|
||||
# heartbeat:
|
||||
# # auto detect if blank
|
||||
# ip: ""
|
||||
# # unit: ms
|
||||
# interval: 1000
|
||||
|
||||
# ldap:
|
||||
# enable: false
|
||||
# host: ldap.example.org
|
||||
# port: 389
|
||||
# baseDn: "dc=example,dc=org"
|
||||
# # AD: manange@example.org
|
||||
# bindUser: "cn=manager,dc=example,dc=org"
|
||||
# bindPass: "*******"
|
||||
# # openldap: (&(uid=%s))
|
||||
# # AD: (&(sAMAccountName=%s))
|
||||
# authFilter: "(&(uid=%s))"
|
||||
# attributes:
|
||||
# nickname: "cn"
|
||||
# email: "mail"
|
||||
# phone: "mobile"
|
||||
# coverAttributes: false
|
||||
# autoRegist: true
|
||||
# tls: false
|
||||
# startTLS: false
|
||||
|
||||
# judge:
|
||||
# readBatch: 2000
|
||||
# connTimeout: 2000
|
||||
# callTimeout: 5000
|
||||
# writerNum: 256
|
||||
# connMax: 2560
|
||||
# connIdle: 256
|
||||
|
||||
# alert:
|
||||
# notifyScriptPath: ./etc/script/notify.py
|
||||
# notifyConcurrency: 200
|
||||
# mutedAlertPersist: true
|
||||
|
||||
trans:
|
||||
enable: true
|
||||
backend:
|
||||
datasource: "prometheus"
|
||||
prometheus:
|
||||
enable: true
|
||||
name: prometheus
|
||||
batch: 100000
|
||||
maxRetry: 5
|
||||
# prometheus 查询返回最大点数query.max-samples
|
||||
maxSamples: 50000000
|
||||
# prometheus并发的查询 query.max-concurrency
|
||||
maxConcurrentQuery: 20
|
||||
# prometheus 回查窗口 query.lookback-delta
|
||||
lookbackDeltaMinute: 2
|
||||
# 查询全量索引时时间窗口限制,降低高基数
|
||||
maxFetchAllSeriesLimitMinute: 5
|
||||
# 查询接口耗时超过多少秒就打印warning日志记录
|
||||
slowLogRecordSecond: 3
|
||||
# remote_read时,如果没有查询条件则用这条默认的ql查询
|
||||
# 注意! ql匹配series越多,造成的oom或者慢查询可能越大
|
||||
defaultFetchSeriesQl: '{__name__=~"system.*"}'
|
||||
remoteWrite:
|
||||
# m3db的配置
|
||||
#- name: m3db01
|
||||
# url: http://localhost:7201/api/v1/prom/remote/write
|
||||
# remoteTimeoutSecond: 5
|
||||
|
||||
# prometheus的配置
|
||||
- name: prome01
|
||||
url: http://localhost:9090/api/v1/write
|
||||
remoteTimeoutSecond: 5
|
||||
remoteRead:
|
||||
- name: prome01
|
||||
url: http://localhost:9090/api/v1/read
|
||||
remoteTimeoutSecond: 5
|
||||
|
||||
|
||||
contactKeys:
|
||||
- label: "Wecom Robot Token"
|
||||
key: wecom_robot_token
|
||||
- label: "Dingtalk Robot Token"
|
||||
key: dingtalk_robot_token
|
||||
|
||||
notifyChannels:
|
||||
- email
|
||||
- sms
|
||||
- voice
|
||||
- dingtalk
|
||||
- wecom
|
|
@ -1,13 +1,14 @@
|
|||
[Unit]
|
||||
Description="n9e-server"
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart=/opt/n9e/server/n9e-server
|
||||
WorkingDirectory=/opt/n9e/server
|
||||
|
||||
Restart=always
|
||||
RestartSecs=1s
|
||||
ExecStart=/root/gopath/src/n9e/n9e server
|
||||
WorkingDirectory=/root/gopath/src/n9e
|
||||
|
||||
Restart=on-failure
|
||||
SuccessExitStatus=0
|
||||
LimitNOFILE=65536
|
||||
StandardOutput=syslog
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
[Unit]
|
||||
Description="n9e-webapi"
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
|
||||
ExecStart=/root/gopath/src/n9e/n9e webapi
|
||||
WorkingDirectory=/root/gopath/src/n9e
|
||||
|
||||
Restart=on-failure
|
||||
SuccessExitStatus=0
|
||||
LimitNOFILE=65536
|
||||
StandardOutput=syslog
|
||||
StandardError=syslog
|
||||
SyslogIdentifier=n9e-webapi
|
||||
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
|
@ -0,0 +1,6 @@
|
|||
级别状态: S{{.Severity}} {{if .IsRecovered}}Recovered{{else}}Triggered{{end}}
|
||||
规则名称: {{.RuleName}}{{if .RuleNote}}
|
||||
规则备注: {{.RuleNote}}{{end}}
|
||||
监控指标: {{.TagsJSON}}
|
||||
触发时间: {{timeformat .TriggerTime}}
|
||||
触发时值: {{.TriggerValue}}
|
|
@ -0,0 +1,195 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="ie=edge">
|
||||
<title>夜莺告警通知</title>
|
||||
<style type="text/css">
|
||||
.wrapper {
|
||||
background-color: #f8f8f8;
|
||||
padding: 15px;
|
||||
height: 100%;
|
||||
}
|
||||
.main {
|
||||
width: 600px;
|
||||
padding: 30px;
|
||||
margin: 0 auto;
|
||||
background-color: #fff;
|
||||
font-size: 12px;
|
||||
font-family: verdana,'Microsoft YaHei',Consolas,'Deja Vu Sans Mono','Bitstream Vera Sans Mono';
|
||||
}
|
||||
header {
|
||||
border-radius: 2px 2px 0 0;
|
||||
}
|
||||
header .title {
|
||||
font-size: 16px;
|
||||
color: #333333;
|
||||
margin: 0;
|
||||
}
|
||||
header .sub-desc {
|
||||
color: #333;
|
||||
font-size: 14px;
|
||||
margin-top: 6px;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
hr {
|
||||
margin: 20px 0;
|
||||
height: 0;
|
||||
border: none;
|
||||
border-top: 1px solid #e5e5e5;
|
||||
}
|
||||
em {
|
||||
font-weight: 600;
|
||||
}
|
||||
table {
|
||||
margin: 20px 0;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
table tbody tr{
|
||||
font-weight: 200;
|
||||
font-size: 12px;
|
||||
color: #666;
|
||||
height: 32px;
|
||||
}
|
||||
|
||||
.succ {
|
||||
background-color: green;
|
||||
color: white;
|
||||
}
|
||||
|
||||
.fail {
|
||||
background-color: red;
|
||||
color: white;
|
||||
}
|
||||
|
||||
table tbody tr th {
|
||||
width: 80px;
|
||||
text-align: right;
|
||||
}
|
||||
.text-right {
|
||||
text-align: right;
|
||||
}
|
||||
.body {
|
||||
margin-top: 24px;
|
||||
}
|
||||
.body-text {
|
||||
color: #666666;
|
||||
-webkit-font-smoothing: antialiased;
|
||||
}
|
||||
.body-extra {
|
||||
-webkit-font-smoothing: antialiased;
|
||||
}
|
||||
.body-extra.text-right a {
|
||||
text-decoration: none;
|
||||
color: #333;
|
||||
}
|
||||
.body-extra.text-right a:hover {
|
||||
color: #666;
|
||||
}
|
||||
.button {
|
||||
width: 200px;
|
||||
height: 50px;
|
||||
margin-top: 20px;
|
||||
text-align: center;
|
||||
border-radius: 2px;
|
||||
background: #2D77EE;
|
||||
line-height: 50px;
|
||||
font-size: 20px;
|
||||
color: #FFFFFF;
|
||||
cursor: pointer;
|
||||
}
|
||||
.button:hover {
|
||||
background: rgb(25, 115, 255);
|
||||
border-color: rgb(25, 115, 255);
|
||||
color: #fff;
|
||||
}
|
||||
footer {
|
||||
margin-top: 10px;
|
||||
text-align: right;
|
||||
}
|
||||
.footer-logo {
|
||||
text-align: right;
|
||||
}
|
||||
.footer-logo-image {
|
||||
width: 108px;
|
||||
height: 27px;
|
||||
margin-right: 10px;
|
||||
}
|
||||
.copyright {
|
||||
margin-top: 10px;
|
||||
font-size: 12px;
|
||||
text-align: right;
|
||||
color: #999;
|
||||
-webkit-font-smoothing: antialiased;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="wrapper">
|
||||
<div class="main">
|
||||
<header>
|
||||
<h3 class="title">{{.RuleName}}</h3>
|
||||
<p class="sub-desc"></p>
|
||||
</header>
|
||||
|
||||
<hr>
|
||||
|
||||
<div class="body">
|
||||
<table cellspacing="0" cellpadding="0" border="0">
|
||||
<tbody>
|
||||
{{if .IsRecovered}}
|
||||
<tr class="succ">
|
||||
<th>级别状态:</th>
|
||||
<td>S{{.Severity}} Recovered</td>
|
||||
</tr>
|
||||
{{else}}
|
||||
<tr class="fail">
|
||||
<th>级别状态:</th>
|
||||
<td>S{{.Severity}} Triggered</td>
|
||||
</tr>
|
||||
{{end}}
|
||||
|
||||
<tr>
|
||||
<th>策略备注:</th>
|
||||
<td>{{.RuleNote}}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>设备备注:</th>
|
||||
<td>{{.TargetNote}}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>监控指标:</th>
|
||||
<td>{{.TagsJSON}}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>触发时值:</th>
|
||||
<td>{{.TriggerValue}}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>触发时间:</th>
|
||||
<td>
|
||||
{{timeformat .TriggerTime}}
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>PromQL:</th>
|
||||
<td>
|
||||
{{.PromQl}}
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<hr>
|
||||
|
||||
<footer>
|
||||
<div class="copyright" style="font-style: italic">
|
||||
我们希望与您一起,将监控这个事情,做到极致!
|
||||
</div>
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1 @@
|
|||
{{if .IsRecovered}}Recovered{{else}}Triggered{{end}}: {{.RuleName}} {{.TagsJSON}}
|
|
@ -0,0 +1,6 @@
|
|||
**级别状态**: {{if .IsRecovered}}<font color="info">S{{.Severity}} Recovered</font>{{else}}<font color="warning">S{{.Severity}} Triggered</font>{{end}}
|
||||
**规则标题**: {{.RuleName}}{{if .RuleNote}}
|
||||
**规则备注**: {{.RuleNote}}{{end}}
|
||||
**监控指标**: {{.TagsJSON}}
|
||||
**触发时间**: {{timeformat .TriggerTime}}
|
||||
**触发时值**: {{.TriggerValue}}
|
|
@ -0,0 +1,166 @@
|
|||
# debug, release
|
||||
RunMode = "release"
|
||||
|
||||
# # custom i18n dict config
|
||||
# I18N = "./etc/i18n.json"
|
||||
|
||||
# do not change
|
||||
AdminRole = "Admin"
|
||||
|
||||
# Linkage with notify.py script
|
||||
NotifyChannels = [ "email", "dingtalk", "wecom" ]
|
||||
|
||||
[[ContactKeys]]
|
||||
Label = "Wecom Robot Token"
|
||||
Key = "wecom_robot_token"
|
||||
|
||||
[[ContactKeys]]
|
||||
Label = "Dingtalk Robot Token"
|
||||
Key = "dingtalk_robot_token"
|
||||
|
||||
[Log]
|
||||
# log write dir
|
||||
Dir = "logs"
|
||||
# log level: DEBUG INFO WARNING ERROR
|
||||
Level = "DEBUG"
|
||||
# stdout, stderr, file
|
||||
Output = "stdout"
|
||||
# # rotate by time
|
||||
# KeepHours = 4
|
||||
# # rotate by size
|
||||
# RotateNum = 3
|
||||
# # unit: MB
|
||||
# RotateSize = 256
|
||||
|
||||
[HTTP]
|
||||
# http listening address
|
||||
Host = "0.0.0.0"
|
||||
# http listening port
|
||||
Port = 18000
|
||||
# https cert file path
|
||||
CertFile = ""
|
||||
# https key file path
|
||||
KeyFile = ""
|
||||
# whether to print access log
|
||||
PrintAccessLog = true
|
||||
# whether to enable pprof
|
||||
PProf = false
|
||||
# http graceful shutdown timeout, unit: s
|
||||
ShutdownTimeout = 30
|
||||
# max content length: 64M
|
||||
MaxContentLength = 67108864
|
||||
# http server read timeout, unit: s
|
||||
ReadTimeout = 20
|
||||
# http server write timeout, unit: s
|
||||
WriteTimeout = 40
|
||||
# http server idle timeout, unit: s
|
||||
IdleTimeout = 120
|
||||
|
||||
[JWTAuth]
|
||||
# signing key
|
||||
SigningKey = "5b94a0fd640fe2765af826acfe42d151"
|
||||
# unit: min
|
||||
AccessExpired = 1500
|
||||
# unit: min
|
||||
RefreshExpired = 10080
|
||||
RedisKeyPrefix = "/jwt/"
|
||||
|
||||
[BasicAuth]
|
||||
user001 = "ccc26da7b9aba533cbb263a36c07dcc5"
|
||||
|
||||
[LDAP]
|
||||
Enable = false
|
||||
Host = "ldap.example.org"
|
||||
Port = 389
|
||||
BaseDn = "dc=example,dc=org"
|
||||
# AD: manager@example.org
|
||||
BindUser = "cn=manager,dc=example,dc=org"
|
||||
BindPass = "*******"
|
||||
# openldap format e.g. (&(uid=%s))
|
||||
# AD format e.g. (&(sAMAccountName=%s))
|
||||
AuthFilter = "(&(uid=%s))"
|
||||
CoverAttributes = true
|
||||
TLS = false
|
||||
StartTLS = true
|
||||
|
||||
[LDAP.Attributes]
|
||||
Nickname = "cn"
|
||||
Phone = "mobile"
|
||||
Email = "mail"
|
||||
|
||||
[Redis]
|
||||
# address, ip:port
|
||||
Address = "127.0.0.1:6379"
|
||||
# requirepass
|
||||
Password = ""
|
||||
# # db
|
||||
# DB = 0
|
||||
|
||||
[Gorm]
|
||||
# enable debug mode or not
|
||||
Debug = true
|
||||
# mysql postgres
|
||||
DBType = "mysql"
|
||||
# unit: s
|
||||
MaxLifetime = 7200
|
||||
# max open connections
|
||||
MaxOpenConns = 150
|
||||
# max idle connections
|
||||
MaxIdleConns = 50
|
||||
# table prefix
|
||||
TablePrefix = ""
|
||||
# enable auto migrate or not
|
||||
EnableAutoMigrate = false
|
||||
|
||||
[MySQL]
|
||||
# mysql address host:port
|
||||
Address = "127.0.0.1:3306"
|
||||
# mysql username
|
||||
User = "root"
|
||||
# mysql password
|
||||
Password = "1234"
|
||||
# database name
|
||||
DBName = "n9e_v5"
|
||||
# connection params
|
||||
Parameters = "charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
|
||||
|
||||
[Postgres]
|
||||
# pg address host:port
|
||||
Address = "127.0.0.1:5432"
|
||||
# pg user
|
||||
User = "root"
|
||||
# pg password
|
||||
Password = "1234"
|
||||
# database name
|
||||
DBName = "n9e_v5"
|
||||
# ssl mode
|
||||
SSLMode = "disable"
|
||||
|
||||
[[Clusters]]
|
||||
# Prometheus cluster name
|
||||
Name = "Default"
|
||||
# Prometheus APIs base url
|
||||
Prom = "http://127.0.0.1:9090"
|
||||
# Basic auth username
|
||||
BasicAuthUser = ""
|
||||
# Basic auth password
|
||||
BasicAuthPass = ""
|
||||
# timeout settings, unit: ms
|
||||
Timeout = 30000
|
||||
DialTimeout = 10000
|
||||
TLSHandshakeTimeout = 30000
|
||||
ExpectContinueTimeout = 1000
|
||||
IdleConnTimeout = 90000
|
||||
# time duration, unit: ms
|
||||
KeepAlive = 30000
|
||||
MaxConnsPerHost = 0
|
||||
MaxIdleConns = 100
|
||||
MaxIdleConnsPerHost = 100
|
||||
|
||||
[Ibex]
|
||||
Address = "http://127.0.0.1:10090"
|
||||
# basic auth
|
||||
BasicAuthUser = "ibex"
|
||||
BasicAuthPass = "ibex"
|
||||
# unit: ms
|
||||
Timeout = 3000
|
70
go.mod
70
go.mod
|
@ -3,53 +3,31 @@ module github.com/didi/nightingale/v5
|
|||
go 1.14
|
||||
|
||||
require (
|
||||
github.com/armon/go-metrics v0.3.4 // indirect
|
||||
github.com/gin-contrib/gzip v0.0.3
|
||||
github.com/dgrijalva/jwt-go v3.2.0+incompatible
|
||||
github.com/fatih/camelcase v1.0.0 // indirect
|
||||
github.com/fatih/structs v1.1.0 // indirect
|
||||
github.com/gin-contrib/pprof v1.3.0
|
||||
github.com/gin-contrib/sessions v0.0.3
|
||||
github.com/gin-gonic/gin v1.7.0
|
||||
github.com/go-kit/kit v0.10.0
|
||||
github.com/go-ldap/ldap/v3 v3.2.4
|
||||
github.com/go-sql-driver/mysql v1.5.0
|
||||
github.com/gogo/protobuf v1.3.2
|
||||
github.com/golang/snappy v0.0.3
|
||||
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 // indirect
|
||||
github.com/gorilla/sessions v1.2.0 // indirect
|
||||
github.com/hashicorp/go-immutable-radix v1.2.0 // indirect
|
||||
github.com/hashicorp/go-msgpack v0.5.5 // indirect
|
||||
github.com/hashicorp/go-uuid v1.0.2 // indirect
|
||||
github.com/hashicorp/golang-lru v0.5.4 // indirect
|
||||
github.com/hashicorp/hcl v1.0.1-0.20190611123218-cf7d376da96d // indirect
|
||||
github.com/magiconair/properties v1.8.2 // indirect
|
||||
github.com/gin-gonic/gin v1.7.4
|
||||
github.com/go-ldap/ldap/v3 v3.4.1
|
||||
github.com/go-redis/redis/v8 v8.11.3
|
||||
github.com/golang/protobuf v1.5.2
|
||||
github.com/golang/snappy v0.0.4
|
||||
github.com/google/uuid v1.3.0
|
||||
github.com/json-iterator/go v1.1.12
|
||||
github.com/koding/multiconfig v0.0.0-20171124222453-69c27309b2d7
|
||||
github.com/mattn/go-isatty v0.0.12
|
||||
github.com/n9e/agent-payload v0.0.0-20210619031503-b72325474651
|
||||
github.com/opentracing-contrib/go-stdlib v1.0.0
|
||||
github.com/opentracing/opentracing-go v1.2.0
|
||||
github.com/orcaman/concurrent-map v0.0.0-20210106121528-16402b402231
|
||||
github.com/orcaman/concurrent-map v0.0.0-20210501183033-44dafcb38ecc
|
||||
github.com/pkg/errors v0.9.1
|
||||
github.com/prometheus/client_golang v1.9.0
|
||||
github.com/prometheus/common v0.17.0
|
||||
github.com/prometheus/prometheus v1.8.2-0.20210220213500-8c8de46003d1
|
||||
github.com/smartystreets/assertions v1.0.0 // indirect
|
||||
github.com/spaolacci/murmur3 v1.1.0 // indirect
|
||||
github.com/spf13/cast v1.3.1-0.20190531151931-f31dc0aaab5a // indirect
|
||||
github.com/spf13/jwalterweatherman v1.1.0 // indirect
|
||||
github.com/spf13/viper v1.7.1
|
||||
github.com/subosito/gotenv v1.2.1-0.20190917103637-de67a6614a4d // indirect
|
||||
github.com/toolkits/pkg v1.1.3
|
||||
github.com/ugorji/go/codec v1.1.7
|
||||
go.uber.org/atomic v1.7.0
|
||||
go.uber.org/automaxprocs v1.4.0 // indirect
|
||||
golang.org/x/text v0.3.5
|
||||
gopkg.in/ini.v1 v1.51.1 // indirect
|
||||
xorm.io/builder v0.3.7
|
||||
xorm.io/xorm v1.0.7
|
||||
github.com/prometheus/client_golang v1.11.0
|
||||
github.com/prometheus/common v0.26.0
|
||||
github.com/prometheus/prometheus v2.5.0+incompatible
|
||||
github.com/toolkits/pkg v1.2.9
|
||||
github.com/urfave/cli/v2 v2.3.0
|
||||
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d // indirect
|
||||
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e // indirect
|
||||
google.golang.org/genproto v0.0.0-20211007155348-82e027067bd4 // indirect
|
||||
google.golang.org/grpc v1.41.0 // indirect
|
||||
gorm.io/driver/mysql v1.1.2
|
||||
gorm.io/driver/postgres v1.1.1
|
||||
gorm.io/gorm v1.21.15
|
||||
)
|
||||
|
||||
// branch 0.9.3-pool-read-binary-3
|
||||
replace github.com/apache/thrift => github.com/m3db/thrift v0.0.0-20190820191926-05b5a2227fe4
|
||||
|
||||
// Fix legacy import path - https://github.com/uber-go/atomic/pull/60
|
||||
replace github.com/uber-go/atomic => github.com/uber-go/atomic v1.4.0
|
||||
|
||||
replace google.golang.org/grpc => google.golang.org/grpc v1.26.0
|
||||
|
|
|
@ -1,425 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/gin-contrib/sessions"
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
"github.com/didi/nightingale/v5/pkg/i18n"
|
||||
"github.com/didi/nightingale/v5/pkg/ierr"
|
||||
)
|
||||
|
||||
const defaultLimit = 20
|
||||
|
||||
func _e(format string, a ...interface{}) error {
|
||||
return fmt.Errorf(_s(format, a...))
|
||||
}
|
||||
|
||||
func _s(format string, a ...interface{}) string {
|
||||
return i18n.Sprintf(format, a...)
|
||||
}
|
||||
|
||||
func dangerous(v interface{}, code ...int) {
|
||||
ierr.Dangerous(v, code...)
|
||||
}
|
||||
|
||||
func bomb(code int, format string, a ...interface{}) {
|
||||
ierr.Bomb(code, _s(format, a...))
|
||||
}
|
||||
|
||||
func bind(c *gin.Context, ptr interface{}) {
|
||||
dangerous(c.ShouldBindJSON(ptr), http.StatusBadRequest)
|
||||
}
|
||||
|
||||
func urlParamStr(c *gin.Context, field string) string {
|
||||
val := c.Param(field)
|
||||
|
||||
if val == "" {
|
||||
bomb(http.StatusBadRequest, "url param[%s] is blank", field)
|
||||
}
|
||||
|
||||
return val
|
||||
}
|
||||
|
||||
func urlParamInt64(c *gin.Context, field string) int64 {
|
||||
strval := urlParamStr(c, field)
|
||||
intval, err := strconv.ParseInt(strval, 10, 64)
|
||||
if err != nil {
|
||||
bomb(http.StatusBadRequest, "cannot convert %s to int64", strval)
|
||||
}
|
||||
|
||||
return intval
|
||||
}
|
||||
|
||||
func urlParamInt(c *gin.Context, field string) int {
|
||||
return int(urlParamInt64(c, field))
|
||||
}
|
||||
|
||||
func queryStr(c *gin.Context, key string, defaultVal ...string) string {
|
||||
val := c.Query(key)
|
||||
if val != "" {
|
||||
return val
|
||||
}
|
||||
|
||||
if len(defaultVal) == 0 {
|
||||
bomb(http.StatusBadRequest, "query param[%s] is necessary", key)
|
||||
}
|
||||
|
||||
return defaultVal[0]
|
||||
}
|
||||
|
||||
func queryInt(c *gin.Context, key string, defaultVal ...int) int {
|
||||
strv := c.Query(key)
|
||||
if strv != "" {
|
||||
intv, err := strconv.Atoi(strv)
|
||||
if err != nil {
|
||||
bomb(http.StatusBadRequest, "cannot convert [%s] to int", strv)
|
||||
}
|
||||
return intv
|
||||
}
|
||||
|
||||
if len(defaultVal) == 0 {
|
||||
bomb(http.StatusBadRequest, "query param[%s] is necessary", key)
|
||||
}
|
||||
|
||||
return defaultVal[0]
|
||||
}
|
||||
|
||||
func queryInt64(c *gin.Context, key string, defaultVal ...int64) int64 {
|
||||
strv := c.Query(key)
|
||||
if strv != "" {
|
||||
intv, err := strconv.ParseInt(strv, 10, 64)
|
||||
if err != nil {
|
||||
bomb(http.StatusBadRequest, "cannot convert [%s] to int64", strv)
|
||||
}
|
||||
return intv
|
||||
}
|
||||
|
||||
if len(defaultVal) == 0 {
|
||||
bomb(http.StatusBadRequest, "query param[%s] is necessary", key)
|
||||
}
|
||||
|
||||
return defaultVal[0]
|
||||
}
|
||||
|
||||
func queryBool(c *gin.Context, key string, defaultVal ...bool) bool {
|
||||
strv := c.Query(key)
|
||||
if strv != "" {
|
||||
if strv == "true" || strv == "1" || strv == "on" || strv == "checked" || strv == "yes" || strv == "Y" {
|
||||
return true
|
||||
} else if strv == "false" || strv == "0" || strv == "off" || strv == "no" || strv == "N" {
|
||||
return false
|
||||
} else {
|
||||
bomb(http.StatusBadRequest, "unknown arg[%s] value: %s", key, strv)
|
||||
}
|
||||
}
|
||||
|
||||
if len(defaultVal) == 0 {
|
||||
bomb(http.StatusBadRequest, "arg[%s] is necessary", key)
|
||||
}
|
||||
|
||||
return defaultVal[0]
|
||||
}
|
||||
|
||||
func offset(c *gin.Context, limit int) int {
|
||||
if limit <= 0 {
|
||||
limit = 10
|
||||
}
|
||||
|
||||
page := queryInt(c, "p", 1)
|
||||
return (page - 1) * limit
|
||||
}
|
||||
|
||||
func renderMessage(c *gin.Context, v interface{}, statusCode ...int) {
|
||||
code := 200
|
||||
if len(statusCode) > 0 {
|
||||
code = statusCode[0]
|
||||
}
|
||||
if v == nil {
|
||||
c.JSON(code, gin.H{"err": ""})
|
||||
return
|
||||
}
|
||||
|
||||
switch t := v.(type) {
|
||||
case string:
|
||||
c.JSON(code, gin.H{"err": _s(t)})
|
||||
case error:
|
||||
c.JSON(code, gin.H{"err": t.Error()})
|
||||
}
|
||||
}
|
||||
|
||||
func renderData(c *gin.Context, data interface{}, err error, statusCode ...int) {
|
||||
code := 200
|
||||
if len(statusCode) > 0 {
|
||||
code = statusCode[0]
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
c.JSON(code, gin.H{"dat": data, "err": ""})
|
||||
return
|
||||
}
|
||||
|
||||
renderMessage(c, err.Error(), code)
|
||||
}
|
||||
|
||||
func renderZeroPage(c *gin.Context) {
|
||||
renderData(c, gin.H{
|
||||
"list": []int{},
|
||||
"total": 0,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
type idsForm struct {
|
||||
Ids []int64 `json:"ids"`
|
||||
}
|
||||
|
||||
func (f idsForm) Validate() {
|
||||
if len(f.Ids) == 0 {
|
||||
bomb(http.StatusBadRequest, "ids empty")
|
||||
}
|
||||
}
|
||||
|
||||
func cookieUsername(c *gin.Context) string {
|
||||
session := sessions.Default(c)
|
||||
|
||||
value := session.Get("username")
|
||||
if value == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
return value.(string)
|
||||
}
|
||||
|
||||
func headerUsername(c *gin.Context) string {
|
||||
token := c.GetHeader("Authorization")
|
||||
if token == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
ut, err := models.UserTokenGet("token=?", strings.TrimPrefix(token, "Bearer "))
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
if ut == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
return ut.Username
|
||||
}
|
||||
|
||||
// must get username
|
||||
func loginUsername(c *gin.Context) string {
|
||||
usernameInterface, has := c.Get("username")
|
||||
if has {
|
||||
return usernameInterface.(string)
|
||||
}
|
||||
|
||||
username := cookieUsername(c)
|
||||
if username == "" {
|
||||
username = headerUsername(c)
|
||||
}
|
||||
|
||||
if username == "" {
|
||||
remoteAddr := c.Request.RemoteAddr
|
||||
idx := strings.LastIndex(remoteAddr, ":")
|
||||
ip := ""
|
||||
if idx > 0 {
|
||||
ip = remoteAddr[0:idx]
|
||||
}
|
||||
|
||||
if (ip == "127.0.0.1" || ip == "[::1]") && c.GetHeader("X-Local") == "1" {
|
||||
//本地调用都当成是root用户在调用
|
||||
username = "root"
|
||||
}
|
||||
}
|
||||
|
||||
if username == "" {
|
||||
ierr.Bomb(http.StatusUnauthorized, "unauthorized")
|
||||
}
|
||||
|
||||
c.Set("username", username)
|
||||
return username
|
||||
}
|
||||
|
||||
func loginUser(c *gin.Context) *models.User {
|
||||
username := loginUsername(c)
|
||||
|
||||
user, err := models.UserGetByUsername(username)
|
||||
dangerous(err)
|
||||
|
||||
if user == nil {
|
||||
ierr.Bomb(http.StatusUnauthorized, "unauthorized")
|
||||
}
|
||||
|
||||
if user.Status == 1 {
|
||||
ierr.Bomb(http.StatusUnauthorized, "unauthorized")
|
||||
}
|
||||
|
||||
return user
|
||||
}
|
||||
|
||||
func User(id int64) *models.User {
|
||||
obj, err := models.UserGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
bomb(http.StatusNotFound, "No such user")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func UserGroup(id int64) *models.UserGroup {
|
||||
obj, err := models.UserGroupGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
bomb(http.StatusNotFound, "No such user group")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func Classpath(id int64) *models.Classpath {
|
||||
obj, err := models.ClasspathGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
bomb(http.StatusNotFound, "No such classpath")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func Mute(id int64) *models.Mute {
|
||||
obj, err := models.MuteGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
bomb(http.StatusNotFound, "No such mute config")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func Dashboard(id int64) *models.Dashboard {
|
||||
obj, err := models.DashboardGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
bomb(http.StatusNotFound, "No such dashboard")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func ChartGroup(id int64) *models.ChartGroup {
|
||||
obj, err := models.ChartGroupGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
bomb(http.StatusNotFound, "No such chart group")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func Chart(id int64) *models.Chart {
|
||||
obj, err := models.ChartGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
bomb(http.StatusNotFound, "No such chart")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func AlertRule(id int64) *models.AlertRule {
|
||||
obj, err := models.AlertRuleGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
bomb(http.StatusNotFound, "No such alert rule")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func AlertRuleGroup(id int64) *models.AlertRuleGroup {
|
||||
obj, err := models.AlertRuleGroupGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
bomb(http.StatusNotFound, "No such alert rule group")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func AlertEvent(id int64) *models.AlertEvent {
|
||||
obj, err := models.AlertEventGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
bomb(http.StatusNotFound, "No such alert event")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func HistoryAlertEvent(id int64) *models.HistoryAlertEvent {
|
||||
obj, err := models.HistoryAlertEventGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
bomb(http.StatusNotFound, "No such alert all event")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func CollectRule(id int64) *models.CollectRule {
|
||||
obj, err := models.CollectRuleGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
bomb(http.StatusNotFound, "No such collect rule")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func MetricDescription(id int64) *models.MetricDescription {
|
||||
obj, err := models.MetricDescriptionGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
bomb(http.StatusNotFound, "No such metric description")
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
||||
|
||||
func Resource(id int64) *models.Resource {
|
||||
obj, err := models.ResourceGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
||||
if obj == nil {
|
||||
bomb(http.StatusNotFound, "No such resource")
|
||||
}
|
||||
|
||||
classpathResources, err := models.ClasspathResourceGets("res_ident=?", obj.Ident)
|
||||
dangerous(err)
|
||||
for _, cr := range classpathResources {
|
||||
obj.ClasspathIds = append(obj.ClasspathIds, cr.ClasspathId)
|
||||
}
|
||||
|
||||
return obj
|
||||
}
|
|
@ -1,43 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/didi/nightingale/v5/pkg/ierr"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func login() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
username := loginUsername(c)
|
||||
c.Set("username", username)
|
||||
// 这里调用loginUser主要是为了判断当前用户是否被disable了
|
||||
loginUser(c)
|
||||
c.Next()
|
||||
}
|
||||
}
|
||||
|
||||
func admin() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
username := loginUsername(c)
|
||||
c.Set("username", username)
|
||||
|
||||
user := loginUser(c)
|
||||
|
||||
roles := strings.Fields(user.RolesForDB)
|
||||
found := false
|
||||
for i := 0; i < len(roles); i++ {
|
||||
if roles[i] == "Admin" {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
ierr.Bomb(http.StatusForbidden, "forbidden")
|
||||
}
|
||||
|
||||
c.Next()
|
||||
}
|
||||
}
|
|
@ -1,108 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-contrib/sessions"
|
||||
"github.com/gin-contrib/sessions/cookie"
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/config"
|
||||
"github.com/didi/nightingale/v5/pkg/iaop"
|
||||
)
|
||||
|
||||
// srv is the package-level HTTP server shared by Start and Shutdown. Start
// fills in Addr and Handler before calling ListenAndServe.
// NOTE(review): MaxHeaderBytes is 1 << 30 (1 GiB), far above net/http's
// DefaultMaxHeaderBytes of 1 << 20 — confirm this is intentional.
var srv = &http.Server{
	ReadTimeout:    30 * time.Second,
	WriteTimeout:   30 * time.Second,
	MaxHeaderBytes: 1 << 30,
}
|
||||
|
||||
// skipPaths is passed to the access logger as LoggerConfig.SkipPaths in Start.
// Presumably requests to these paths are left out of the access log — confirm
// against the iaop package.
var skipPaths = []string{
	"/api/n9e/auth/login",
	"/api/n9e/self/password",
	"/api/n9e/push",
	"/v1/n9e/series",
}
|
||||
|
||||
func Start() {
|
||||
c := config.Config
|
||||
|
||||
loggerMid := iaop.LoggerWithConfig(iaop.LoggerConfig{SkipPaths: skipPaths})
|
||||
recoveryMid := iaop.Recovery()
|
||||
|
||||
if strings.ToLower(c.HTTP.Mode) == "release" {
|
||||
gin.SetMode(gin.ReleaseMode)
|
||||
iaop.DisableConsoleColor()
|
||||
}
|
||||
|
||||
r := gin.New()
|
||||
r.Use(recoveryMid)
|
||||
|
||||
// whether print access log
|
||||
if c.HTTP.Access {
|
||||
r.Use(loggerMid)
|
||||
}
|
||||
|
||||
// use cookie to save session
|
||||
store := cookie.NewStore([]byte(config.Config.HTTP.CookieSecret))
|
||||
store.Options(sessions.Options{
|
||||
Domain: config.Config.HTTP.CookieDomain,
|
||||
MaxAge: config.Config.HTTP.CookieMaxAge,
|
||||
Secure: config.Config.HTTP.CookieSecure,
|
||||
HttpOnly: config.Config.HTTP.CookieHttpOnly,
|
||||
Path: "/",
|
||||
})
|
||||
session := sessions.Sessions(config.Config.HTTP.CookieName, store)
|
||||
r.Use(session)
|
||||
|
||||
configRoutes(r)
|
||||
configNoRoute(r)
|
||||
|
||||
srv.Addr = c.HTTP.Listen
|
||||
srv.Handler = r
|
||||
|
||||
go func() {
|
||||
fmt.Println("http.listening:", srv.Addr)
|
||||
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
||||
fmt.Printf("listening %s occur error: %s\n", srv.Addr, err)
|
||||
os.Exit(3)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// Shutdown http server
|
||||
func Shutdown() {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
if err := srv.Shutdown(ctx); err != nil {
|
||||
fmt.Println("cannot shutdown http server:", err)
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
// catching ctx.Done(). timeout of 5 seconds.
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
fmt.Println("shutdown http server timeout of 5 seconds.")
|
||||
default:
|
||||
fmt.Println("http server stopped")
|
||||
}
|
||||
}
|
||||
|
||||
func configNoRoute(r *gin.Engine) {
|
||||
r.NoRoute(func(c *gin.Context) {
|
||||
arr := strings.Split(c.Request.URL.Path, ".")
|
||||
suffix := arr[len(arr)-1]
|
||||
switch suffix {
|
||||
case "png", "jpeg", "jpg", "svg", "ico", "gif", "css", "js", "html", "htm", "gz", "map":
|
||||
c.File(path.Join(strings.Split("pub/"+c.Request.URL.Path, "/")...))
|
||||
default:
|
||||
c.File(path.Join("pub", "index.html"))
|
||||
}
|
||||
})
|
||||
}
|
220
http/router.go
220
http/router.go
|
@ -1,220 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/gin-contrib/gzip"
|
||||
"github.com/gin-contrib/pprof"
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/config"
|
||||
)
|
||||
|
||||
// configRoutes registers every HTTP route on r: optional pprof endpoints, the
// "/api/n9e" groups (guest and pages), "/api/n9e/v2", the "/v1/n9e"
// third-party API, and the gzip-enabled "/v1/n9e/series" push endpoint.
// Routes registered with login() or admin() run that middleware before the
// handler.
// NOTE(review): several pages routes are registered without login(); whether
// their handlers authenticate on their own is not visible here — confirm.
func configRoutes(r *gin.Engine) {
	/*
		csrfMid := csrf.Middleware(csrf.Options{
			Secret: config.Config.HTTP.CsrfSecret,
			ErrorFunc: func(c *gin.Context) {
				c.JSON(452, gin.H{"err": "csrf token mismatch"})
				c.Abort()
			},
		})
	*/

	// pprof endpoints are only exposed when enabled in the configuration
	if config.Config.HTTP.Pprof {
		pprof.Register(r, "/api/debug/pprof")
	}

	// routes in this group are registered without login()/admin() middleware
	guest := r.Group("/api/n9e")
	{
		guest.GET("/ping", func(c *gin.Context) {
			c.String(200, "pong")
		})
		guest.GET("/pid", func(c *gin.Context) {
			c.String(200, fmt.Sprintf("%d", os.Getpid()))
		})
		guest.GET("/addr", func(c *gin.Context) {
			c.String(200, c.Request.RemoteAddr)
		})
		guest.GET("/version", func(c *gin.Context) {
			c.String(200, config.Version)
		})

		guest.POST("/auth/login", loginPost)
		guest.GET("/auth/logout", logoutGet)

		// Open-source edition: anonymous data queries are allowed in order to
		// support the chart sharing feature.
		guest.POST("/query", GetData)
		guest.POST("/instant-query", GetDataInstant)
		guest.POST("/tag-pairs", GetTagPairs)
		guest.POST("/tag-keys", GetTagKeys)
		guest.POST("/tag-values", GetTagValues)
		guest.POST("/tag-metrics", GetMetrics)
		guest.GET("/check-promql", checkPromeQl)
	}

	// for browser, expose location in nginx.conf
	pages := r.Group("/api/n9e")

	{

		pages.GET("/csrf", func(c *gin.Context) {
			// renderData(c, csrf.GetToken(c), nil)
			renderData(c, "not supported", nil)
		})

		// roles, own profile/password/token, and user management
		pages.GET("/roles", rolesGet)
		pages.GET("/self/profile", selfProfileGet)
		pages.PUT("/self/profile", selfProfilePut)
		pages.PUT("/self/password", selfPasswordPut)
		pages.GET("/self/token", selfTokenGets)
		pages.POST("/self/token", selfTokenPost)
		pages.PUT("/self/token", selfTokenPut)
		pages.GET("/users", login(), userGets)
		pages.POST("/users", admin(), userAddPost)
		pages.GET("/user/:id/profile", login(), userProfileGet)
		pages.PUT("/user/:id/profile", admin(), userProfilePut)
		pages.PUT("/user/:id/status", admin(), userStatusPut)
		pages.PUT("/user/:id/password", admin(), userPasswordPut)
		pages.DELETE("/user/:id", admin(), userDel)

		// user groups
		pages.GET("/user-groups", login(), userGroupListGet)
		pages.GET("/user-groups/mine", login(), userGroupMineGet)
		pages.POST("/user-groups", login(), userGroupAdd)
		pages.PUT("/user-group/:id", login(), userGroupPut)
		pages.GET("/user-group/:id", login(), userGroupGet)
		pages.POST("/user-group/:id/members", login(), userGroupMemberAdd)
		pages.DELETE("/user-group/:id/members", login(), userGroupMemberDel)
		pages.DELETE("/user-group/:id", login(), userGroupDel)

		// classpaths
		pages.GET("/classpaths", login(), classpathListGets)
		pages.GET("/classpaths/tree-node/:id", login(), classpathListNodeGetsById)
		pages.POST("/classpaths", login(), classpathAdd)
		pages.PUT("/classpath/:id", login(), classpathPut)
		pages.DELETE("/classpath/:id", login(), classpathDel)
		pages.POST("/classpath/:id/resources", login(), classpathAddResources)
		pages.DELETE("/classpath/:id/resources", login(), classpathDelResources)
		pages.GET("/classpath/:id/resources", login(), classpathGetsResources)

		pages.GET("/classpaths/favorites", login(), classpathFavoriteGet)
		pages.POST("/classpath/:id/favorites", login(), classpathFavoriteAdd)
		pages.DELETE("/classpath/:id/favorites", login(), classpathFavoriteDel)

		// resources
		pages.GET("/resources", login(), resourcesQuery)
		pages.PUT("/resources/note", resourceNotePut)
		pages.PUT("/resources/tags", resourceTagsPut)
		pages.PUT("/resources/classpaths", resourceClasspathsPut)
		pages.PUT("/resources/mute", resourceMutePut)
		pages.GET("/resource/:id", login(), resourceGet)
		pages.DELETE("/resource/:id", login(), resourceDel)

		// mutes
		pages.GET("/mutes", login(), muteGets)
		pages.POST("/mutes", login(), muteAdd)
		pages.GET("/mute/:id", login(), muteGet)
		pages.DELETE("/mute/:id", login(), muteDel)

		// dashboards
		pages.GET("/dashboards", login(), dashboardGets)
		pages.POST("/dashboards", login(), dashboardAdd)
		pages.POST("/dashboards-clone", login(), dashboardClone)
		pages.POST("/dashboards/import", login(), dashboardImport)
		pages.POST("/dashboards/export", login(), dashboardExport)
		pages.GET("/dashboard/:id", login(), dashboardGet)
		pages.PUT("/dashboard/:id", login(), dashboardPut)
		pages.DELETE("/dashboard/:id", login(), dashboardDel)
		pages.POST("/dashboard/:id/favorites", login(), dashboardFavoriteAdd)
		pages.DELETE("/dashboard/:id/favorites", login(), dashboardFavoriteDel)
		pages.GET("/dashboard/:id/chart-groups", login(), chartGroupGets)
		pages.POST("/dashboard/:id/chart-groups", login(), chartGroupAdd)

		// chart groups and charts
		pages.PUT("/chart-groups", login(), chartGroupsPut)
		pages.DELETE("/chart-group/:id", login(), chartGroupDel)
		pages.GET("/chart-group/:id/charts", login(), chartGets)
		pages.POST("/chart-group/:id/charts", login(), chartAdd)
		pages.PUT("/chart/:id", login(), chartPut)
		pages.DELETE("/chart/:id", login(), chartDel)
		pages.PUT("/charts/configs", login(), chartConfigsPut)
		pages.GET("/charts/tmps", chartTmpGets)
		pages.POST("/charts/tmps", login(), chartTmpAdd)

		// alert rule groups
		pages.GET("/alert-rule-groups", login(), alertRuleGroupGets)
		pages.GET("/alert-rule-groups/favorites", login(), alertRuleGroupFavoriteGet)
		pages.POST("/alert-rule-groups", login(), alertRuleGroupAdd)
		pages.GET("/alert-rule-group/:id", login(), alertRuleGroupGet)
		pages.GET("/alert-rule-group/:id/alert-rules", login(), alertRuleOfGroupGet)
		pages.DELETE("/alert-rule-group/:id/alert-rules", login(), alertRuleOfGroupDel)
		pages.PUT("/alert-rule-group/:id", login(), alertRuleGroupPut)
		pages.DELETE("/alert-rule-group/:id", login(), alertRuleGroupDel)
		pages.POST("/alert-rule-group/:id/favorites", login(), alertRuleGroupFavoriteAdd)
		pages.DELETE("/alert-rule-group/:id/favorites", login(), alertRuleGroupFavoriteDel)

		// alert rules
		pages.POST("/alert-rules", login(), alertRuleAdd)
		pages.PUT("/alert-rules/status", login(), alertRuleStatusPut)
		pages.PUT("/alert-rules/notify-groups", login(), alertRuleNotifyGroupsPut)
		pages.PUT("/alert-rules/notify-channels", login(), alertRuleNotifyChannelsPut)
		pages.PUT("/alert-rules/append-tags", login(), alertRuleAppendTagsPut)
		pages.GET("/alert-rule/:id", login(), alertRuleGet)
		pages.PUT("/alert-rule/:id", login(), alertRulePut)
		pages.DELETE("/alert-rule/:id", login(), alertRuleDel)

		// alert events
		pages.GET("/alert-events", login(), alertEventGets)
		pages.DELETE("/alert-events", login(), alertEventsDel)
		pages.GET("/alert-event/:id", login(), alertEventGet)
		pages.DELETE("/alert-event/:id", login(), alertEventDel)
		// pages.PUT("/alert-event/:id", login(), alertEventNotePut)

		pages.GET("/history-alert-events", login(), historyAlertEventGets)
		pages.GET("/history-alert-event/:id", login(), historyAlertEventGet)

		// collect rules
		pages.GET("/classpath/:id/collect-rules", login(), collectRuleGets)
		pages.POST("/collect-rules", login(), collectRuleAdd)
		pages.DELETE("/collect-rules", login(), collectRuleDel)
		pages.PUT("/collect-rule/:id", login(), collectRulePut)
		pages.POST("/log/check", regExpCheck)

		// metric descriptions
		pages.GET("/metric-descriptions", metricDescriptionGets)
		pages.POST("/metric-descriptions", login(), metricDescriptionAdd)
		pages.DELETE("/metric-descriptions", login(), metricDescriptionDel)
		pages.PUT("/metric-description/:id", login(), metricDescriptionPut)

		pages.GET("/contact-channels", contactChannelsGet)
		pages.GET("/notify-channels", notifyChannelsGet)

		pages.GET("/tpl/list", tplNameGets)
		pages.GET("/tpl/content", tplGet)

		pages.GET("/status", Status)

	}

	// for browser, expose location in nginx.conf
	pagesV2 := r.Group("/api/n9e/v2")
	{
		pagesV2.POST("/collect-rules", login(), collectRulesAdd)
	}

	// for thirdparty, do not expose location in nginx.conf
	v1 := r.Group("/v1/n9e")
	{
		v1.POST("/query", GetData)
		v1.POST("/instant-query", GetDataInstant)
		v1.POST("/tag-keys", GetTagKeys)
		v1.POST("/tag-values", GetTagValues)
		v1.POST("/tag-pairs", GetTagPairs)
		v1.POST("/tag-metrics", GetMetrics)
		v1.POST("/push", PushData)
		v1.GET("/collect-rules-belong-to-ident", collectRuleGetsByIdent)
		v1.GET("/collect-rules-summary", collectRuleSummaryGetByIdent)

		v1.GET("/can-do-op-by-name", login(), canDoOpByName)
		v1.GET("/can-do-op-by-token", login(), canDoOpByToken)
		v1.GET("/get-user-by-name", login(), getUserByName)
		v1.GET("/get-user-by-token", login(), getUserByToken)
	}

	// the series push endpoint runs behind the gzip middleware
	push := r.Group("/v1/n9e/series").Use(gzip.Gzip(gzip.DefaultCompression))
	{
		push.POST("", PushSeries)
	}

}
|
|
@ -1,82 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
func alertEventGets(c *gin.Context) {
|
||||
stime := queryInt64(c, "stime", 0)
|
||||
etime := queryInt64(c, "etime", 0)
|
||||
hours := queryInt64(c, "hours", 0)
|
||||
now := time.Now().Unix()
|
||||
if hours != 0 {
|
||||
stime = now - 3600*hours
|
||||
etime = now + 3600*24
|
||||
}
|
||||
|
||||
if stime != 0 && etime == 0 {
|
||||
etime = now + 3600*24
|
||||
}
|
||||
|
||||
query := queryStr(c, "query", "")
|
||||
priority := queryInt(c, "priority", -1)
|
||||
status := queryInt(c, "status", -1)
|
||||
limit := queryInt(c, "limit", defaultLimit)
|
||||
|
||||
total, err := models.AlertEventTotal(stime, etime, query, status, priority)
|
||||
dangerous(err)
|
||||
|
||||
list, err := models.AlertEventGets(stime, etime, query, status, priority, limit, offset(c, limit))
|
||||
dangerous(err)
|
||||
|
||||
for i := 0; i < len(list); i++ {
|
||||
dangerous(list[i].FillObjs())
|
||||
}
|
||||
|
||||
if len(list) == 0 {
|
||||
renderZeroPage(c)
|
||||
return
|
||||
}
|
||||
|
||||
renderData(c, map[string]interface{}{
|
||||
"total": total,
|
||||
"list": list,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func alertEventGet(c *gin.Context) {
|
||||
ae := AlertEvent(urlParamInt64(c, "id"))
|
||||
dangerous(ae.FillObjs())
|
||||
renderData(c, ae, nil)
|
||||
}
|
||||
|
||||
// alertEventNoteForm is the JSON body carrying an event note. Within this
// file it is only referenced by the commented-out alertEventNotePut handler.
type alertEventNoteForm struct {
	EventNote string `json:"event_note"`
}
|
||||
|
||||
// func alertEventNotePut(c *gin.Context) {
|
||||
// var f alertEventNoteForm
|
||||
// bind(c, &f)
|
||||
|
||||
// me := loginUser(c).MustPerm("alert_event_modify")
|
||||
// ae := AlertEvent(urlParamInt64(c, "id"))
|
||||
|
||||
// renderMessage(c, models.AlertEventUpdateEventNote(ae.Id, ae.HashId, f.EventNote, me.Id))
|
||||
// }
|
||||
|
||||
func alertEventDel(c *gin.Context) {
|
||||
loginUser(c).MustPerm("alert_event_delete")
|
||||
renderMessage(c, AlertEvent(urlParamInt64(c, "id")).Del())
|
||||
}
|
||||
|
||||
func alertEventsDel(c *gin.Context) {
|
||||
var f idsForm
|
||||
bind(c, &f)
|
||||
f.Validate()
|
||||
loginUser(c).MustPerm("alert_event_delete")
|
||||
renderMessage(c, models.AlertEventsDel(f.Ids))
|
||||
}
|
|
@ -1,351 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
|
||||
"github.com/didi/nightingale/v5/cache"
|
||||
"github.com/didi/nightingale/v5/config"
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
func alertRuleGet(c *gin.Context) {
|
||||
alertRule := AlertRule(urlParamInt64(c, "id"))
|
||||
alertRuleFillUserAndGroups(alertRule)
|
||||
renderData(c, alertRule, nil)
|
||||
}
|
||||
|
||||
// alertRuleForm is the JSON request body for alert rules: alertRuleAdd binds
// a list of these, alertRulePut binds a single one. Each field is copied onto
// the models.AlertRule field of the same name.
type alertRuleForm struct {
	GroupId          int64           `json:"group_id"`
	Name             string          `json:"name"`
	Note             string          `json:"note"`
	Type             int             `json:"type"`
	Status           int             `json:"status"`
	Expression       json.RawMessage `json:"expression"`
	AppendTags       string          `json:"append_tags"`
	EnableStime      string          `json:"enable_stime"`
	EnableEtime      string          `json:"enable_etime"`
	EnableDaysOfWeek string          `json:"enable_days_of_week"`
	AlertDuration    int             `json:"alert_duration"`
	RecoveryNotify   int             `json:"recovery_notify"`
	Priority         int             `json:"priority"`
	NotifyChannels   string          `json:"notify_channels"`
	NotifyGroups     string          `json:"notify_groups"`
	NotifyUsers      string          `json:"notify_users"`
	Callbacks        string          `json:"callbacks"`
	RunbookUrl       string          `json:"runbook_url"`
}
|
||||
|
||||
func alertRuleAdd(c *gin.Context) {
|
||||
var f []alertRuleForm
|
||||
bind(c, &f)
|
||||
|
||||
me := loginUser(c).MustPerm("alert_rule_create")
|
||||
var ids []int64
|
||||
for _, alertRule := range f {
|
||||
arg := AlertRuleGroup(alertRule.GroupId)
|
||||
alertRuleWritePermCheck(arg, me)
|
||||
|
||||
ar := models.AlertRule{
|
||||
GroupId: alertRule.GroupId,
|
||||
Name: alertRule.Name,
|
||||
Type: alertRule.Type,
|
||||
Note: alertRule.Note,
|
||||
Status: alertRule.Status,
|
||||
Expression: alertRule.Expression,
|
||||
AlertDuration: alertRule.AlertDuration,
|
||||
AppendTags: alertRule.AppendTags,
|
||||
EnableStime: alertRule.EnableStime,
|
||||
EnableEtime: alertRule.EnableEtime,
|
||||
EnableDaysOfWeek: alertRule.EnableDaysOfWeek,
|
||||
RecoveryNotify: alertRule.RecoveryNotify,
|
||||
Priority: alertRule.Priority,
|
||||
NotifyChannels: alertRule.NotifyChannels,
|
||||
NotifyGroups: alertRule.NotifyGroups,
|
||||
NotifyUsers: alertRule.NotifyUsers,
|
||||
Callbacks: alertRule.Callbacks,
|
||||
RunbookUrl: alertRule.RunbookUrl,
|
||||
CreateBy: me.Username,
|
||||
UpdateBy: me.Username,
|
||||
}
|
||||
dangerous(ar.Add())
|
||||
ids = append(ids, ar.Id)
|
||||
}
|
||||
|
||||
renderData(c, ids, nil)
|
||||
}
|
||||
|
||||
func alertRulePut(c *gin.Context) {
|
||||
var f alertRuleForm
|
||||
bind(c, &f)
|
||||
|
||||
me := loginUser(c).MustPerm("alert_rule_modify")
|
||||
ar := AlertRule(urlParamInt64(c, "id"))
|
||||
arg := AlertRuleGroup(ar.GroupId)
|
||||
alertRuleWritePermCheck(arg, me)
|
||||
|
||||
if ar.Name != f.Name {
|
||||
num, err := models.AlertRuleCount("group_id=? and name=? and id<>?", ar.GroupId, f.Name, ar.Id)
|
||||
dangerous(err)
|
||||
|
||||
if num > 0 {
|
||||
bomb(200, "Alert rule %s already exists", f.Name)
|
||||
}
|
||||
}
|
||||
|
||||
ar.Name = f.Name
|
||||
ar.Note = f.Note
|
||||
ar.Type = f.Type
|
||||
ar.Status = f.Status
|
||||
ar.AlertDuration = f.AlertDuration
|
||||
ar.Expression = f.Expression
|
||||
ar.AppendTags = f.AppendTags
|
||||
ar.EnableStime = f.EnableStime
|
||||
ar.EnableEtime = f.EnableEtime
|
||||
ar.EnableDaysOfWeek = f.EnableDaysOfWeek
|
||||
ar.RecoveryNotify = f.RecoveryNotify
|
||||
ar.Priority = f.Priority
|
||||
ar.NotifyChannels = f.NotifyChannels
|
||||
ar.NotifyGroups = f.NotifyGroups
|
||||
ar.NotifyUsers = f.NotifyUsers
|
||||
ar.Callbacks = f.Callbacks
|
||||
ar.RunbookUrl = f.RunbookUrl
|
||||
ar.CreateBy = me.Username
|
||||
ar.UpdateAt = time.Now().Unix()
|
||||
ar.UpdateBy = me.Username
|
||||
|
||||
renderMessage(c, ar.Update(
|
||||
"name",
|
||||
"note",
|
||||
"type",
|
||||
"status",
|
||||
"alert_duration",
|
||||
"expression",
|
||||
"res_filters",
|
||||
"tags_filters",
|
||||
"append_tags",
|
||||
"enable_stime",
|
||||
"enable_etime",
|
||||
"enable_days_of_week",
|
||||
"recovery_notify",
|
||||
"priority",
|
||||
"notify_channels",
|
||||
"notify_groups",
|
||||
"notify_users",
|
||||
"callbacks",
|
||||
"runbook_url",
|
||||
"update_at",
|
||||
"update_by",
|
||||
))
|
||||
}
|
||||
|
||||
// alertRuleStatusForm is the JSON body bound by alertRuleStatusPut: the ids
// of the alert rules to change and the status to apply to them.
type alertRuleStatusForm struct {
	Ids    []int64 `json:"ids"`
	Status int     `json:"status"`
}
|
||||
|
||||
func alertRuleStatusPut(c *gin.Context) {
|
||||
var f alertRuleStatusForm
|
||||
bind(c, &f)
|
||||
me := loginUser(c).MustPerm("alert_rule_modify")
|
||||
|
||||
if len(f.Ids) == 0 {
|
||||
bomb(http.StatusBadRequest, "ids is empty")
|
||||
}
|
||||
|
||||
for _, id := range f.Ids {
|
||||
alertRule := AlertRule(id)
|
||||
arg := AlertRuleGroup(alertRule.GroupId)
|
||||
alertRuleWritePermCheck(arg, me)
|
||||
}
|
||||
|
||||
renderMessage(c, models.AlertRuleUpdateStatus(f.Ids, f.Status, me.Username))
|
||||
}
|
||||
|
||||
// alertRuleNotifyGroupsForm is the JSON body bound by
// alertRuleNotifyGroupsPut: the ids of the alert rules to change and the
// notify groups / notify users strings to apply to them.
type alertRuleNotifyGroupsForm struct {
	Ids          []int64 `json:"ids"`
	NotifyGroups string  `json:"notify_groups"`
	NotifyUsers  string  `json:"notify_users"`
}
|
||||
|
||||
func alertRuleNotifyGroupsPut(c *gin.Context) {
|
||||
var f alertRuleNotifyGroupsForm
|
||||
bind(c, &f)
|
||||
//用户有修改告警策略的权限
|
||||
me := loginUser(c).MustPerm("alert_rule_modify")
|
||||
//id不存在
|
||||
if len(f.Ids) == 0 {
|
||||
bomb(http.StatusBadRequest, "ids is empty")
|
||||
}
|
||||
|
||||
for _, id := range f.Ids {
|
||||
alertRule := AlertRule(id)
|
||||
arg := AlertRuleGroup(alertRule.GroupId)
|
||||
alertRuleWritePermCheck(arg, me)
|
||||
}
|
||||
|
||||
renderMessage(c, models.AlertRuleUpdateNotifyGroups(f.Ids, f.NotifyGroups, f.NotifyUsers, me.Username))
|
||||
}
|
||||
|
||||
// alertRuleNotifyChannelsForm is the JSON body bound by
// alertRuleNotifyChannelsPut: the ids of the alert rules to change and the
// notify channels string to apply to them.
type alertRuleNotifyChannelsForm struct {
	Ids            []int64 `json:"ids"`
	NotifyChannels string  `json:"notify_channels"`
}
|
||||
|
||||
func alertRuleNotifyChannelsPut(c *gin.Context) {
|
||||
var f alertRuleNotifyChannelsForm
|
||||
bind(c, &f)
|
||||
me := loginUser(c).MustPerm("alert_rule_modify")
|
||||
if len(f.Ids) == 0 {
|
||||
bomb(http.StatusBadRequest, "ids is empty")
|
||||
}
|
||||
|
||||
for _, id := range f.Ids {
|
||||
alertRule := AlertRule(id)
|
||||
arg := AlertRuleGroup(alertRule.GroupId)
|
||||
alertRuleWritePermCheck(arg, me)
|
||||
}
|
||||
|
||||
renderMessage(c, models.AlertRuleUpdateNotifyChannels(f.Ids, f.NotifyChannels, me.Username))
|
||||
}
|
||||
|
||||
// alertRuleAppendTagsForm is the JSON body bound by alertRuleAppendTagsPut:
// the ids of the alert rules to change and the append-tags string to apply to
// them.
type alertRuleAppendTagsForm struct {
	Ids        []int64 `json:"ids"`
	AppendTags string  `json:"append_tags"`
}
|
||||
|
||||
func alertRuleAppendTagsPut(c *gin.Context) {
|
||||
var f alertRuleAppendTagsForm
|
||||
bind(c, &f)
|
||||
me := loginUser(c).MustPerm("alert_rule_modify")
|
||||
if len(f.Ids) == 0 {
|
||||
bomb(http.StatusBadRequest, "ids is empty")
|
||||
}
|
||||
|
||||
for _, id := range f.Ids {
|
||||
alertRule := AlertRule(id)
|
||||
arg := AlertRuleGroup(alertRule.GroupId)
|
||||
alertRuleWritePermCheck(arg, me)
|
||||
}
|
||||
|
||||
renderMessage(c, models.AlertRuleUpdateAppendTags(f.Ids, f.AppendTags, me.Username))
|
||||
}
|
||||
|
||||
func alertRuleDel(c *gin.Context) {
|
||||
me := loginUser(c).MustPerm("alert_rule_delete")
|
||||
alertRule := AlertRule(urlParamInt64(c, "id"))
|
||||
arg := AlertRuleGroup(alertRule.GroupId)
|
||||
alertRuleWritePermCheck(arg, me)
|
||||
|
||||
renderMessage(c, alertRule.Del())
|
||||
}
|
||||
|
||||
func notifyChannelsGet(c *gin.Context) {
|
||||
renderData(c, config.Config.NotifyChannels, nil)
|
||||
}
|
||||
|
||||
func alertRuleFillUserAndGroups(alertRule *models.AlertRule) {
|
||||
uidStrs := strings.Fields(alertRule.NotifyUsers)
|
||||
userlen := len(uidStrs)
|
||||
users := make([]*models.User, 0, userlen)
|
||||
if userlen > 0 {
|
||||
// 是否有用户已经被删除的情况出现
|
||||
userMiss := false
|
||||
|
||||
for _, uidStr := range uidStrs {
|
||||
uid, err := strconv.ParseInt(uidStr, 10, 64)
|
||||
if err != nil {
|
||||
userMiss = true
|
||||
continue
|
||||
}
|
||||
|
||||
user := cache.UserCache.GetById(uid)
|
||||
if user != nil {
|
||||
users = append(users, user)
|
||||
continue
|
||||
}
|
||||
|
||||
// uid在cache里找不到,可能是还没来得及缓存,也可能是被删除了
|
||||
// 去查一下数据库,如果确实找不到了,就更新一下
|
||||
user, err = models.UserGetById(uid)
|
||||
if err != nil {
|
||||
logger.Error("UserGetById fail:", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if user != nil {
|
||||
users = append(users, user)
|
||||
} else {
|
||||
userMiss = true
|
||||
}
|
||||
}
|
||||
|
||||
if userMiss {
|
||||
userIdsNew := make([]string, len(users))
|
||||
for i := 0; i < len(users); i++ {
|
||||
userIdsNew[i] = fmt.Sprint(users[i].Id)
|
||||
}
|
||||
|
||||
alertRule.NotifyUsers = strings.Join(userIdsNew, " ")
|
||||
alertRule.UpdateAt = time.Now().Unix()
|
||||
alertRule.Update("notify_users", "update_at")
|
||||
}
|
||||
}
|
||||
|
||||
// 最终存活的user列表,赋值给alertRule
|
||||
alertRule.NotifyUsersDetail = users
|
||||
|
||||
gidStrs := strings.Fields(alertRule.NotifyGroups)
|
||||
grplen := len(gidStrs)
|
||||
grps := make([]*models.UserGroup, 0, grplen)
|
||||
|
||||
if grplen > 0 {
|
||||
grpMiss := false
|
||||
|
||||
for _, gidStr := range gidStrs {
|
||||
gid, err := strconv.ParseInt(gidStr, 10, 64)
|
||||
if err != nil {
|
||||
grpMiss = true
|
||||
continue
|
||||
}
|
||||
|
||||
grp := cache.UserGroupCache.GetBy(gid)
|
||||
if grp != nil {
|
||||
grps = append(grps, grp)
|
||||
continue
|
||||
}
|
||||
|
||||
grp, err = models.UserGroupGet("id=?", gid)
|
||||
if err != nil {
|
||||
logger.Error("UserGroupGet fail:", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if grp != nil {
|
||||
grps = append(grps, grp)
|
||||
} else {
|
||||
grpMiss = true
|
||||
}
|
||||
}
|
||||
|
||||
if grpMiss {
|
||||
grpIdsNew := make([]string, len(grps))
|
||||
for i := 0; i < len(grps); i++ {
|
||||
grpIdsNew[i] = fmt.Sprint(grps[i].Id)
|
||||
}
|
||||
|
||||
alertRule.NotifyGroups = strings.Join(grpIdsNew, " ")
|
||||
alertRule.UpdateAt = time.Now().Unix()
|
||||
alertRule.Update("notify_groups", "update_at")
|
||||
}
|
||||
}
|
||||
|
||||
alertRule.NotifyGroupsDetail = grps
|
||||
}
|
|
@ -1,191 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/cache"
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
func alertRuleGroupGets(c *gin.Context) {
|
||||
limit := queryInt(c, "limit", defaultLimit)
|
||||
query := queryStr(c, "query", "")
|
||||
|
||||
total, err := models.AlertRuleGroupTotal(query)
|
||||
dangerous(err)
|
||||
|
||||
list, err := models.AlertRuleGroupGets(query, limit, offset(c, limit))
|
||||
dangerous(err)
|
||||
|
||||
renderData(c, gin.H{
|
||||
"list": list,
|
||||
"total": total,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func alertRuleGroupFavoriteGet(c *gin.Context) {
|
||||
lst, err := loginUser(c).FavoriteAlertRuleGroups()
|
||||
renderData(c, lst, err)
|
||||
}
|
||||
|
||||
// alertRuleGroupForm is the JSON body bound by alertRuleGroupAdd and
// alertRuleGroupPut: the group name and its UserGroupIds string.
type alertRuleGroupForm struct {
	Name         string `json:"name"`
	UserGroupIds string `json:"user_group_ids"`
}
|
||||
|
||||
func alertRuleGroupAdd(c *gin.Context) {
|
||||
var f alertRuleGroupForm
|
||||
bind(c, &f)
|
||||
|
||||
me := loginUser(c).MustPerm("alert_rule_group_create")
|
||||
|
||||
arg := models.AlertRuleGroup{
|
||||
Name: f.Name,
|
||||
UserGroupIds: f.UserGroupIds,
|
||||
CreateBy: me.Username,
|
||||
UpdateBy: me.Username,
|
||||
}
|
||||
|
||||
err := arg.Add()
|
||||
if err == nil {
|
||||
// 我创建的,顺便设置为我关注的
|
||||
models.AlertRuleGroupFavoriteAdd(arg.Id, me.Id)
|
||||
}
|
||||
|
||||
renderMessage(c, err)
|
||||
}
|
||||
|
||||
func alertRuleGroupGet(c *gin.Context) {
|
||||
alertRuleGroup := AlertRuleGroup(urlParamInt64(c, "id"))
|
||||
alertRuleGroup.FillUserGroups()
|
||||
renderData(c, alertRuleGroup, nil)
|
||||
}
|
||||
|
||||
func alertRuleOfGroupGet(c *gin.Context) {
|
||||
ars, err := models.AlertRulesOfGroup(urlParamInt64(c, "id"))
|
||||
for i := range ars {
|
||||
alertRuleFillUserAndGroups(&ars[i])
|
||||
}
|
||||
|
||||
renderData(c, ars, err)
|
||||
}
|
||||
|
||||
func alertRuleOfGroupDel(c *gin.Context) {
|
||||
var f idsForm
|
||||
bind(c, &f)
|
||||
f.Validate()
|
||||
|
||||
me := loginUser(c).MustPerm("alert_rule_delete")
|
||||
|
||||
// 可能大部分alert_rule都来自同一个alert_rule_group,所以权限判断可以无需重复判断
|
||||
cachePerm := make(map[string]struct{})
|
||||
|
||||
for i := 0; i < len(f.Ids); i++ {
|
||||
ar := AlertRule(f.Ids[i])
|
||||
|
||||
cacheKey := fmt.Sprintf("%d,%d", f.Ids[i], ar.GroupId)
|
||||
if _, has := cachePerm[cacheKey]; has {
|
||||
continue
|
||||
}
|
||||
|
||||
arg := AlertRuleGroup(ar.GroupId)
|
||||
alertRuleWritePermCheck(arg, me)
|
||||
cachePerm[cacheKey] = struct{}{}
|
||||
}
|
||||
|
||||
renderMessage(c, models.AlertRulesDel(f.Ids))
|
||||
}
|
||||
|
||||
func alertRuleGroupPut(c *gin.Context) {
|
||||
var f alertRuleGroupForm
|
||||
bind(c, &f)
|
||||
|
||||
me := loginUser(c).MustPerm("alert_rule_group_modify")
|
||||
arg := AlertRuleGroup(urlParamInt64(c, "id"))
|
||||
alertRuleWritePermCheck(arg, me)
|
||||
|
||||
if arg.Name != f.Name {
|
||||
num, err := models.AlertRuleGroupCount("name=? and id<>?", f.Name, arg.Id)
|
||||
dangerous(err)
|
||||
|
||||
if num > 0 {
|
||||
bomb(200, "AlertRuleGroup %s already exists", f.Name)
|
||||
}
|
||||
}
|
||||
|
||||
arg.Name = f.Name
|
||||
arg.UserGroupIds = f.UserGroupIds
|
||||
arg.UpdateBy = me.Username
|
||||
arg.UpdateAt = time.Now().Unix()
|
||||
|
||||
renderMessage(c, arg.Update("name", "update_by", "update_at", "user_group_ids"))
|
||||
}
|
||||
|
||||
func alertRuleGroupDel(c *gin.Context) {
|
||||
me := loginUser(c).MustPerm("alert_rule_group_delete")
|
||||
arg := AlertRuleGroup(urlParamInt64(c, "id"))
|
||||
alertRuleWritePermCheck(arg, me)
|
||||
|
||||
renderMessage(c, arg.Del())
|
||||
}
|
||||
|
||||
func alertRuleGroupFavoriteAdd(c *gin.Context) {
|
||||
me := loginUser(c)
|
||||
arg := AlertRuleGroup(urlParamInt64(c, "id"))
|
||||
renderMessage(c, models.AlertRuleGroupFavoriteAdd(arg.Id, me.Id))
|
||||
}
|
||||
|
||||
func alertRuleGroupFavoriteDel(c *gin.Context) {
|
||||
me := loginUser(c)
|
||||
arg := AlertRuleGroup(urlParamInt64(c, "id"))
|
||||
renderMessage(c, models.AlertRuleGroupFavoriteDel(arg.Id, me.Id))
|
||||
}
|
||||
|
||||
func alertRuleWritePermCheck(alertRuleGroup *models.AlertRuleGroup, user *models.User) {
|
||||
roles := strings.Fields(user.RolesForDB)
|
||||
for i := 0; i < len(roles); i++ {
|
||||
if roles[i] == "Admin" {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
gids := IdsInt64(alertRuleGroup.UserGroupIds)
|
||||
if len(gids) == 0 {
|
||||
// 压根没有配置管理团队,表示对所有Standard角色放开,那就不校验了
|
||||
return
|
||||
}
|
||||
|
||||
for _, gid := range gids {
|
||||
if cache.UserGroupMember.Exists(gid, user.Id) {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
bomb(http.StatusForbidden, "no permission")
|
||||
}
|
||||
|
||||
// IdsInt64 parses a whitespace-separated list of decimal IDs, e.g. "1 2 3".
//
// Tokens that are not valid base-10 int64 values are skipped. The returned
// slice is never nil; it is empty when ids contains no parsable token.
func IdsInt64(ids string) []int64 {
	fields := strings.Fields(ids)
	ret := make([]int64, 0, len(fields))
	for _, field := range fields {
		// strings.Fields never yields empty tokens, so parsing is the only
		// thing that can fail here.
		id, err := strconv.ParseInt(field, 10, 64)
		if err != nil {
			continue
		}
		ret = append(ret, id)
	}
	return ret
}
|
|
@ -1,92 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"github.com/gin-contrib/sessions"
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/config"
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
// loginForm is the JSON body bound by loginPost.
type loginForm struct {
	Username string `json:"username"` // login name
	Password string `json:"password"` // password as submitted by the client
}
|
||||
|
||||
func loginPost(c *gin.Context) {
|
||||
var f loginForm
|
||||
bind(c, &f)
|
||||
|
||||
user, err1 := models.PassLogin(f.Username, f.Password)
|
||||
if err1 == nil {
|
||||
if user.Status == 1 {
|
||||
renderMessage(c, "User disabled")
|
||||
return
|
||||
}
|
||||
session := sessions.Default(c)
|
||||
session.Set("username", f.Username)
|
||||
session.Save()
|
||||
renderData(c, user, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// password login fail, try ldap
|
||||
if config.Config.LDAP.Enable {
|
||||
user, err2 := models.LdapLogin(f.Username, f.Password)
|
||||
if err2 == nil {
|
||||
if user.Status == 1 {
|
||||
renderMessage(c, "User disabled")
|
||||
return
|
||||
}
|
||||
session := sessions.Default(c)
|
||||
session.Set("username", f.Username)
|
||||
session.Save()
|
||||
renderData(c, user, nil)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// password and ldap both fail
|
||||
renderMessage(c, err1)
|
||||
}
|
||||
|
||||
func logoutGet(c *gin.Context) {
|
||||
session := sessions.Default(c)
|
||||
session.Set("username", "")
|
||||
session.Save()
|
||||
renderMessage(c, nil)
|
||||
}
|
||||
|
||||
func canDoOpByName(c *gin.Context) {
|
||||
user, err := models.UserGetByUsername(queryStr(c, "name"))
|
||||
dangerous(err)
|
||||
|
||||
if user == nil {
|
||||
renderData(c, false, err)
|
||||
return
|
||||
}
|
||||
|
||||
can, err := user.CanDo(queryStr(c, "op"))
|
||||
renderData(c, can, err)
|
||||
}
|
||||
|
||||
func canDoOpByToken(c *gin.Context) {
|
||||
userToken, err := models.UserTokenGet("token=?", queryStr(c, "token"))
|
||||
dangerous(err)
|
||||
|
||||
if userToken == nil {
|
||||
renderData(c, false, err)
|
||||
return
|
||||
}
|
||||
|
||||
user, err := models.UserGetByUsername(userToken.Username)
|
||||
dangerous(err)
|
||||
|
||||
if user == nil {
|
||||
renderData(c, false, err)
|
||||
return
|
||||
}
|
||||
|
||||
can, err := user.CanDo(queryStr(c, "op"))
|
||||
renderData(c, can, err)
|
||||
}
|
|
@ -1,82 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
func chartGets(c *gin.Context) {
|
||||
objs, err := models.ChartGets(urlParamInt64(c, "id"))
|
||||
renderData(c, objs, err)
|
||||
}
|
||||
|
||||
// chartForm is the JSON body bound by chartAdd.
type chartForm struct {
	Configs string `json:"configs"` // chart configuration, stored as given
	Weight  int    `json:"weight"`  // stored as the chart's Weight
}
|
||||
|
||||
func chartAdd(c *gin.Context) {
|
||||
var f chartForm
|
||||
bind(c, &f)
|
||||
|
||||
loginUser(c).MustPerm("dashboard_modify")
|
||||
|
||||
cg := ChartGroup(urlParamInt64(c, "id"))
|
||||
ct := models.Chart{
|
||||
GroupId: cg.Id,
|
||||
Configs: f.Configs,
|
||||
Weight: f.Weight,
|
||||
}
|
||||
|
||||
dangerous(ct.Add())
|
||||
|
||||
renderData(c, ct, nil)
|
||||
}
|
||||
|
||||
// chartPutForm is the JSON body bound by chartPut.
type chartPutForm struct {
	Configs string `json:"configs"` // replacement chart configuration
}
|
||||
|
||||
func chartPut(c *gin.Context) {
|
||||
var f chartPutForm
|
||||
bind(c, &f)
|
||||
|
||||
loginUser(c).MustPerm("dashboard_modify")
|
||||
|
||||
ct := Chart(urlParamInt64(c, "id"))
|
||||
ct.Configs = f.Configs
|
||||
|
||||
dangerous(ct.Update("configs"))
|
||||
|
||||
renderData(c, ct, nil)
|
||||
}
|
||||
|
||||
func chartDel(c *gin.Context) {
|
||||
loginUser(c).MustPerm("dashboard_modify")
|
||||
renderMessage(c, Chart(urlParamInt64(c, "id")).Del())
|
||||
}
|
||||
|
||||
// chartConfig is one element of the JSON array bound by chartConfigsPut.
type chartConfig struct {
	Id      int64  `json:"id"`       // chart to update
	GroupId int64  `json:"group_id"` // new chart group; applied only when > 0
	Configs string `json:"configs"`  // replacement chart configuration
}
|
||||
|
||||
func chartConfigsPut(c *gin.Context) {
|
||||
var arr []chartConfig
|
||||
bind(c, &arr)
|
||||
|
||||
loginUser(c).MustPerm("dashboard_modify")
|
||||
|
||||
for i := 0; i < len(arr); i++ {
|
||||
ct := Chart(arr[i].Id)
|
||||
ct.Configs = arr[i].Configs
|
||||
if arr[i].GroupId > 0 {
|
||||
ct.GroupId = arr[i].GroupId
|
||||
}
|
||||
dangerous(ct.Update("configs", "group_id"))
|
||||
}
|
||||
|
||||
renderMessage(c, nil)
|
||||
}
|
|
@ -1,55 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
func chartGroupGets(c *gin.Context) {
|
||||
objs, err := models.ChartGroupGets(urlParamInt64(c, "id"))
|
||||
renderData(c, objs, err)
|
||||
}
|
||||
|
||||
// chartGroupForm is the JSON body bound by chartGroupAdd.
type chartGroupForm struct {
	Name   string `json:"name"`   // chart group name
	Weight int    `json:"weight"` // stored as the chart group's Weight
}
|
||||
|
||||
func chartGroupAdd(c *gin.Context) {
|
||||
var f chartGroupForm
|
||||
bind(c, &f)
|
||||
|
||||
loginUser(c).MustPerm("dashboard_modify")
|
||||
|
||||
d := Dashboard(urlParamInt64(c, "id"))
|
||||
|
||||
cg := models.ChartGroup{
|
||||
DashboardId: d.Id,
|
||||
Name: f.Name,
|
||||
Weight: f.Weight,
|
||||
}
|
||||
|
||||
dangerous(cg.Add())
|
||||
|
||||
renderData(c, cg, nil)
|
||||
}
|
||||
|
||||
func chartGroupsPut(c *gin.Context) {
|
||||
var arr []models.ChartGroup
|
||||
bind(c, &arr)
|
||||
|
||||
loginUser(c).MustPerm("dashboard_modify")
|
||||
|
||||
for i := 0; i < len(arr); i++ {
|
||||
dangerous(arr[i].Update("name", "weight"))
|
||||
}
|
||||
|
||||
renderMessage(c, nil)
|
||||
}
|
||||
|
||||
func chartGroupDel(c *gin.Context) {
|
||||
loginUser(c).MustPerm("dashboard_modify")
|
||||
cg := models.ChartGroup{Id: urlParamInt64(c, "id")}
|
||||
renderMessage(c, cg.Del())
|
||||
}
|
|
@ -1,50 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
// chartTmpForm is one element of the JSON array bound by chartTmpAdd.
type chartTmpForm struct {
	Configs string `json:"configs"` // configuration of the temporary chart
}
|
||||
|
||||
func chartTmpAdd(c *gin.Context) {
|
||||
ids := []int64{}
|
||||
|
||||
var forms []chartTmpForm
|
||||
bind(c, &forms)
|
||||
|
||||
for _, f := range forms {
|
||||
chart := models.ChartTmp{
|
||||
Configs: f.Configs,
|
||||
CreateBy: loginUsername(c),
|
||||
CreateAt: time.Now().Unix(),
|
||||
}
|
||||
dangerous(chart.Add())
|
||||
ids = append(ids, chart.Id)
|
||||
}
|
||||
|
||||
renderData(c, ids, nil)
|
||||
}
|
||||
|
||||
func chartTmpGets(c *gin.Context) {
|
||||
objs := []*models.ChartTmp{}
|
||||
idStr := queryStr(c, "ids")
|
||||
ids := strings.Split(idStr, ",")
|
||||
for _, id := range ids {
|
||||
i, err := strconv.ParseInt(id, 10, 64)
|
||||
dangerous(err)
|
||||
|
||||
obj, err := models.ChartTmpGet("id=?", i)
|
||||
dangerous(err)
|
||||
objs = append(objs, obj)
|
||||
}
|
||||
|
||||
renderData(c, objs, nil)
|
||||
}
|
|
@ -1,152 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
func classpathListGets(c *gin.Context) {
|
||||
limit := queryInt(c, "limit", defaultLimit)
|
||||
query := queryStr(c, "query", "")
|
||||
|
||||
total, err := models.ClasspathTotal(query)
|
||||
dangerous(err)
|
||||
|
||||
list, err := models.ClasspathGets(query, limit, offset(c, limit))
|
||||
dangerous(err)
|
||||
|
||||
renderData(c, gin.H{
|
||||
"list": list,
|
||||
"total": total,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
//此api暂时不对外开放
|
||||
func classpathListNodeGets(c *gin.Context) {
|
||||
query := queryStr(c, "query", "")
|
||||
|
||||
list, err := models.ClasspathNodeGets(query)
|
||||
dangerous(err)
|
||||
|
||||
renderData(c, list, nil)
|
||||
}
|
||||
|
||||
func classpathListNodeGetsById(c *gin.Context) {
|
||||
cp := Classpath(urlParamInt64(c, "id"))
|
||||
children, err := cp.DirectChildren()
|
||||
renderData(c, children, err)
|
||||
}
|
||||
|
||||
func classpathFavoriteGet(c *gin.Context) {
|
||||
lst, err := loginUser(c).FavoriteClasspaths()
|
||||
renderData(c, lst, err)
|
||||
}
|
||||
|
||||
// classpathForm is the JSON body bound by classpathAdd and classpathPut.
type classpathForm struct {
	Path string `json:"path"` // classpath path
	Note string `json:"note"` // free-form note
}
|
||||
|
||||
func classpathAdd(c *gin.Context) {
|
||||
var f classpathForm
|
||||
bind(c, &f)
|
||||
|
||||
me := loginUser(c).MustPerm("classpath_create")
|
||||
|
||||
cp := models.Classpath{
|
||||
Path: f.Path,
|
||||
Note: f.Note,
|
||||
Preset: 0,
|
||||
CreateBy: me.Username,
|
||||
UpdateBy: me.Username,
|
||||
}
|
||||
|
||||
renderMessage(c, cp.Add())
|
||||
}
|
||||
|
||||
func classpathPut(c *gin.Context) {
|
||||
var f classpathForm
|
||||
bind(c, &f)
|
||||
|
||||
me := loginUser(c).MustPerm("classpath_modify")
|
||||
cp := Classpath(urlParamInt64(c, "id"))
|
||||
|
||||
if cp.Path != f.Path {
|
||||
num, err := models.ClasspathCount("path=? and id<>?", f.Path, cp.Id)
|
||||
dangerous(err)
|
||||
|
||||
if num > 0 {
|
||||
bomb(200, "Classpath %s already exists", f.Path)
|
||||
}
|
||||
}
|
||||
|
||||
cp.Path = f.Path
|
||||
cp.Note = f.Note
|
||||
cp.UpdateBy = me.Username
|
||||
cp.UpdateAt = time.Now().Unix()
|
||||
|
||||
renderMessage(c, cp.Update("path", "note", "update_by", "update_at"))
|
||||
}
|
||||
|
||||
func classpathDel(c *gin.Context) {
|
||||
loginUser(c).MustPerm("classpath_delete")
|
||||
|
||||
cp := Classpath(urlParamInt64(c, "id"))
|
||||
if cp.Preset == 1 {
|
||||
bomb(200, "Preset classpath %s cannot delete", cp.Path)
|
||||
}
|
||||
|
||||
renderMessage(c, cp.Del())
|
||||
}
|
||||
|
||||
func classpathAddResources(c *gin.Context) {
|
||||
var arr []string
|
||||
bind(c, &arr)
|
||||
|
||||
me := loginUser(c).MustPerm("classpath_add_resource")
|
||||
cp := Classpath(urlParamInt64(c, "id"))
|
||||
|
||||
dangerous(cp.AddResources(arr))
|
||||
|
||||
cp.UpdateAt = time.Now().Unix()
|
||||
cp.UpdateBy = me.Username
|
||||
cp.Update("update_at", "update_by")
|
||||
|
||||
renderMessage(c, nil)
|
||||
}
|
||||
|
||||
func classpathDelResources(c *gin.Context) {
|
||||
var arr []string
|
||||
bind(c, &arr)
|
||||
classpathId := urlParamInt64(c, "id")
|
||||
me := loginUser(c).MustPerm("classpath_del_resource")
|
||||
|
||||
if classpathId == 1 {
|
||||
bomb(200, _s("Resource cannot delete in preset classpath"))
|
||||
}
|
||||
|
||||
cp := Classpath(classpathId)
|
||||
|
||||
dangerous(cp.DelResources(arr))
|
||||
|
||||
cp.UpdateAt = time.Now().Unix()
|
||||
cp.UpdateBy = me.Username
|
||||
cp.Update("update_at", "update_by")
|
||||
|
||||
renderMessage(c, nil)
|
||||
}
|
||||
|
||||
func classpathFavoriteAdd(c *gin.Context) {
|
||||
me := loginUser(c)
|
||||
cp := Classpath(urlParamInt64(c, "id"))
|
||||
renderMessage(c, models.ClasspathFavoriteAdd(cp.Id, me.Id))
|
||||
}
|
||||
|
||||
func classpathFavoriteDel(c *gin.Context) {
|
||||
me := loginUser(c)
|
||||
cp := Classpath(urlParamInt64(c, "id"))
|
||||
renderMessage(c, models.ClasspathFavoriteDel(cp.Id, me.Id))
|
||||
}
|
|
@ -1,283 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/cache"
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
// collectRuleForm is the JSON body bound by the collect rule create and
// update handlers. Each field is copied onto the models.CollectRule field of
// the same name.
type collectRuleForm struct {
	ClasspathId int64  `json:"classpath_id"`
	PrefixMatch int    `json:"prefix_match"`
	Name        string `json:"name"`
	Note        string `json:"note"`
	Step        int    `json:"step"`
	Type        string `json:"type"`
	Data        string `json:"data"`
	AppendTags  string `json:"append_tags"`
}
|
||||
|
||||
func collectRuleAdd(c *gin.Context) {
|
||||
var f collectRuleForm
|
||||
bind(c, &f)
|
||||
|
||||
me := loginUser(c).MustPerm("collect_rule_create")
|
||||
|
||||
cr := models.CollectRule{
|
||||
ClasspathId: f.ClasspathId,
|
||||
PrefixMatch: f.PrefixMatch,
|
||||
Name: f.Name,
|
||||
Note: f.Note,
|
||||
Step: f.Step,
|
||||
Type: f.Type,
|
||||
Data: f.Data,
|
||||
AppendTags: f.AppendTags,
|
||||
CreateBy: me.Username,
|
||||
UpdateBy: me.Username,
|
||||
}
|
||||
|
||||
renderMessage(c, cr.Add())
|
||||
}
|
||||
|
||||
func collectRulesAdd(c *gin.Context) {
|
||||
var forms []collectRuleForm
|
||||
bind(c, &forms)
|
||||
|
||||
me := loginUser(c).MustPerm("collect_rule_create")
|
||||
|
||||
for _, f := range forms {
|
||||
cr := models.CollectRule{
|
||||
ClasspathId: f.ClasspathId,
|
||||
PrefixMatch: f.PrefixMatch,
|
||||
Name: f.Name,
|
||||
Note: f.Note,
|
||||
Step: f.Step,
|
||||
Type: f.Type,
|
||||
Data: f.Data,
|
||||
AppendTags: f.AppendTags,
|
||||
CreateBy: me.Username,
|
||||
UpdateBy: me.Username,
|
||||
}
|
||||
|
||||
dangerous(cr.Add())
|
||||
}
|
||||
|
||||
renderMessage(c, nil)
|
||||
}
|
||||
|
||||
func collectRulePut(c *gin.Context) {
|
||||
var f collectRuleForm
|
||||
bind(c, &f)
|
||||
|
||||
me := loginUser(c).MustPerm("collect_rule_modify")
|
||||
cr := CollectRule(urlParamInt64(c, "id"))
|
||||
|
||||
cr.PrefixMatch = f.PrefixMatch
|
||||
cr.Name = f.Name
|
||||
cr.Note = f.Note
|
||||
cr.Step = f.Step
|
||||
cr.Type = f.Type
|
||||
cr.Data = f.Data
|
||||
cr.AppendTags = f.AppendTags
|
||||
cr.UpdateAt = time.Now().Unix()
|
||||
cr.UpdateBy = me.Username
|
||||
|
||||
renderMessage(c, cr.Update(
|
||||
"prefix_match",
|
||||
"name",
|
||||
"note",
|
||||
"step",
|
||||
"type",
|
||||
"data",
|
||||
"update_at",
|
||||
"update_by",
|
||||
"append_tags",
|
||||
))
|
||||
}
|
||||
|
||||
func collectRuleDel(c *gin.Context) {
|
||||
var f idsForm
|
||||
bind(c, &f)
|
||||
f.Validate()
|
||||
loginUser(c).MustPerm("collect_rule_delete")
|
||||
renderMessage(c, models.CollectRulesDel(f.Ids))
|
||||
}
|
||||
|
||||
func collectRuleGets(c *gin.Context) {
|
||||
classpathId := urlParamInt64(c, "id")
|
||||
|
||||
where := "classpath_id = ?"
|
||||
param := []interface{}{classpathId}
|
||||
|
||||
typ := queryStr(c, "type", "")
|
||||
if typ != "" {
|
||||
where += " and type = ?"
|
||||
param = append(param, typ)
|
||||
}
|
||||
|
||||
objs, err := models.CollectRuleGets(where, param...)
|
||||
renderData(c, objs, err)
|
||||
}
|
||||
|
||||
func collectRuleGetsByIdent(c *gin.Context) {
|
||||
ident := queryStr(c, "ident")
|
||||
|
||||
objs := cache.CollectRulesOfIdent.GetBy(ident)
|
||||
renderData(c, objs, nil)
|
||||
}
|
||||
|
||||
// Summary is the payload rendered by collectRuleSummaryGetByIdent.
type Summary struct {
	LatestUpdatedAt int64 `json:"latest_updated_at"` // largest UpdateAt among the rules, 0 when there are none
	Total           int   `json:"total"`             // number of rules
}
|
||||
|
||||
func collectRuleSummaryGetByIdent(c *gin.Context) {
|
||||
ident := queryStr(c, "ident")
|
||||
var summary Summary
|
||||
objs := cache.CollectRulesOfIdent.GetBy(ident)
|
||||
total := len(objs)
|
||||
if total > 0 {
|
||||
summary.Total = total
|
||||
var latestUpdatedAt int64
|
||||
for _, obj := range objs {
|
||||
if latestUpdatedAt < obj.UpdateAt {
|
||||
latestUpdatedAt = obj.UpdateAt
|
||||
}
|
||||
}
|
||||
summary.LatestUpdatedAt = latestUpdatedAt
|
||||
}
|
||||
|
||||
renderData(c, summary, nil)
|
||||
}
|
||||
|
||||
// RegExpCheck is the payload rendered by regExpCheck.
type RegExpCheck struct {
	Success bool                `json:"success"` // false once any pattern fails its check
	Data    []map[string]string `json:"tags"`    // one single-entry map per checked key: match or error text
}
|
||||
|
||||
func regExpCheck(c *gin.Context) {
|
||||
param := make(map[string]string)
|
||||
dangerous(c.ShouldBind(¶m))
|
||||
|
||||
ret := &RegExpCheck{
|
||||
Success: true,
|
||||
Data: make([]map[string]string, 0),
|
||||
}
|
||||
|
||||
calcMethod := param["func"]
|
||||
if calcMethod == "" {
|
||||
tmp := map[string]string{"func": "is empty"}
|
||||
ret.Data = append(ret.Data, tmp)
|
||||
renderData(c, ret, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// 处理主正则
|
||||
if re, ok := param["re"]; !ok || re == "" {
|
||||
tmp := map[string]string{"re": "regex does not exist or is empty"}
|
||||
ret.Data = append(ret.Data, tmp)
|
||||
renderData(c, ret, nil)
|
||||
return
|
||||
}
|
||||
|
||||
// 匹配主正则
|
||||
suc, reRes, isSub := checkRegex(param["re"], param["log"])
|
||||
if !suc {
|
||||
ret.Success = false
|
||||
reRes = genErrMsg(param["re"])
|
||||
ret.Data = append(ret.Data, map[string]string{"re": reRes})
|
||||
renderData(c, ret, nil)
|
||||
return
|
||||
}
|
||||
if calcMethod == "histogram" && !isSub {
|
||||
ret.Success = false
|
||||
reRes = genSubErrMsg(param["re"])
|
||||
ret.Data = append(ret.Data, map[string]string{"re": reRes})
|
||||
renderData(c, ret, nil)
|
||||
return
|
||||
}
|
||||
|
||||
ret.Data = append(ret.Data, map[string]string{"re": reRes})
|
||||
// 处理tags
|
||||
var nonTagKey = map[string]bool{
|
||||
"re": true,
|
||||
"log": true,
|
||||
"func": true,
|
||||
}
|
||||
|
||||
for tagk, pat := range param {
|
||||
// 如果不是tag,就继续循环
|
||||
if _, ok := nonTagKey[tagk]; ok {
|
||||
continue
|
||||
}
|
||||
suc, tagRes, isSub := checkRegex(pat, param["log"])
|
||||
if !suc {
|
||||
// 正则错误
|
||||
ret.Success = false
|
||||
tagRes = genErrMsg(pat)
|
||||
} else if !isSub {
|
||||
// 未匹配出子串
|
||||
ret.Success = false
|
||||
tagRes = genSubErrMsg(pat)
|
||||
} else if includeIllegalChar(tagRes) || includeIllegalChar(tagk) {
|
||||
// 保留字报错
|
||||
ret.Success = false
|
||||
tagRes = genIllegalCharErrMsg()
|
||||
}
|
||||
|
||||
tmp := map[string]string{tagk: tagRes}
|
||||
ret.Data = append(ret.Data, tmp)
|
||||
}
|
||||
|
||||
renderData(c, ret, nil)
|
||||
}
|
||||
|
||||
// checkRegex matches pat against log.
//
// succ is false when pat is empty, fails to compile or does not match. On a
// match without capture groups, result is the whole match and isSub is false.
// With capture groups, result is the first group and isSub is true.
// The caller puts any error text straight into the response body.
func checkRegex(pat string, log string) (succ bool, result string, isSub bool) {
	if len(pat) == 0 {
		return false, "", false
	}

	compiled, err := regexp.Compile(pat)
	if err != nil {
		return false, "", false
	}

	groups := compiled.FindStringSubmatch(log)
	if len(groups) == 0 {
		// no match
		return false, "", false
	}
	if len(groups) == 1 {
		// no parenthesised group, hand back the whole match
		return true, groups[0], false
	}
	// captured something, take the first group by default
	return true, groups[1], true
}
|
||||
|
||||
// includeIllegalChar reports whether s contains a colon, comma, equals sign,
// carriage return, line feed or tab.
func includeIllegalChar(s string) bool {
	const reserved = ":,=\r\n\t"
	return strings.IndexAny(s, reserved) >= 0
}
|
||||
|
||||
// 生成返回错误信息
|
||||
func genErrMsg(pattern string) string {
|
||||
return _s("Regexp %s matching failed", pattern)
|
||||
}
|
||||
|
||||
// 生成子串匹配错误信息
|
||||
func genSubErrMsg(pattern string) string {
|
||||
return _s("Regexp %s matched, but cannot get substring()", pattern)
|
||||
}
|
||||
|
||||
// 生成子串匹配错误信息
|
||||
func genIllegalCharErrMsg() string {
|
||||
return _s(`TagKey or TagValue contains illegal characters[:,/=\r\n\t]`)
|
||||
}
|
|
@ -1,244 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
func dashboardGets(c *gin.Context) {
|
||||
limit := queryInt(c, "limit", defaultLimit)
|
||||
query := queryStr(c, "query", "")
|
||||
onlyfavorite := queryBool(c, "onlyfavorite", false)
|
||||
|
||||
me := loginUser(c)
|
||||
ids, err := me.FavoriteDashboardIds()
|
||||
dangerous(err)
|
||||
|
||||
// 我的收藏是空的,所以直接返回空列表
|
||||
if onlyfavorite && len(ids) == 0 {
|
||||
renderZeroPage(c)
|
||||
return
|
||||
}
|
||||
|
||||
total, err := models.DashboardTotal(onlyfavorite, ids, query)
|
||||
dangerous(err)
|
||||
|
||||
list, err := models.DashboardGets(onlyfavorite, ids, query, limit, offset(c, limit))
|
||||
dangerous(err)
|
||||
|
||||
if onlyfavorite {
|
||||
for i := 0; i < len(list); i++ {
|
||||
list[i].Favorite = 1
|
||||
}
|
||||
} else {
|
||||
for i := 0; i < len(list); i++ {
|
||||
list[i].FillFavorite(ids)
|
||||
}
|
||||
}
|
||||
|
||||
renderData(c, gin.H{
|
||||
"list": list,
|
||||
"total": total,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func dashboardGet(c *gin.Context) {
|
||||
renderData(c, Dashboard(urlParamInt64(c, "id")), nil)
|
||||
}
|
||||
|
||||
// dashboardForm is the JSON body bound by dashboardAdd, dashboardPut and
// dashboardClone.
type dashboardForm struct {
	Id      int64  `json:"id"`      // read only by dashboardClone: the dashboard to copy from
	Name    string `json:"name"`    // dashboard name
	Tags    string `json:"tags"`    // dashboard tags
	Configs string `json:"configs"` // dashboard configuration
}
|
||||
|
||||
func dashboardAdd(c *gin.Context) {
|
||||
var f dashboardForm
|
||||
bind(c, &f)
|
||||
|
||||
me := loginUser(c).MustPerm("dashboard_create")
|
||||
|
||||
d := &models.Dashboard{
|
||||
Name: f.Name,
|
||||
Tags: f.Tags,
|
||||
Configs: f.Configs,
|
||||
CreateBy: me.Username,
|
||||
UpdateBy: me.Username,
|
||||
}
|
||||
|
||||
dangerous(d.Add())
|
||||
|
||||
renderData(c, d, nil)
|
||||
}
|
||||
|
||||
func dashboardPut(c *gin.Context) {
|
||||
var f dashboardForm
|
||||
bind(c, &f)
|
||||
|
||||
me := loginUser(c).MustPerm("dashboard_modify")
|
||||
d := Dashboard(urlParamInt64(c, "id"))
|
||||
|
||||
if d.Name != f.Name {
|
||||
num, err := models.DashboardCount("name=? and id<>?", f.Name, d.Id)
|
||||
dangerous(err)
|
||||
|
||||
if num > 0 {
|
||||
bomb(200, "Dashboard %s already exists", f.Name)
|
||||
}
|
||||
}
|
||||
|
||||
d.Name = f.Name
|
||||
d.Tags = f.Tags
|
||||
d.Configs = f.Configs
|
||||
d.UpdateAt = time.Now().Unix()
|
||||
d.UpdateBy = me.Username
|
||||
|
||||
dangerous(d.Update("name", "tags", "configs", "update_at", "update_by"))
|
||||
|
||||
renderData(c, d, nil)
|
||||
}
|
||||
|
||||
func dashboardClone(c *gin.Context) {
|
||||
var f dashboardForm
|
||||
bind(c, &f)
|
||||
|
||||
me := loginUser(c).MustPerm("dashboard_create")
|
||||
|
||||
d := &models.Dashboard{
|
||||
Name: f.Name,
|
||||
Tags: f.Tags,
|
||||
Configs: f.Configs,
|
||||
CreateBy: me.Username,
|
||||
UpdateBy: me.Username,
|
||||
}
|
||||
dangerous(d.AddOnly())
|
||||
|
||||
chartGroups, err := models.ChartGroupGets(f.Id)
|
||||
dangerous(err)
|
||||
for _, chartGroup := range chartGroups {
|
||||
charts, err := models.ChartGets(chartGroup.Id)
|
||||
dangerous(err)
|
||||
chartGroup.DashboardId = d.Id
|
||||
chartGroup.Id = 0
|
||||
dangerous(chartGroup.Add())
|
||||
|
||||
for _, chart := range charts {
|
||||
chart.Id = 0
|
||||
chart.GroupId = chartGroup.Id
|
||||
dangerous(chart.Add())
|
||||
}
|
||||
}
|
||||
|
||||
renderData(c, d, nil)
|
||||
}
|
||||
|
||||
func dashboardDel(c *gin.Context) {
|
||||
loginUser(c).MustPerm("dashboard_delete")
|
||||
renderMessage(c, Dashboard(urlParamInt64(c, "id")).Del())
|
||||
}
|
||||
|
||||
func dashboardFavoriteAdd(c *gin.Context) {
|
||||
me := loginUser(c)
|
||||
d := Dashboard(urlParamInt64(c, "id"))
|
||||
renderMessage(c, models.DashboardFavoriteAdd(d.Id, me.Id))
|
||||
}
|
||||
|
||||
func dashboardFavoriteDel(c *gin.Context) {
|
||||
me := loginUser(c)
|
||||
d := Dashboard(urlParamInt64(c, "id"))
|
||||
renderMessage(c, models.DashboardFavoriteDel(d.Id, me.Id))
|
||||
}
|
||||
|
||||
type ChartGroupDetail struct {
|
||||
Id int64 `json:"id"`
|
||||
DashboardId int64 `json:"dashboard_id"`
|
||||
Name string `json:"name"`
|
||||
Weight int `json:"weight"`
|
||||
Charts []models.Chart `json:"charts"`
|
||||
}
|
||||
|
||||
type DashboardDetail struct {
|
||||
Id int64 `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Tags string `json:"tags"`
|
||||
Configs string `json:"configs"`
|
||||
ChartGroups []ChartGroupDetail `json:"chart_groups"`
|
||||
}
|
||||
|
||||
func dashboardExport(c *gin.Context) {
|
||||
var f idsForm
|
||||
bind(c, &f)
|
||||
dashboards, err := models.DashboardGetsByIds(f.Ids)
|
||||
dangerous(err)
|
||||
|
||||
var details []DashboardDetail
|
||||
for _, databoard := range dashboards {
|
||||
detail := DashboardDetail{
|
||||
Name: databoard.Name,
|
||||
Tags: databoard.Tags,
|
||||
Configs: databoard.Configs,
|
||||
}
|
||||
|
||||
chartGroups, err := models.ChartGroupGets(databoard.Id)
|
||||
dangerous(err)
|
||||
|
||||
var chartGroupsDetail []ChartGroupDetail
|
||||
for _, chartGroup := range chartGroups {
|
||||
chartGroupDetail := ChartGroupDetail{
|
||||
Name: chartGroup.Name,
|
||||
Weight: chartGroup.Weight,
|
||||
}
|
||||
|
||||
charts, err := models.ChartGets(chartGroup.Id)
|
||||
dangerous(err)
|
||||
|
||||
chartGroupDetail.Charts = charts
|
||||
chartGroupsDetail = append(chartGroupsDetail, chartGroupDetail)
|
||||
}
|
||||
detail.ChartGroups = chartGroupsDetail
|
||||
details = append(details, detail)
|
||||
}
|
||||
|
||||
renderData(c, details, nil)
|
||||
}
|
||||
|
||||
func dashboardImport(c *gin.Context) {
|
||||
var details []DashboardDetail
|
||||
bind(c, &details)
|
||||
me := loginUser(c).MustPerm("dashboard_create")
|
||||
|
||||
for _, detail := range details {
|
||||
d := &models.Dashboard{
|
||||
Name: detail.Name,
|
||||
Tags: detail.Tags,
|
||||
Configs: detail.Configs,
|
||||
CreateBy: me.Username,
|
||||
UpdateBy: me.Username,
|
||||
}
|
||||
dangerous(d.AddOnly())
|
||||
|
||||
for _, chartGroup := range detail.ChartGroups {
|
||||
cg := models.ChartGroup{
|
||||
DashboardId: d.Id,
|
||||
Name: chartGroup.Name,
|
||||
Weight: chartGroup.Weight,
|
||||
}
|
||||
dangerous(cg.Add())
|
||||
|
||||
for _, chart := range chartGroup.Charts {
|
||||
c := models.Chart{
|
||||
GroupId: cg.Id,
|
||||
Configs: chart.Configs,
|
||||
Weight: chart.Weight,
|
||||
}
|
||||
dangerous(c.Add())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
renderMessage(c, nil)
|
||||
}
|
|
@ -1,56 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
func historyAlertEventGets(c *gin.Context) {
|
||||
stime := queryInt64(c, "stime", 0)
|
||||
etime := queryInt64(c, "etime", 0)
|
||||
hours := queryInt64(c, "hours", 0)
|
||||
now := time.Now().Unix()
|
||||
if hours != 0 {
|
||||
stime = now - 3600*hours
|
||||
etime = now + 3600*24
|
||||
}
|
||||
|
||||
if stime != 0 && etime == 0 {
|
||||
etime = now + 3600*24
|
||||
}
|
||||
|
||||
query := queryStr(c, "query", "")
|
||||
priority := queryInt(c, "priority", -1)
|
||||
status := queryInt(c, "status", -1)
|
||||
isRecovery := queryInt(c, "is_recovery", -1)
|
||||
limit := queryInt(c, "limit", defaultLimit)
|
||||
|
||||
total, err := models.HistoryAlertEventsTotal(stime, etime, query, status, isRecovery, priority)
|
||||
dangerous(err)
|
||||
|
||||
list, err := models.HistoryAlertEventGets(stime, etime, query, status, isRecovery, priority, limit, offset(c, limit))
|
||||
dangerous(err)
|
||||
|
||||
for i := 0; i < len(list); i++ {
|
||||
dangerous(list[i].FillObjs())
|
||||
}
|
||||
|
||||
if len(list) == 0 {
|
||||
renderZeroPage(c)
|
||||
return
|
||||
}
|
||||
|
||||
renderData(c, map[string]interface{}{
|
||||
"total": total,
|
||||
"list": list,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func historyAlertEventGet(c *gin.Context) {
|
||||
ae := HistoryAlertEvent(urlParamInt64(c, "id"))
|
||||
dangerous(ae.FillObjs())
|
||||
renderData(c, ae, nil)
|
||||
}
|
|
@ -1,82 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
func metricDescriptionGets(c *gin.Context) {
|
||||
limit := queryInt(c, "limit", defaultLimit)
|
||||
query := queryStr(c, "query", "")
|
||||
|
||||
total, err := models.MetricDescriptionTotal(query)
|
||||
dangerous(err)
|
||||
|
||||
list, err := models.MetricDescriptionGets(query, limit, offset(c, limit))
|
||||
dangerous(err)
|
||||
|
||||
renderData(c, gin.H{
|
||||
"list": list,
|
||||
"total": total,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// metricDescriptionFrom is the JSON body bound by metricDescriptionAdd.
type metricDescriptionFrom struct {
	Data string `json:"data"` // newline-separated "metric:description" lines
}
|
||||
|
||||
// 没有单个新增的功能,只有批量导入
|
||||
func metricDescriptionAdd(c *gin.Context) {
|
||||
var f metricDescriptionFrom
|
||||
var metricDescriptions []models.MetricDescription
|
||||
bind(c, &f)
|
||||
lines := strings.Split(f.Data, "\n")
|
||||
for _, md := range lines {
|
||||
arr := strings.Split(md, ":")
|
||||
if len(arr) != 2 {
|
||||
bomb(200, "metric description %s is illegal", md)
|
||||
}
|
||||
m := models.MetricDescription{
|
||||
Metric: arr[0],
|
||||
Description: arr[1],
|
||||
}
|
||||
metricDescriptions = append(metricDescriptions, m)
|
||||
}
|
||||
|
||||
if len(metricDescriptions) == 0 {
|
||||
bomb(http.StatusBadRequest, "Decoded metric description empty")
|
||||
}
|
||||
|
||||
loginUser(c).MustPerm("metric_description_create")
|
||||
|
||||
renderMessage(c, models.MetricDescriptionUpdate(metricDescriptions))
|
||||
}
|
||||
|
||||
func metricDescriptionDel(c *gin.Context) {
|
||||
var f idsForm
|
||||
bind(c, &f)
|
||||
|
||||
loginUser(c).MustPerm("metric_description_delete")
|
||||
|
||||
renderMessage(c, models.MetricDescriptionDel(f.Ids))
|
||||
}
|
||||
|
||||
// metricDescriptionForm is the JSON body bound by metricDescriptionPut.
type metricDescriptionForm struct {
	Description string `json:"description"` // replacement description text
}
|
||||
|
||||
func metricDescriptionPut(c *gin.Context) {
|
||||
var f metricDescriptionForm
|
||||
bind(c, &f)
|
||||
|
||||
loginUser(c).MustPerm("metric_description_modify")
|
||||
|
||||
md := MetricDescription(urlParamInt64(c, "id"))
|
||||
md.Description = f.Description
|
||||
|
||||
renderMessage(c, md.Update("description"))
|
||||
}
|
|
@ -1,62 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
func muteGets(c *gin.Context) {
|
||||
limit := queryInt(c, "limit", defaultLimit)
|
||||
query := queryStr(c, "query", "")
|
||||
|
||||
total, err := models.MuteTotal(query)
|
||||
dangerous(err)
|
||||
|
||||
list, err := models.MuteGets(query, limit, offset(c, limit))
|
||||
dangerous(err)
|
||||
|
||||
renderData(c, gin.H{
|
||||
"list": list,
|
||||
"total": total,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// muteForm is the JSON body bound by muteAdd. Each field is copied onto the
// models.Mute field of the same name.
type muteForm struct {
	// The tag used to read "classpath_prefix " with a trailing space. That
	// made encoding/json expect a key containing the space, so the field was
	// never filled from a normal "classpath_prefix" key.
	ClasspathPrefix string `json:"classpath_prefix"`
	Metric          string `json:"metric"`
	ResFilters      string `json:"res_filters"`
	TagFilters      string `json:"tags_filters"`
	Cause           string `json:"cause"`
	Btime           int64  `json:"btime"`
	Etime           int64  `json:"etime"`
}
|
||||
|
||||
func muteAdd(c *gin.Context) {
|
||||
var f muteForm
|
||||
bind(c, &f)
|
||||
|
||||
me := loginUser(c).MustPerm("mute_create")
|
||||
|
||||
mt := models.Mute{
|
||||
ClasspathPrefix: f.ClasspathPrefix,
|
||||
Metric: f.Metric,
|
||||
ResFilters: f.ResFilters,
|
||||
TagFilters: f.TagFilters,
|
||||
Cause: f.Cause,
|
||||
Btime: f.Btime,
|
||||
Etime: f.Etime,
|
||||
CreateBy: me.Username,
|
||||
}
|
||||
|
||||
renderMessage(c, mt.Add())
|
||||
}
|
||||
|
||||
func muteGet(c *gin.Context) {
|
||||
renderData(c, Mute(urlParamInt64(c, "id")), nil)
|
||||
}
|
||||
|
||||
func muteDel(c *gin.Context) {
|
||||
loginUser(c).MustPerm("mute_delete")
|
||||
renderMessage(c, Mute(urlParamInt64(c, "id")).Del())
|
||||
}
|
|
@ -1,24 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/prometheus/prometheus/promql/parser"
|
||||
|
||||
"github.com/didi/nightingale/v5/vos"
|
||||
)
|
||||
|
||||
func checkPromeQl(c *gin.Context) {
|
||||
|
||||
ql := c.Query("promql")
|
||||
_, err := parser.ParseExpr(ql)
|
||||
respD := &vos.PromQlCheckResp{}
|
||||
isCorrect := true
|
||||
if err != nil {
|
||||
|
||||
isCorrect = false
|
||||
respD.ParseError = err.Error()
|
||||
}
|
||||
|
||||
respD.QlCorrect = isCorrect
|
||||
renderData(c, respD, nil)
|
||||
}
|
|
@ -1,190 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/str"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
func classpathGetsResources(c *gin.Context) {
|
||||
limit := queryInt(c, "limit", defaultLimit)
|
||||
prefix := queryInt(c, "prefix", 0)
|
||||
query := queryStr(c, "query", "")
|
||||
|
||||
cp := Classpath(urlParamInt64(c, "id"))
|
||||
var classpathIds []int64
|
||||
if prefix == 1 {
|
||||
cps, err := models.ClasspathGetsByPrefix(cp.Path)
|
||||
dangerous(err)
|
||||
for i := range cps {
|
||||
classpathIds = append(classpathIds, cps[i].Id)
|
||||
}
|
||||
} else {
|
||||
classpathIds = append(classpathIds, cp.Id)
|
||||
}
|
||||
|
||||
total, err := models.ResourceTotalByClasspathId(classpathIds, query)
|
||||
dangerous(err)
|
||||
|
||||
reses, err := models.ResourceGetsByClasspathId(classpathIds, query, limit, offset(c, limit))
|
||||
dangerous(err)
|
||||
|
||||
renderData(c, gin.H{
|
||||
"classpath": cp,
|
||||
"list": reses,
|
||||
"total": total,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func resourcesQuery(c *gin.Context) {
|
||||
limit := queryInt(c, "limit", defaultLimit)
|
||||
qres := queryStr(c, "qres", "")
|
||||
|
||||
// qpaths 可以选择多个,英文逗号分隔的多个id
|
||||
qpaths := str.IdsInt64(queryStr(c, "qpaths", ""))
|
||||
|
||||
total, err := models.ResourceTotalByClasspathQuery(qpaths, qres)
|
||||
dangerous(err)
|
||||
|
||||
reses, err := models.ResourceGetsByClasspathQuery(qpaths, qres, limit, offset(c, limit))
|
||||
dangerous(err)
|
||||
|
||||
if len(reses) == 0 {
|
||||
renderZeroPage(c)
|
||||
return
|
||||
}
|
||||
|
||||
renderData(c, gin.H{
|
||||
"list": reses,
|
||||
"total": total,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
func resourceGet(c *gin.Context) {
|
||||
renderData(c, Resource(urlParamInt64(c, "id")), nil)
|
||||
}
|
||||
|
||||
func resourceDel(c *gin.Context) {
|
||||
loginUser(c).MustPerm("resource_modify")
|
||||
renderData(c, Resource(urlParamInt64(c, "id")).Del(), nil)
|
||||
}
|
||||
|
||||
// resourceNoteForm is the JSON body bound by resourceNotePut: the ids of the
// resources to change and the note to store on them.
type resourceNoteForm struct {
	Ids  []int64 `json:"ids"`  // resource ids to update
	Note string  `json:"note"` // note text passed to models.ResourceUpdateNote
}
|
||||
|
||||
// 修改主机设备的备注
|
||||
func resourceNotePut(c *gin.Context) {
|
||||
var f resourceNoteForm
|
||||
bind(c, &f)
|
||||
|
||||
if len(f.Ids) == 0 {
|
||||
bomb(http.StatusBadRequest, "ids is empty")
|
||||
}
|
||||
|
||||
loginUser(c).MustPerm("resource_modify")
|
||||
|
||||
renderMessage(c, models.ResourceUpdateNote(f.Ids, f.Note))
|
||||
}
|
||||
|
||||
// resourceTagsForm is the JSON body bound by resourceTagsPut: the ids of the
// resources to change and the tags string to store on them.
type resourceTagsForm struct {
	Ids  []int64 `json:"ids"`  // resource ids to update
	Tags string  `json:"tags"` // tags value passed to models.ResourceUpdateTags
}
|
||||
|
||||
func resourceTagsPut(c *gin.Context) {
|
||||
var f resourceTagsForm
|
||||
bind(c, &f)
|
||||
|
||||
if len(f.Ids) == 0 {
|
||||
bomb(http.StatusBadRequest, "ids is empty")
|
||||
}
|
||||
|
||||
loginUser(c).MustPerm("resource_modify")
|
||||
|
||||
renderMessage(c, models.ResourceUpdateTags(f.Ids, f.Tags))
|
||||
}
|
||||
|
||||
// resourceMuteForm is the JSON body bound by resourceMutePut: the ids of the
// resources and the begin/end times handed to models.ResourceUpdateMute.
type resourceMuteForm struct {
	Ids   []int64 `json:"ids"`   // resource ids to update
	Btime int64   `json:"btime"` // presumably the mute start time — TODO confirm unit
	Etime int64   `json:"etime"` // presumably the mute end time — TODO confirm unit
}
|
||||
|
||||
func resourceMutePut(c *gin.Context) {
|
||||
var f resourceMuteForm
|
||||
bind(c, &f)
|
||||
|
||||
if len(f.Ids) == 0 {
|
||||
bomb(http.StatusBadRequest, "ids is empty")
|
||||
}
|
||||
|
||||
loginUser(c).MustPerm("resource_modify")
|
||||
|
||||
renderMessage(c, models.ResourceUpdateMute(f.Ids, f.Btime, f.Etime))
|
||||
}
|
||||
|
||||
// resourceClasspathsForm is the JSON body bound by resourceClasspathsPut: the
// resource identifiers and the classpath ids they should be bound to.
type resourceClasspathsForm struct {
	ResIdents    []string `json:"res_idents"`    // resource identifiers
	ClasspathIds []int64  `json:"classpath_ids"` // target classpath ids
}
|
||||
|
||||
func resourceClasspathsPut(c *gin.Context) {
|
||||
var f resourceClasspathsForm
|
||||
m := make(map[string]map[int64]struct{}) //store database data to compare
|
||||
toAdd := make(map[string][]int64)
|
||||
|
||||
bind(c, &f)
|
||||
loginUser(c).MustPerm("resource_modify")
|
||||
|
||||
sql := "res_ident in (\"" + strings.Join(f.ResIdents, ",") + "\")"
|
||||
oldClasspathResources, err := models.ClasspathResourceGets(sql)
|
||||
dangerous(err)
|
||||
|
||||
for _, obj := range oldClasspathResources {
|
||||
if _, exists := m[obj.ResIdent]; !exists {
|
||||
m[obj.ResIdent] = make(map[int64]struct{})
|
||||
}
|
||||
m[obj.ResIdent][obj.ClasspathId] = struct{}{}
|
||||
}
|
||||
|
||||
for _, ident := range f.ResIdents {
|
||||
toAdd[ident] = []int64{}
|
||||
if _, exists := m[ident]; exists {
|
||||
for _, classpathId := range f.ClasspathIds {
|
||||
if _, exists := m[ident][classpathId]; exists {
|
||||
// classpathResource 在数据库中已存在,不做处理
|
||||
delete(m[ident], classpathId)
|
||||
} else {
|
||||
toAdd[ident] = append(toAdd[ident], classpathId)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
toAdd[ident] = f.ClasspathIds
|
||||
}
|
||||
}
|
||||
|
||||
//删除数据库中多余的classpathResources
|
||||
for ident := range m {
|
||||
for classpathId := range m[ident] {
|
||||
if classpathId == 1 {
|
||||
continue
|
||||
}
|
||||
|
||||
dangerous(models.ClasspathResourceDel(classpathId, []string{ident}))
|
||||
}
|
||||
}
|
||||
|
||||
//添加数据库没有的classpathResources
|
||||
for ident, cids := range toAdd {
|
||||
for _, cid := range cids {
|
||||
dangerous(models.ClasspathResourceAdd(cid, ident))
|
||||
}
|
||||
}
|
||||
renderMessage(c, nil)
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
func rolesGet(c *gin.Context) {
|
||||
lst, err := models.RoleGetsAll()
|
||||
renderData(c, lst, err)
|
||||
}
|
|
@ -1,58 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func selfProfileGet(c *gin.Context) {
|
||||
renderData(c, loginUser(c), nil)
|
||||
}
|
||||
|
||||
// selfProfileForm is the JSON body bound by selfProfilePut; it carries the
// profile fields a user may change on their own account.
type selfProfileForm struct {
	Nickname string `json:"nickname"`
	Phone    string `json:"phone"`
	Email    string `json:"email"`
	Portrait string `json:"portrait"`
	// Contacts is kept as raw JSON and stored without being decoded here.
	Contacts json.RawMessage `json:"contacts"`
}
|
||||
|
||||
func selfProfilePut(c *gin.Context) {
|
||||
var f selfProfileForm
|
||||
bind(c, &f)
|
||||
|
||||
user := loginUser(c)
|
||||
user.Nickname = f.Nickname
|
||||
user.Phone = f.Phone
|
||||
user.Email = f.Email
|
||||
user.Portrait = f.Portrait
|
||||
user.Contacts = f.Contacts
|
||||
user.UpdateAt = time.Now().Unix()
|
||||
user.UpdateBy = user.Username
|
||||
|
||||
renderMessage(
|
||||
c,
|
||||
user.Update(
|
||||
"nickname",
|
||||
"phone",
|
||||
"email",
|
||||
"portrait",
|
||||
"contacts",
|
||||
"update_at",
|
||||
"update_by",
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// selfPasswordForm is the JSON body bound by selfPasswordPut; both fields
// carry a binding:"required" tag.
type selfPasswordForm struct {
	OldPass string `json:"oldpass" binding:"required"` // current password
	NewPass string `json:"newpass" binding:"required"` // replacement password
}
|
||||
|
||||
func selfPasswordPut(c *gin.Context) {
|
||||
var f selfPasswordForm
|
||||
bind(c, &f)
|
||||
renderMessage(c, loginUser(c).ChangePassword(f.OldPass, f.NewPass))
|
||||
}
|
|
@ -1,42 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func Status(c *gin.Context) {
|
||||
var err error
|
||||
data := make(map[string]int64)
|
||||
data["user_total"], err = models.UserTotal("")
|
||||
dangerous(err)
|
||||
|
||||
data["user_group_total"], err = models.UserGroupTotal("")
|
||||
dangerous(err)
|
||||
|
||||
data["resource_total"], err = models.ResourceTotal("")
|
||||
dangerous(err)
|
||||
|
||||
data["alert_rule_total"], err = models.AlertRuleTotal("")
|
||||
dangerous(err)
|
||||
|
||||
data["dashboard_total"], err = models.DashboardCount("")
|
||||
dangerous(err)
|
||||
|
||||
now := time.Now().Unix()
|
||||
stime := now - 24*3600
|
||||
data["event_total_day"], err = models.AlertEventTotal(stime, now, "", -1, -1)
|
||||
dangerous(err)
|
||||
|
||||
stime = now - 7*24*3600
|
||||
data["event_total_week"], err = models.AlertEventTotal(stime, now, "", -1, -1)
|
||||
dangerous(err)
|
||||
|
||||
stime = now - 30*24*3600
|
||||
data["event_total_month"], err = models.AlertEventTotal(stime, now, "", -1, -1)
|
||||
dangerous(err)
|
||||
|
||||
renderData(c, data, nil)
|
||||
}
|
|
@ -1,32 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
func selfTokenGets(c *gin.Context) {
|
||||
objs, err := models.UserTokenGets("user_id=?", loginUser(c).Id)
|
||||
renderData(c, objs, err)
|
||||
}
|
||||
|
||||
func selfTokenPost(c *gin.Context) {
|
||||
user := loginUser(c)
|
||||
obj, err := models.UserTokenNew(user.Id, user.Username)
|
||||
renderData(c, obj, err)
|
||||
}
|
||||
|
||||
// selfTokenForm is the JSON body bound by selfTokenPut.
type selfTokenForm struct {
	Token string `json:"token"` // the token value handed to models.UserTokenReset
}
|
||||
|
||||
func selfTokenPut(c *gin.Context) {
|
||||
user := loginUser(c)
|
||||
|
||||
var f selfTokenForm
|
||||
bind(c, &f)
|
||||
|
||||
obj, err := models.UserTokenReset(user.Id, f.Token)
|
||||
renderData(c, obj, err)
|
||||
}
|
|
@ -1,58 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"path"
|
||||
|
||||
"github.com/didi/nightingale/v5/config"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/toolkits/pkg/file"
|
||||
)
|
||||
|
||||
func tplNameGets(c *gin.Context) {
|
||||
tplType := queryStr(c, "tpl_type")
|
||||
|
||||
var files []string
|
||||
var err error
|
||||
switch tplType {
|
||||
case "alert_rule":
|
||||
files, err = file.FilesUnder(config.Config.Tpl.AlertRulePath)
|
||||
dangerous(err)
|
||||
case "dashboard":
|
||||
files, err = file.FilesUnder(config.Config.Tpl.DashboardPath)
|
||||
dangerous(err)
|
||||
default:
|
||||
bomb(http.StatusBadRequest, "tpl type not found")
|
||||
}
|
||||
|
||||
renderData(c, files, err)
|
||||
}
|
||||
|
||||
func tplGet(c *gin.Context) {
|
||||
tplName := path.Base(queryStr(c, "tpl_name"))
|
||||
tplType := queryStr(c, "tpl_type")
|
||||
|
||||
var filePath string
|
||||
switch tplType {
|
||||
case "alert_rule":
|
||||
filePath = config.Config.Tpl.AlertRulePath + "/" + tplName
|
||||
case "dashboard":
|
||||
filePath = config.Config.Tpl.DashboardPath + "/" + tplName
|
||||
default:
|
||||
bomb(http.StatusBadRequest, "tpl type not found")
|
||||
}
|
||||
|
||||
if !file.IsExist(filePath) {
|
||||
bomb(http.StatusBadRequest, "tpl not found")
|
||||
}
|
||||
|
||||
b, err := ioutil.ReadFile(filePath)
|
||||
dangerous(err)
|
||||
|
||||
var content interface{}
|
||||
err = json.Unmarshal(b, &content)
|
||||
renderData(c, content, err)
|
||||
}
|
|
@ -1,221 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"compress/zlib"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
|
||||
"github.com/didi/nightingale/v5/backend"
|
||||
"github.com/didi/nightingale/v5/cache"
|
||||
"github.com/didi/nightingale/v5/trans"
|
||||
"github.com/didi/nightingale/v5/vos"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
agentpayload "github.com/n9e/agent-payload/gogen"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
// 错误消息也是返回了200,是和客户端的约定,客户端如果发现code!=200就会重试
|
||||
func PushSeries(c *gin.Context) {
|
||||
req := agentpayload.N9EMetricsPayload{}
|
||||
|
||||
r := c.Request
|
||||
reader := r.Body
|
||||
|
||||
var err error
|
||||
if encoding := r.Header.Get("Content-Encoding"); encoding == "gzip" {
|
||||
if reader, err = gzip.NewReader(r.Body); err != nil {
|
||||
message := fmt.Sprintf("error: get gzip reader occur error: %v", err)
|
||||
logger.Warning(message)
|
||||
c.String(200, message)
|
||||
return
|
||||
}
|
||||
defer reader.Close()
|
||||
} else if encoding == "deflate" {
|
||||
if reader, err = zlib.NewReader(r.Body); err != nil {
|
||||
message := fmt.Sprintf("error: get zlib reader occur error: %v", err)
|
||||
logger.Warning(message)
|
||||
c.String(200, message)
|
||||
return
|
||||
}
|
||||
defer reader.Close()
|
||||
}
|
||||
|
||||
b, err := ioutil.ReadAll(reader)
|
||||
if err != nil {
|
||||
message := fmt.Sprintf("error: ioutil occur error: %v", err)
|
||||
logger.Warning(message)
|
||||
c.String(200, message)
|
||||
return
|
||||
}
|
||||
|
||||
if r.Header.Get("Content-Type") == "application/x-protobuf" {
|
||||
if err := req.Unmarshal(b); err != nil {
|
||||
message := fmt.Sprintf("error: decode protobuf body occur error: %v", err)
|
||||
logger.Warning(message)
|
||||
c.String(200, message)
|
||||
return
|
||||
}
|
||||
|
||||
count := len(req.Samples)
|
||||
if count == 0 {
|
||||
c.String(200, "error: samples is empty")
|
||||
return
|
||||
}
|
||||
|
||||
metricPoints := make([]*vos.MetricPoint, 0, count)
|
||||
for i := 0; i < count; i++ {
|
||||
logger.Debugf("recv %v", req.Samples[i])
|
||||
metricPoints = append(metricPoints, convertAgentdPoint(req.Samples[i]))
|
||||
}
|
||||
|
||||
if err = trans.Push(metricPoints); err != nil {
|
||||
logger.Warningf("error: trans.push %+v err:%v", req.Samples, err)
|
||||
c.String(200, "error: "+err.Error())
|
||||
} else {
|
||||
c.String(200, "success: received %d points", len(metricPoints))
|
||||
}
|
||||
} else {
|
||||
logger.Warningf("error: trans.push %+v Content-Type(%s) not equals application/x-protobuf", req.Samples)
|
||||
c.String(200, "error: Content-Type(%s) not equals application/x-protobuf")
|
||||
}
|
||||
}
|
||||
|
||||
func convertAgentdPoint(obj *agentpayload.N9EMetricsPayload_Sample) *vos.MetricPoint {
|
||||
return &vos.MetricPoint{
|
||||
Metric: obj.Metric,
|
||||
Ident: obj.Ident,
|
||||
Alias: obj.Alias,
|
||||
TagsMap: obj.Tags,
|
||||
Time: obj.Time,
|
||||
ValueUntyped: obj.Value,
|
||||
}
|
||||
}
|
||||
|
||||
func PushData(c *gin.Context) {
|
||||
var points []*vos.MetricPoint
|
||||
err := c.ShouldBindJSON(&points)
|
||||
if err != nil {
|
||||
message := fmt.Sprintf("error: decode json body occur error: %v", err)
|
||||
logger.Warning(message)
|
||||
c.String(200, message)
|
||||
return
|
||||
}
|
||||
|
||||
if err = trans.Push(points); err != nil {
|
||||
c.String(200, "error: "+err.Error())
|
||||
} else {
|
||||
c.String(200, "success")
|
||||
}
|
||||
}
|
||||
|
||||
func GetTagKeys(c *gin.Context) {
|
||||
recv := vos.CommonTagQueryParam{}
|
||||
dangerous(c.ShouldBindJSON(&recv))
|
||||
|
||||
dataSource, err := backend.GetDataSourceFor("")
|
||||
if err != nil {
|
||||
logger.Warningf("could not find datasource")
|
||||
renderMessage(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
resp := dataSource.QueryTagKeys(recv)
|
||||
renderData(c, resp, nil)
|
||||
}
|
||||
|
||||
func GetTagValues(c *gin.Context) {
|
||||
recv := vos.CommonTagQueryParam{}
|
||||
dangerous(c.ShouldBindJSON(&recv))
|
||||
|
||||
dataSource, err := backend.GetDataSourceFor("")
|
||||
if err != nil {
|
||||
logger.Warningf("could not find datasource")
|
||||
renderMessage(c, err)
|
||||
return
|
||||
}
|
||||
if recv.TagKey == "" {
|
||||
renderMessage(c, errors.New("missing tag_key"))
|
||||
return
|
||||
}
|
||||
resp := dataSource.QueryTagValues(recv)
|
||||
renderData(c, resp, nil)
|
||||
}
|
||||
|
||||
func GetMetrics(c *gin.Context) {
|
||||
recv := vos.MetricQueryParam{}
|
||||
dangerous(c.ShouldBindJSON(&recv))
|
||||
|
||||
dataSource, err := backend.GetDataSourceFor("")
|
||||
if err != nil {
|
||||
logger.Warningf("could not find datasource")
|
||||
renderMessage(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
resp := dataSource.QueryMetrics(recv)
|
||||
logger.Debugf("[GetMetrics][recv:%+v][resp:%+v]", recv, resp)
|
||||
res := &vos.MetricDesQueryResp{
|
||||
Metrics: make([]vos.MetricsWithDescription, 0),
|
||||
}
|
||||
|
||||
for _, metric := range resp.Metrics {
|
||||
t := vos.MetricsWithDescription{
|
||||
Name: metric,
|
||||
}
|
||||
|
||||
description, exists := cache.MetricDescMapper.Get(metric)
|
||||
if exists {
|
||||
t.Description = description.(string)
|
||||
}
|
||||
|
||||
res.Metrics = append(res.Metrics, t)
|
||||
}
|
||||
|
||||
renderData(c, res, nil)
|
||||
}
|
||||
|
||||
func GetTagPairs(c *gin.Context) {
|
||||
recv := vos.CommonTagQueryParam{}
|
||||
dangerous(c.ShouldBindJSON(&recv))
|
||||
|
||||
dataSource, err := backend.GetDataSourceFor("")
|
||||
if err != nil {
|
||||
logger.Warningf("could not find datasource")
|
||||
renderMessage(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
resp := dataSource.QueryTagPairs(recv)
|
||||
renderData(c, resp, nil)
|
||||
}
|
||||
|
||||
func GetData(c *gin.Context) {
|
||||
dataSource, err := backend.GetDataSourceFor("")
|
||||
if err != nil {
|
||||
logger.Warningf("could not find datasource")
|
||||
renderMessage(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
var input vos.DataQueryParam
|
||||
dangerous(c.ShouldBindJSON(&input))
|
||||
resp := dataSource.QueryData(input)
|
||||
renderData(c, resp, nil)
|
||||
}
|
||||
|
||||
func GetDataInstant(c *gin.Context) {
|
||||
dataSource, err := backend.GetDataSourceFor("")
|
||||
if err != nil {
|
||||
logger.Warningf("could not find datasource")
|
||||
renderMessage(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
var input vos.DataQueryInstantParam
|
||||
dangerous(c.ShouldBindJSON(&input))
|
||||
resp := dataSource.QueryDataInstant(input.PromeQl)
|
||||
renderData(c, resp, nil)
|
||||
}
|
|
@ -1,197 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/config"
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
func userGets(c *gin.Context) {
|
||||
limit := queryInt(c, "limit", defaultLimit)
|
||||
query := queryStr(c, "query", "")
|
||||
|
||||
total, err := models.UserTotal(query)
|
||||
dangerous(err)
|
||||
|
||||
list, err := models.UserGets(query, limit, offset(c, limit))
|
||||
dangerous(err)
|
||||
|
||||
admin := false
|
||||
roles := strings.Fields(loginUser(c).RolesForDB)
|
||||
for i := 0; i < len(roles); i++ {
|
||||
if roles[i] == "Admin" {
|
||||
admin = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
renderData(c, gin.H{
|
||||
"list": list,
|
||||
"total": total,
|
||||
"admin": admin,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// userAddForm is the JSON body bound by userAddPost when a user is created.
// Username and Password carry a binding:"required" tag.
type userAddForm struct {
	Username string   `json:"username" binding:"required"`
	Password string   `json:"password" binding:"required"` // plain text; hashed by the handler
	Nickname string   `json:"nickname"`
	Phone    string   `json:"phone"`
	Email    string   `json:"email"`
	Portrait string   `json:"portrait"`
	Roles    []string `json:"roles"` // must not be empty (checked by the handler)
	// Contacts is kept as raw JSON and stored without being decoded here.
	Contacts json.RawMessage `json:"contacts"`
}
|
||||
|
||||
func userAddPost(c *gin.Context) {
|
||||
var f userAddForm
|
||||
bind(c, &f)
|
||||
|
||||
password, err := models.CryptoPass(f.Password)
|
||||
dangerous(err)
|
||||
|
||||
now := time.Now().Unix()
|
||||
username := loginUsername(c)
|
||||
|
||||
if len(f.Roles) == 0 {
|
||||
bomb(200, "roles empty")
|
||||
}
|
||||
|
||||
u := models.User{
|
||||
Username: f.Username,
|
||||
Password: password,
|
||||
Nickname: f.Nickname,
|
||||
Phone: f.Phone,
|
||||
Email: f.Email,
|
||||
Portrait: f.Portrait,
|
||||
RolesForDB: strings.Join(f.Roles, " "),
|
||||
Contacts: f.Contacts,
|
||||
CreateAt: now,
|
||||
UpdateAt: now,
|
||||
CreateBy: username,
|
||||
UpdateBy: username,
|
||||
}
|
||||
|
||||
renderMessage(c, u.Add())
|
||||
}
|
||||
|
||||
func userProfileGet(c *gin.Context) {
|
||||
renderData(c, User(urlParamInt64(c, "id")), nil)
|
||||
}
|
||||
|
||||
// userProfileForm is the JSON body bound by userProfilePut when another
// user's profile is edited.
type userProfileForm struct {
	Nickname string   `json:"nickname"`
	Phone    string   `json:"phone"`
	Email    string   `json:"email"`
	Portrait string   `json:"portrait"`
	Roles    []string `json:"roles"` // must not be empty (checked by the handler)
	Status   int      `json:"status"`
	// Contacts is kept as raw JSON and stored without being decoded here.
	Contacts json.RawMessage `json:"contacts"`
}
|
||||
|
||||
func userProfilePut(c *gin.Context) {
|
||||
var f userProfileForm
|
||||
bind(c, &f)
|
||||
|
||||
if len(f.Roles) == 0 {
|
||||
bomb(200, "roles empty")
|
||||
}
|
||||
|
||||
target := User(urlParamInt64(c, "id"))
|
||||
target.Nickname = f.Nickname
|
||||
target.Phone = f.Phone
|
||||
target.Email = f.Email
|
||||
target.Portrait = f.Portrait
|
||||
target.RolesForDB = strings.Join(f.Roles, " ")
|
||||
target.Status = f.Status
|
||||
target.Contacts = f.Contacts
|
||||
target.UpdateAt = time.Now().Unix()
|
||||
target.UpdateBy = loginUsername(c)
|
||||
renderMessage(
|
||||
c,
|
||||
target.Update(
|
||||
"nickname",
|
||||
"phone",
|
||||
"email",
|
||||
"portrait",
|
||||
"roles",
|
||||
"status",
|
||||
"contacts",
|
||||
"update_at",
|
||||
"update_by",
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// userPasswordForm is the JSON body bound by userPasswordPut.
type userPasswordForm struct {
	Password string `json:"password" binding:"required"` // new plain-text password
}
|
||||
|
||||
func userPasswordPut(c *gin.Context) {
|
||||
var f userPasswordForm
|
||||
bind(c, &f)
|
||||
|
||||
target := User(urlParamInt64(c, "id"))
|
||||
|
||||
cryptoPass, err := models.CryptoPass(f.Password)
|
||||
dangerous(err)
|
||||
|
||||
target.Password = cryptoPass
|
||||
target.UpdateAt = time.Now().Unix()
|
||||
target.UpdateBy = loginUsername(c)
|
||||
renderMessage(c, target.Update("password", "update_at", "update_by"))
|
||||
}
|
||||
|
||||
// userStatusForm is the JSON body bound by userStatusPut.
type userStatusForm struct {
	Status int `json:"status"` // new status value; meaning is defined by the model
}
|
||||
|
||||
func userStatusPut(c *gin.Context) {
|
||||
var f userStatusForm
|
||||
bind(c, &f)
|
||||
|
||||
target := User(urlParamInt64(c, "id"))
|
||||
target.Status = f.Status
|
||||
target.UpdateAt = time.Now().Unix()
|
||||
target.UpdateBy = loginUsername(c)
|
||||
renderMessage(c, target.Update("status", "update_at", "update_by"))
|
||||
}
|
||||
|
||||
func userDel(c *gin.Context) {
|
||||
id := urlParamInt64(c, "id")
|
||||
target, err := models.UserGet("id=?", id)
|
||||
dangerous(err)
|
||||
|
||||
if target == nil {
|
||||
renderMessage(c, nil)
|
||||
return
|
||||
}
|
||||
|
||||
renderMessage(c, target.Del())
|
||||
}
|
||||
|
||||
func contactChannelsGet(c *gin.Context) {
|
||||
renderData(c, config.Config.ContactKeys, nil)
|
||||
}
|
||||
|
||||
func getUserByName(c *gin.Context) {
|
||||
user, err := models.UserGetByUsername(queryStr(c, "name"))
|
||||
renderData(c, user, err)
|
||||
}
|
||||
|
||||
func getUserByToken(c *gin.Context) {
|
||||
userToken, err := models.UserTokenGet("token=?", queryStr(c, "token"))
|
||||
dangerous(err)
|
||||
if userToken == nil {
|
||||
renderMessage(c, nil)
|
||||
return
|
||||
}
|
||||
|
||||
user, err := models.UserGetByUsername(userToken.Username)
|
||||
renderData(c, user, err)
|
||||
}
|
|
@ -1,173 +0,0 @@
|
|||
package http
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
)
|
||||
|
||||
func userGroupListGet(c *gin.Context) {
|
||||
limit := queryInt(c, "limit", defaultLimit)
|
||||
query := queryStr(c, "query", "")
|
||||
|
||||
total, err := models.UserGroupTotal(query)
|
||||
dangerous(err)
|
||||
|
||||
list, err := models.UserGroupGets(query, limit, offset(c, limit))
|
||||
dangerous(err)
|
||||
|
||||
renderData(c, gin.H{
|
||||
"list": list,
|
||||
"total": total,
|
||||
}, nil)
|
||||
}
|
||||
|
||||
// 与我相关的用户组,我创建的,或者我是其中一员
|
||||
// 这个量不大,搜索和分页都放在前端来做,后端搞起来比较麻烦
|
||||
func userGroupMineGet(c *gin.Context) {
|
||||
list, err := loginUser(c).MyUserGroups()
|
||||
renderData(c, list, err)
|
||||
}
|
||||
|
||||
// userGroupForm is the JSON body bound by userGroupAdd and userGroupPut.
type userGroupForm struct {
	Name string `json:"name"` // group name
	Note string `json:"note"` // free-form note
}
|
||||
|
||||
func userGroupAdd(c *gin.Context) {
|
||||
var f userGroupForm
|
||||
bind(c, &f)
|
||||
|
||||
me := loginUser(c)
|
||||
|
||||
ug := models.UserGroup{
|
||||
Name: f.Name,
|
||||
Note: f.Note,
|
||||
CreateBy: me.Username,
|
||||
UpdateBy: me.Username,
|
||||
}
|
||||
|
||||
dangerous(ug.Add())
|
||||
|
||||
// 顺便把创建者也作为团队的一员,失败了也没关系,用户会重新添加成员
|
||||
models.UserGroupMemberAdd(ug.Id, me.Id)
|
||||
|
||||
renderData(c, ug.Id, nil)
|
||||
}
|
||||
|
||||
func userGroupPut(c *gin.Context) {
|
||||
var f userGroupForm
|
||||
bind(c, &f)
|
||||
|
||||
me := loginUser(c)
|
||||
ug := UserGroup(urlParamInt64(c, "id"))
|
||||
|
||||
can, err := me.CanModifyUserGroup(ug)
|
||||
dangerous(err)
|
||||
|
||||
if !can {
|
||||
bomb(http.StatusForbidden, "forbidden")
|
||||
}
|
||||
|
||||
if ug.Name != f.Name {
|
||||
// 如果name发生变化,需要检查这个新name是否与别的group重名
|
||||
num, err := models.UserGroupCount("name=? and id<>?", f.Name, ug.Id)
|
||||
dangerous(err)
|
||||
|
||||
if num > 0 {
|
||||
bomb(200, "UserGroup %s already exists", f.Name)
|
||||
}
|
||||
}
|
||||
|
||||
ug.Name = f.Name
|
||||
ug.Note = f.Note
|
||||
ug.UpdateBy = me.Username
|
||||
ug.UpdateAt = time.Now().Unix()
|
||||
|
||||
renderMessage(c, ug.Update("name", "note", "update_at", "update_by"))
|
||||
}
|
||||
|
||||
// 不但返回UserGroup的信息,也把成员信息返回,成员不会特别多,所以,
|
||||
// 成员全部返回,由前端分页、查询
|
||||
func userGroupGet(c *gin.Context) {
|
||||
ug := UserGroup(urlParamInt64(c, "id"))
|
||||
|
||||
ids, err := ug.MemberIds()
|
||||
dangerous(err)
|
||||
|
||||
users, err := models.UserGetsByIds(ids)
|
||||
|
||||
renderData(c, gin.H{
|
||||
"users": users,
|
||||
"user_group": ug,
|
||||
}, err)
|
||||
}
|
||||
|
||||
func userGroupMemberAdd(c *gin.Context) {
|
||||
var f idsForm
|
||||
bind(c, &f)
|
||||
f.Validate()
|
||||
|
||||
me := loginUser(c)
|
||||
ug := UserGroup(urlParamInt64(c, "id"))
|
||||
|
||||
can, err := me.CanModifyUserGroup(ug)
|
||||
dangerous(err)
|
||||
|
||||
if !can {
|
||||
bomb(http.StatusForbidden, "forbidden")
|
||||
}
|
||||
|
||||
dangerous(ug.AddMembers(f.Ids))
|
||||
|
||||
// 用户组的成员发生变化,相当于更新了用户组
|
||||
// 如果更新失败了直接忽略,不是啥大事
|
||||
ug.UpdateAt = time.Now().Unix()
|
||||
ug.UpdateBy = me.Username
|
||||
ug.Update("update_at", "update_by")
|
||||
|
||||
renderMessage(c, nil)
|
||||
}
|
||||
|
||||
func userGroupMemberDel(c *gin.Context) {
|
||||
var f idsForm
|
||||
bind(c, &f)
|
||||
f.Validate()
|
||||
|
||||
me := loginUser(c)
|
||||
ug := UserGroup(urlParamInt64(c, "id"))
|
||||
|
||||
can, err := me.CanModifyUserGroup(ug)
|
||||
dangerous(err)
|
||||
|
||||
if !can {
|
||||
bomb(http.StatusForbidden, "forbidden")
|
||||
}
|
||||
|
||||
dangerous(ug.DelMembers(f.Ids))
|
||||
|
||||
// 用户组的成员发生变化,相当于更新了用户组
|
||||
// 如果更新失败了直接忽略,不是啥大事
|
||||
ug.UpdateAt = time.Now().Unix()
|
||||
ug.UpdateBy = me.Username
|
||||
ug.Update("update_at", "update_by")
|
||||
|
||||
renderMessage(c, nil)
|
||||
}
|
||||
|
||||
func userGroupDel(c *gin.Context) {
|
||||
me := loginUser(c)
|
||||
ug := UserGroup(urlParamInt64(c, "id"))
|
||||
|
||||
can, err := me.CanModifyUserGroup(ug)
|
||||
dangerous(err)
|
||||
|
||||
if !can {
|
||||
bomb(http.StatusForbidden, "forbidden")
|
||||
}
|
||||
|
||||
renderMessage(c, ug.Del())
|
||||
}
|
426
judge/compute.go
426
judge/compute.go
|
@ -1,426 +0,0 @@
|
|||
// Copyright 2017 Xiaomi, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package judge
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/didi/nightingale/v5/vos"
|
||||
)
|
||||
|
||||
type Function interface {
|
||||
Compute(vs []*vos.HPoint) (leftValue vos.JsonFloat, isTriggered bool)
|
||||
}
|
||||
|
||||
type MaxFunction struct {
|
||||
Function
|
||||
Limit int
|
||||
Operator string
|
||||
RightValue float64
|
||||
}
|
||||
|
||||
func (f MaxFunction) Compute(vs []*vos.HPoint) (leftValue vos.JsonFloat, isTriggered bool) {
|
||||
count := len(vs)
|
||||
if count < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
max := vs[0].Value
|
||||
for i := 1; i < len(vs); i++ {
|
||||
if max < vs[i].Value {
|
||||
max = vs[i].Value
|
||||
}
|
||||
}
|
||||
|
||||
leftValue = max
|
||||
isTriggered = checkIsTriggered(leftValue, f.Operator, f.RightValue)
|
||||
return
|
||||
}
|
||||
|
||||
type MinFunction struct {
|
||||
Function
|
||||
Limit int
|
||||
Operator string
|
||||
RightValue float64
|
||||
}
|
||||
|
||||
func (f MinFunction) Compute(vs []*vos.HPoint) (leftValue vos.JsonFloat, isTriggered bool) {
|
||||
count := len(vs)
|
||||
if count < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
min := vs[0].Value
|
||||
|
||||
for i := 1; i < len(vs); i++ {
|
||||
if min > vs[i].Value {
|
||||
min = vs[i].Value
|
||||
}
|
||||
}
|
||||
|
||||
leftValue = min
|
||||
isTriggered = checkIsTriggered(leftValue, f.Operator, f.RightValue)
|
||||
return
|
||||
}
|
||||
|
||||
type AllFunction struct {
|
||||
Function
|
||||
Limit int
|
||||
Operator string
|
||||
RightValue float64
|
||||
}
|
||||
|
||||
func (f AllFunction) Compute(vs []*vos.HPoint) (leftValue vos.JsonFloat, isTriggered bool) {
|
||||
count := len(vs)
|
||||
if count < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
for i := 0; i < len(vs); i++ {
|
||||
isTriggered = checkIsTriggered(vs[i].Value, f.Operator, f.RightValue)
|
||||
if !isTriggered {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
leftValue = vs[0].Value
|
||||
return
|
||||
}
|
||||
|
||||
type SumFunction struct {
|
||||
Function
|
||||
Limit int
|
||||
Operator string
|
||||
RightValue float64
|
||||
}
|
||||
|
||||
func (f SumFunction) Compute(vs []*vos.HPoint) (leftValue vos.JsonFloat, isTriggered bool) {
|
||||
count := len(vs)
|
||||
if count < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
sum := vos.JsonFloat(0.0)
|
||||
for i := 0; i < count; i++ {
|
||||
sum += vs[i].Value
|
||||
}
|
||||
|
||||
leftValue = sum
|
||||
isTriggered = checkIsTriggered(leftValue, f.Operator, f.RightValue)
|
||||
return
|
||||
}
|
||||
|
||||
type AvgFunction struct {
|
||||
Function
|
||||
Limit int
|
||||
Operator string
|
||||
RightValue float64
|
||||
}
|
||||
|
||||
func (f AvgFunction) Compute(vs []*vos.HPoint) (leftValue vos.JsonFloat, isTriggered bool) {
|
||||
vsLen := len(vs)
|
||||
if vsLen < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
sum := vos.JsonFloat(0.0)
|
||||
|
||||
for i := 0; i < vsLen; i++ {
|
||||
sum += vs[i].Value
|
||||
}
|
||||
|
||||
leftValue = sum / vos.JsonFloat(vsLen)
|
||||
isTriggered = checkIsTriggered(leftValue, f.Operator, f.RightValue)
|
||||
return
|
||||
}
|
||||
|
||||
type StddevFunction struct {
|
||||
Function
|
||||
Num int
|
||||
Limit int
|
||||
}
|
||||
|
||||
func (f StddevFunction) Compute(vs []*vos.HPoint) (leftValue vos.JsonFloat, isTriggered bool) {
|
||||
var sum float64
|
||||
vsLen := len(vs)
|
||||
if vsLen < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
for i := 0; i < vsLen; i++ {
|
||||
sum += float64(vs[i].Value)
|
||||
}
|
||||
mean := sum / float64(vsLen)
|
||||
|
||||
var num float64
|
||||
for i := 0; i < vsLen; i++ {
|
||||
num += math.Pow(float64(vs[i].Value)-mean, 2)
|
||||
}
|
||||
|
||||
std := math.Sqrt(num / float64(vsLen))
|
||||
|
||||
upperBound := mean + std*float64(f.Num)
|
||||
lowerBound := mean - std*float64(f.Num)
|
||||
|
||||
leftValue = vs[0].Value
|
||||
isTriggered = checkIsTriggered(leftValue, "<", lowerBound) || checkIsTriggered(leftValue, ">", upperBound)
|
||||
return
|
||||
}
|
||||
|
||||
type DiffFunction struct {
|
||||
Function
|
||||
Limit int
|
||||
Operator string
|
||||
RightValue float64
|
||||
}
|
||||
|
||||
// 只要有一个点的diff触发阈值,就报警
|
||||
func (f DiffFunction) Compute(vs []*vos.HPoint) (leftValue vos.JsonFloat, isTriggered bool) {
|
||||
vsLen := len(vs)
|
||||
if vsLen < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
first := vs[0].Value
|
||||
|
||||
isTriggered = false
|
||||
for i := 1; i < vsLen; i++ {
|
||||
// diff是当前值减去历史值
|
||||
leftValue = first - vs[i].Value
|
||||
isTriggered = checkIsTriggered(leftValue, f.Operator, f.RightValue)
|
||||
if isTriggered {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// pdiff(#3)
|
||||
type PDiffFunction struct {
|
||||
Function
|
||||
Limit int
|
||||
Operator string
|
||||
RightValue float64
|
||||
}
|
||||
|
||||
func (f PDiffFunction) Compute(vs []*vos.HPoint) (leftValue vos.JsonFloat, isTriggered bool) {
|
||||
vsLen := len(vs)
|
||||
if vsLen < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
first := vs[0].Value
|
||||
isTriggered = false
|
||||
for i := 1; i < len(vs); i++ {
|
||||
if vs[i].Value == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
leftValue = (first - vs[i].Value) / vs[i].Value * 100.0
|
||||
isTriggered = checkIsTriggered(leftValue, f.Operator, f.RightValue)
|
||||
if isTriggered {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
type HappenFunction struct {
|
||||
Function
|
||||
Num int
|
||||
Limit int
|
||||
Operator string
|
||||
RightValue float64
|
||||
}
|
||||
|
||||
func (f HappenFunction) Compute(vs []*vos.HPoint) (leftValue vos.JsonFloat, isTriggered bool) {
|
||||
for n, i := 0, 0; i < len(vs); i++ {
|
||||
if checkIsTriggered(vs[i].Value, f.Operator, f.RightValue) {
|
||||
n++
|
||||
if n == f.Num {
|
||||
isTriggered = true
|
||||
leftValue = vs[i].Value
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
type CAvgAbsFunction struct {
|
||||
Function
|
||||
Limit int
|
||||
Operator string
|
||||
RightValue float64
|
||||
CompareValue float64
|
||||
}
|
||||
|
||||
func (f CAvgAbsFunction) Compute(vs []*vos.HPoint) (leftValue vos.JsonFloat, isTriggered bool) {
|
||||
vsLen := len(vs)
|
||||
if vsLen < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
sum := vos.JsonFloat(0.0)
|
||||
|
||||
for i := 0; i < vsLen; i++ {
|
||||
sum += vs[i].Value
|
||||
}
|
||||
|
||||
value := sum / vos.JsonFloat(vsLen)
|
||||
leftValue = vos.JsonFloat(math.Abs(float64(value) - float64(f.CompareValue)))
|
||||
|
||||
isTriggered = checkIsTriggered(leftValue, f.Operator, f.RightValue)
|
||||
return
|
||||
}
|
||||
|
||||
type CAvgFunction struct {
|
||||
Function
|
||||
Limit int
|
||||
Operator string
|
||||
RightValue float64
|
||||
CompareValue float64
|
||||
}
|
||||
|
||||
func (f CAvgFunction) Compute(vs []*vos.HPoint) (leftValue vos.JsonFloat, isTriggered bool) {
|
||||
vsLen := len(vs)
|
||||
if vsLen < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
sum := vos.JsonFloat(0.0)
|
||||
for i := 0; i < vsLen; i++ {
|
||||
sum += vs[i].Value
|
||||
}
|
||||
|
||||
leftValue = sum/vos.JsonFloat(vsLen) - vos.JsonFloat(f.CompareValue)
|
||||
|
||||
isTriggered = checkIsTriggered(leftValue, f.Operator, f.RightValue)
|
||||
return
|
||||
}
|
||||
|
||||
type CAvgRateAbsFunction struct {
|
||||
Function
|
||||
Limit int
|
||||
Operator string
|
||||
RightValue float64
|
||||
CompareValue float64
|
||||
}
|
||||
|
||||
func (f CAvgRateAbsFunction) Compute(vs []*vos.HPoint) (leftValue vos.JsonFloat, isTriggered bool) {
|
||||
vsLen := len(vs)
|
||||
if vsLen < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
sum := vos.JsonFloat(0.0)
|
||||
for i := 0; i < vsLen; i++ {
|
||||
sum += vs[i].Value
|
||||
}
|
||||
|
||||
value := sum / vos.JsonFloat(vsLen)
|
||||
leftValue = vos.JsonFloat(math.Abs((float64(value)-float64(f.CompareValue))/f.CompareValue)) * 100.00
|
||||
|
||||
isTriggered = checkIsTriggered(leftValue, f.Operator, f.RightValue)
|
||||
return
|
||||
}
|
||||
|
||||
type CAvgRateFunction struct {
|
||||
Function
|
||||
Limit int
|
||||
Operator string
|
||||
RightValue float64
|
||||
CompareValue float64
|
||||
}
|
||||
|
||||
func (f CAvgRateFunction) Compute(vs []*vos.HPoint) (leftValue vos.JsonFloat, isTriggered bool) {
|
||||
vsLen := len(vs)
|
||||
if vsLen < 1 {
|
||||
return
|
||||
}
|
||||
|
||||
sum := vos.JsonFloat(0.0)
|
||||
for i := 0; i < vsLen; i++ {
|
||||
sum += vs[i].Value
|
||||
}
|
||||
|
||||
value := sum / vos.JsonFloat(vsLen)
|
||||
leftValue = (value - vos.JsonFloat(f.CompareValue)) / vos.JsonFloat(math.Abs(f.CompareValue)) * 100.00
|
||||
|
||||
isTriggered = checkIsTriggered(leftValue, f.Operator, f.RightValue)
|
||||
return
|
||||
}
|
||||
|
||||
func ParseFuncFromString(str string, span []interface{}, operator string, rightValue float64) (fn Function, err error) {
|
||||
if str == "" {
|
||||
return nil, fmt.Errorf("func can not be null")
|
||||
}
|
||||
limit := span[0].(int)
|
||||
|
||||
switch str {
|
||||
case "max":
|
||||
fn = &MaxFunction{Limit: limit, Operator: operator, RightValue: rightValue}
|
||||
case "min":
|
||||
fn = &MinFunction{Limit: limit, Operator: operator, RightValue: rightValue}
|
||||
case "all":
|
||||
fn = &AllFunction{Limit: limit, Operator: operator, RightValue: rightValue}
|
||||
case "sum":
|
||||
fn = &SumFunction{Limit: limit, Operator: operator, RightValue: rightValue}
|
||||
case "avg":
|
||||
fn = &AvgFunction{Limit: limit, Operator: operator, RightValue: rightValue}
|
||||
case "stddev":
|
||||
fn = &StddevFunction{Limit: limit, Num: span[1].(int)}
|
||||
case "diff":
|
||||
fn = &DiffFunction{Limit: limit, Operator: operator, RightValue: rightValue}
|
||||
case "pdiff":
|
||||
fn = &PDiffFunction{Limit: limit, Operator: operator, RightValue: rightValue}
|
||||
case "happen":
|
||||
fn = &HappenFunction{Limit: limit, Num: span[1].(int), Operator: operator, RightValue: rightValue}
|
||||
case "c_avg":
|
||||
fn = &CAvgFunction{Limit: limit, CompareValue: span[1].(float64), Operator: operator, RightValue: rightValue}
|
||||
case "c_avg_abs":
|
||||
fn = &CAvgAbsFunction{Limit: limit, CompareValue: span[1].(float64), Operator: operator, RightValue: rightValue}
|
||||
case "c_avg_rate":
|
||||
fn = &CAvgRateFunction{Limit: limit, CompareValue: span[1].(float64), Operator: operator, RightValue: rightValue}
|
||||
case "c_avg_rate_abs":
|
||||
fn = &CAvgRateAbsFunction{Limit: limit, CompareValue: span[1].(float64), Operator: operator, RightValue: rightValue}
|
||||
default:
|
||||
err = fmt.Errorf("not_supported_method")
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func checkIsTriggered(leftValue vos.JsonFloat, operator string, rightValue float64) (isTriggered bool) {
|
||||
switch operator {
|
||||
case "=", "==":
|
||||
isTriggered = math.Abs(float64(leftValue)-rightValue) < 0.0001
|
||||
case "!=":
|
||||
isTriggered = math.Abs(float64(leftValue)-rightValue) > 0.0001
|
||||
case "<":
|
||||
isTriggered = float64(leftValue) < rightValue
|
||||
case "<=":
|
||||
isTriggered = float64(leftValue) <= rightValue
|
||||
case ">":
|
||||
isTriggered = float64(leftValue) > rightValue
|
||||
case ">=":
|
||||
isTriggered = float64(leftValue) >= rightValue
|
||||
}
|
||||
|
||||
return
|
||||
}
|
545
judge/handler.go
545
judge/handler.go
|
@ -1,545 +0,0 @@
|
|||
// Copyright 2017 Xiaomi, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package judge
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/toolkits/pkg/logger"
|
||||
"github.com/toolkits/pkg/str"
|
||||
|
||||
"github.com/didi/nightingale/v5/cache"
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
"github.com/didi/nightingale/v5/vos"
|
||||
)
|
||||
|
||||
// bufferPool hands out *bytes.Buffer values; New allocates an empty buffer
// when the pool has none to offer.
var bufferPool = sync.Pool{
	New: func() interface{} {
		return new(bytes.Buffer)
	},
}

// Event type labels.
var (
	EVENT_ALERT   = "alert"
	EVENT_RECOVER = "recovery"
)
|
||||
|
||||
func Send(points []*vos.MetricPoint) {
|
||||
for i := range points {
|
||||
alertRules := getMatchAlertRules(points[i])
|
||||
|
||||
rulesCount := len(alertRules)
|
||||
if rulesCount == 0 {
|
||||
// 这个监控数据没有关联任何告警策略,省事了不用处理
|
||||
continue
|
||||
}
|
||||
|
||||
logger.Debugf("[point_match_alertRules][point:%+v][alertRuleNum:%+v]", points[i], rulesCount)
|
||||
// 不同的告警规则,alert_duration字段大小不同,找到最大的,按照最大的值来缓存历史数据
|
||||
var maxAliveDuration = 0
|
||||
for j := range alertRules {
|
||||
if maxAliveDuration < alertRules[j].AlertDuration {
|
||||
maxAliveDuration = alertRules[j].AlertDuration
|
||||
}
|
||||
}
|
||||
|
||||
ll := PointCaches[points[i].PK[0:2]].PutPoint(points[i], int64(maxAliveDuration))
|
||||
|
||||
for j := range alertRules {
|
||||
go ToJudge(ll, alertRules[j], points[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func getMatchAlertRules(point *vos.MetricPoint) []*models.AlertRule {
|
||||
alertRules := cache.AlertRulesByMetric.GetBy(point.Metric)
|
||||
matchRules := make([]*models.AlertRule, 0, len(alertRules))
|
||||
|
||||
for i := range alertRules {
|
||||
if alertRules[i].Type == models.PULL {
|
||||
continue
|
||||
}
|
||||
|
||||
if matchAlertRule(point, alertRules[i]) {
|
||||
matchRules = append(matchRules, alertRules[i])
|
||||
}
|
||||
}
|
||||
|
||||
return matchRules
|
||||
}
|
||||
|
||||
func matchAlertRule(item *vos.MetricPoint, alertRule *models.AlertRule) bool {
|
||||
//TODO 过滤方式待优化
|
||||
for _, filter := range alertRule.PushExpr.ResFilters {
|
||||
if !valueMatch(item.Ident, filter.Func, filter.Params) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
for _, filter := range alertRule.PushExpr.TagFilters {
|
||||
value, exists := item.TagsMap[filter.Key]
|
||||
if !exists {
|
||||
return false
|
||||
}
|
||||
|
||||
if !valueMatch(value, filter.Func, filter.Params) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func valueMatch(value, f string, params []string) bool {
|
||||
switch f {
|
||||
|
||||
case "InClasspath":
|
||||
for i := range params {
|
||||
if cache.ResClasspath.Exists(value, params[i]) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
case "NotInClasspath":
|
||||
for i := range params {
|
||||
if cache.ResClasspath.Exists(value, params[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
case "InClasspathPrefix":
|
||||
classpaths := cache.ResClasspath.GetValues(value)
|
||||
for _, classpath := range classpaths {
|
||||
for i := range params {
|
||||
if strings.HasPrefix(classpath, params[i]) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
case "NotInClasspathPrefix":
|
||||
classpaths := cache.ResClasspath.GetValues(value)
|
||||
for _, classpath := range classpaths {
|
||||
for i := range params {
|
||||
if strings.HasPrefix(classpath, params[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
case "InList":
|
||||
for i := range params {
|
||||
if value == params[i] {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
case "NotInList":
|
||||
for i := range params {
|
||||
if value == params[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
case "InResourceList":
|
||||
for i := range params {
|
||||
if value == params[i] {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
case "NotInResourceList":
|
||||
for i := range params {
|
||||
if value == params[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
case "HasPrefixString":
|
||||
for i := range params {
|
||||
if strings.HasPrefix(value, params[i]) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
case "NoPrefixString":
|
||||
for i := range params {
|
||||
if strings.HasPrefix(value, params[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
case "HasSuffixString":
|
||||
for i := range params {
|
||||
if strings.HasSuffix(value, params[i]) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
case "NoSuffixString":
|
||||
for i := range params {
|
||||
if strings.HasSuffix(value, params[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
case "ContainsString":
|
||||
for i := range params {
|
||||
if strings.Contains(value, params[i]) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
case "NotContainsString":
|
||||
for i := range params {
|
||||
if strings.Contains(value, params[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
case "MatchRegexp":
|
||||
for i := range params {
|
||||
r, _ := regexp.Compile(params[i])
|
||||
if r.MatchString(value) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
case "NotMatchRegexp":
|
||||
for i := range params {
|
||||
r, _ := regexp.Compile(params[i])
|
||||
if r.MatchString(value) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func ToJudge(linkedList *SafeLinkedList, stra *models.AlertRule, val *vos.MetricPoint) {
|
||||
logger.Debugf("[ToJudge.start][stra:%+v][val:%+v]", stra, val)
|
||||
now := val.Time
|
||||
|
||||
hps := linkedList.HistoryPoints(now - int64(stra.AlertDuration))
|
||||
if len(hps) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
historyArr := []vos.HistoryPoints{}
|
||||
statusArr := []bool{}
|
||||
eventInfo := ""
|
||||
value := ""
|
||||
|
||||
if len(stra.PushExpr.Exps) == 1 {
|
||||
for _, expr := range stra.PushExpr.Exps {
|
||||
history, info, lastValue, status := Judge(stra, expr, hps, val, now)
|
||||
statusArr = append(statusArr, status)
|
||||
|
||||
if value == "" {
|
||||
value = fmt.Sprintf("%s: %s", expr.Metric, lastValue)
|
||||
} else {
|
||||
value += fmt.Sprintf("; %s: %s", expr.Metric, lastValue)
|
||||
}
|
||||
|
||||
historyArr = append(historyArr, history)
|
||||
eventInfo += info
|
||||
}
|
||||
} else { //多个条件
|
||||
for _, expr := range stra.PushExpr.Exps {
|
||||
|
||||
respData, err := GetData(stra, expr, val, now)
|
||||
if err != nil {
|
||||
logger.Errorf("stra:%+v get query data err:%v", stra, err)
|
||||
return
|
||||
}
|
||||
if len(respData) <= 0 {
|
||||
logger.Errorf("stra:%+v get query data respData:%v err", stra, respData)
|
||||
return
|
||||
}
|
||||
|
||||
history, info, lastValue, status := Judge(stra, expr, respData, val, now)
|
||||
|
||||
statusArr = append(statusArr, status)
|
||||
if value == "" {
|
||||
value = fmt.Sprintf("%s: %s", expr.Metric, lastValue)
|
||||
} else {
|
||||
value += fmt.Sprintf("; %s: %s", expr.Metric, lastValue)
|
||||
}
|
||||
|
||||
historyArr = append(historyArr, history)
|
||||
if eventInfo == "" {
|
||||
eventInfo = info
|
||||
} else {
|
||||
if stra.PushExpr.TogetherOrAny == 0 {
|
||||
eventInfo += fmt.Sprintf(" & %s", info)
|
||||
} else if stra.PushExpr.TogetherOrAny == 1 {
|
||||
eventInfo += fmt.Sprintf(" || %s", info)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
bs, err := json.Marshal(historyArr)
|
||||
if err != nil {
|
||||
logger.Errorf("Marshal history:%+v err:%v", historyArr, err)
|
||||
}
|
||||
|
||||
event := &models.AlertEvent{
|
||||
RuleId: stra.Id,
|
||||
RuleName: stra.Name,
|
||||
RuleNote: stra.Note,
|
||||
HashId: str.MD5(fmt.Sprintf("%d_%s", stra.Id, val.PK)),
|
||||
ResIdent: val.Ident,
|
||||
Priority: stra.Priority,
|
||||
HistoryPoints: bs,
|
||||
TriggerTime: now,
|
||||
Values: value,
|
||||
NotifyChannels: stra.NotifyChannels,
|
||||
NotifyGroups: stra.NotifyGroups,
|
||||
NotifyUsers: stra.NotifyUsers,
|
||||
RunbookUrl: stra.RunbookUrl,
|
||||
ReadableExpression: eventInfo,
|
||||
TagMap: val.TagsMap,
|
||||
}
|
||||
logger.Debugf("[ToJudge.event.create][statusArr:%v][type=push][stra:%+v][val:%+v][event:%+v]", statusArr, stra, val, event)
|
||||
sendEventIfNeed(statusArr, event, stra)
|
||||
}
|
||||
|
||||
func Judge(stra *models.AlertRule, exp models.Exp, historyData []*vos.HPoint, firstItem *vos.MetricPoint, now int64) (history vos.HistoryPoints, info string, lastValue string, status bool) {
|
||||
|
||||
var leftValue vos.JsonFloat
|
||||
if exp.Func == "stddev" {
|
||||
info = fmt.Sprintf(" %s (%s,%ds) %v", exp.Metric, exp.Func, stra.AlertDuration, exp.Params)
|
||||
} else if exp.Func == "happen" {
|
||||
info = fmt.Sprintf(" %s (%s,%ds) %v %s %v", exp.Metric, exp.Func, stra.AlertDuration, exp.Params, exp.Optr, exp.Threshold)
|
||||
} else {
|
||||
info = fmt.Sprintf(" %s(%s,%ds) %s %v", exp.Metric, exp.Func, stra.AlertDuration, exp.Optr, exp.Threshold)
|
||||
}
|
||||
|
||||
leftValue, status = judgeItemWithStrategy(stra, historyData, exp, firstItem, now)
|
||||
|
||||
lastValue = "null"
|
||||
if !math.IsNaN(float64(leftValue)) {
|
||||
lastValue = strconv.FormatFloat(float64(leftValue), 'f', -1, 64)
|
||||
}
|
||||
|
||||
history = vos.HistoryPoints{
|
||||
Metric: exp.Metric,
|
||||
Tags: firstItem.TagsMap,
|
||||
Points: historyData,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func judgeItemWithStrategy(stra *models.AlertRule, historyData []*vos.HPoint, exp models.Exp, firstItem *vos.MetricPoint, now int64) (leftValue vos.JsonFloat, isTriggered bool) {
|
||||
straFunc := exp.Func
|
||||
|
||||
var straParam []interface{}
|
||||
|
||||
straParam = append(straParam, stra.AlertDuration)
|
||||
|
||||
switch straFunc {
|
||||
case "happen", "stddev":
|
||||
if len(exp.Params) < 1 {
|
||||
logger.Errorf("stra:%d exp:%+v stra param is null", stra.Id, exp)
|
||||
return
|
||||
}
|
||||
straParam = append(straParam, exp.Params[0])
|
||||
case "c_avg", "c_avg_abs", "c_avg_rate", "c_avg_rate_abs":
|
||||
if len(exp.Params) < 1 {
|
||||
logger.Errorf("stra:%d exp:%+v stra param is null", stra.Id, exp)
|
||||
return
|
||||
}
|
||||
|
||||
hisD, err := GetData(stra, exp, firstItem, now-int64(exp.Params[0]))
|
||||
if err != nil {
|
||||
logger.Errorf("stra:%v %+v get compare data err:%v", stra.Id, exp, err)
|
||||
return
|
||||
}
|
||||
|
||||
if len(hisD) != 1 {
|
||||
logger.Errorf("stra:%d %+v get compare data err, respItems:%v", stra.Id, exp, hisD)
|
||||
return
|
||||
}
|
||||
|
||||
var sum float64
|
||||
for _, i := range hisD {
|
||||
sum += float64(i.Value)
|
||||
}
|
||||
|
||||
//环比数据的平均值
|
||||
straParam = append(straParam, sum/float64(len(hisD)))
|
||||
}
|
||||
|
||||
fn, err := ParseFuncFromString(straFunc, straParam, exp.Optr, exp.Threshold)
|
||||
if err != nil {
|
||||
logger.Errorf("stra:%d %+v parse func fail: %v", stra.Id, exp, err)
|
||||
return
|
||||
}
|
||||
|
||||
return fn.Compute(historyData)
|
||||
}
|
||||
|
||||
func GetData(stra *models.AlertRule, exp models.Exp, firstItem *vos.MetricPoint, now int64) ([]*vos.HPoint, error) {
|
||||
var respData []*vos.HPoint
|
||||
var err error
|
||||
|
||||
//多查一些数据,防止由于查询不到最新点,导致点数不够
|
||||
start := now - int64(stra.AlertDuration) - 2
|
||||
// 这里的参数肯定只有一个
|
||||
queryParam, err := NewQueryRequest(firstItem.Ident, exp.Metric, firstItem.TagsMap, start, now)
|
||||
|
||||
if err != nil {
|
||||
return respData, err
|
||||
}
|
||||
respData = Query(queryParam)
|
||||
logger.Debugf("[exp:%+v][queryParam:%+v][respData:%+v]\n", exp, queryParam, respData)
|
||||
return respData, err
|
||||
}
|
||||
|
||||
// 虽然最近的数据确实产生了事件(产生事件很频繁),但是未必一定要发送,只有告警/恢复状态发生变化的时候才需发送
|
||||
func sendEventIfNeed(status []bool, event *models.AlertEvent, stra *models.AlertRule) {
|
||||
isTriggered := true
|
||||
|
||||
if stra.Type == 0 {
|
||||
// 只判断push型的
|
||||
switch stra.PushExpr.TogetherOrAny {
|
||||
|
||||
case 0:
|
||||
// 全部触发
|
||||
for _, s := range status {
|
||||
isTriggered = isTriggered && s
|
||||
}
|
||||
|
||||
case 1:
|
||||
// 任意一个触发
|
||||
isTriggered = false
|
||||
for _, s := range status {
|
||||
if s == true {
|
||||
isTriggered = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
now := time.Now().Unix()
|
||||
lastEvent, exists := LastEvents.Get(event.RuleId, event.HashId)
|
||||
|
||||
switch event.IsPromePull {
|
||||
case 0:
|
||||
// push型的 && 与条件型的
|
||||
if exists && lastEvent.IsPromePull == 1 {
|
||||
// 之前内存中的事件是pull型的,先清空内存中的事件
|
||||
LastEvents.Del(event.RuleId, event.HashId)
|
||||
}
|
||||
|
||||
if isTriggered {
|
||||
// 新告警或者上次是恢复事件,都需要立即发送
|
||||
if !exists || lastEvent.IsRecov() {
|
||||
event.MarkAlert()
|
||||
SendEvent(event)
|
||||
}
|
||||
} else {
|
||||
// 上次是告警事件,现在恢复了,自然需要通知
|
||||
if exists && lastEvent.IsAlert() {
|
||||
event.MarkRecov()
|
||||
SendEvent(event)
|
||||
}
|
||||
}
|
||||
case 1:
|
||||
// pull型的,产生的事件一定是触发了阈值的,即这个case里不存在recovery的场景,recovery的场景用resolve_timeout的cron来处理
|
||||
if exists && lastEvent.IsPromePull == 0 {
|
||||
// 之前内存中的事件是push型的,先清空内存中的事件
|
||||
LastEvents.Del(event.RuleId, event.HashId)
|
||||
}
|
||||
|
||||
// 1. 第一次来,并且AlertDuration=0,直接发送
|
||||
// 2. 触发累计到AlertDuration时长后触发一条
|
||||
if !exists {
|
||||
// 这是个新事件,之前未曾产生过的
|
||||
if stra.AlertDuration == 0 {
|
||||
// 代表prometheus rule for 配置为0,直接发送
|
||||
event.LastSend = true
|
||||
event.MarkAlert()
|
||||
SendEvent(event)
|
||||
} else {
|
||||
// 只有一条事件,显然无法满足for AlertDuration的时间,放到内存里等待
|
||||
LastEvents.Set(event)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// 内存里有事件,虽然AlertDuration是0但是上次没有发过(可能是中间调整过AlertDuration,比如从某个大于0的值调整为0)
|
||||
if stra.AlertDuration == 0 && !lastEvent.LastSend {
|
||||
event.LastSend = true
|
||||
event.MarkAlert()
|
||||
SendEvent(event)
|
||||
return
|
||||
}
|
||||
|
||||
// 内存里有事件,AlertDuration也是大于0的,需要判断Prometheus里的for的逻辑
|
||||
if now-lastEvent.TriggerTime < int64(stra.AlertDuration) {
|
||||
// 距离上次告警的时间小于告警统计周期,即不满足for的条件,不产生告警通知
|
||||
return
|
||||
}
|
||||
|
||||
logger.Debugf("[lastEvent.LastSend:%+v][event.LastSend:%+v][now:%+v][lastEvent.TriggerTime:%+v][stra.AlertDuration:%+v][now-lastEvent.TriggerTime:%+v]\n",
|
||||
lastEvent.LastSend,
|
||||
event.LastSend,
|
||||
now,
|
||||
lastEvent.TriggerTime,
|
||||
stra.AlertDuration,
|
||||
now-lastEvent.TriggerTime,
|
||||
)
|
||||
|
||||
// 满足for的条件了,应产生事件,但是未必一定要发送,上次没发送或者上次是恢复这次才发送,即保证只发一条
|
||||
if !lastEvent.LastSend || lastEvent.IsRecov() {
|
||||
event.LastSend = true
|
||||
event.MarkAlert()
|
||||
SendEvent(event)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func SendEvent(event *models.AlertEvent) {
|
||||
// update last event
|
||||
LastEvents.Set(event)
|
||||
|
||||
if event.IsAlert() {
|
||||
// 只有是告警事件,才需要判断是否重复发送的问题,如果是恢复事件,就直接交给后续alert处理
|
||||
ae, err := models.AlertEventGet("hash_id = ?", event.HashId)
|
||||
if err == nil && ae != nil {
|
||||
logger.Debugf("[event exists do not send again][type:%+v][event:%+v]", event.IsPromePull, event)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
ok := EventQueue.PushFront(event)
|
||||
if !ok {
|
||||
logger.Errorf("push event:%v err", event)
|
||||
}
|
||||
logger.Debugf("[SendEvent.event.success][type:%+v][event:%+v]", event.IsPromePull, event)
|
||||
}
|
122
judge/history.go
122
judge/history.go
|
@ -1,122 +0,0 @@
|
|||
// Copyright 2017 Xiaomi, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package judge
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/didi/nightingale/v5/vos"
|
||||
)
|
||||
|
||||
type PointCache struct {
|
||||
sync.RWMutex
|
||||
M map[string]*SafeLinkedList
|
||||
}
|
||||
|
||||
func NewPointCache() *PointCache {
|
||||
return &PointCache{M: make(map[string]*SafeLinkedList)}
|
||||
}
|
||||
|
||||
func (pc *PointCache) Get(key string) (*SafeLinkedList, bool) {
|
||||
pc.RLock()
|
||||
defer pc.RUnlock()
|
||||
val, ok := pc.M[key]
|
||||
return val, ok
|
||||
}
|
||||
|
||||
func (pc *PointCache) Set(key string, val *SafeLinkedList) {
|
||||
pc.Lock()
|
||||
defer pc.Unlock()
|
||||
pc.M[key] = val
|
||||
}
|
||||
|
||||
func (pc *PointCache) Len() int {
|
||||
pc.RLock()
|
||||
defer pc.RUnlock()
|
||||
return len(pc.M)
|
||||
}
|
||||
|
||||
func (pc *PointCache) CleanStale(before int64) {
|
||||
var keys []string
|
||||
|
||||
pc.RLock()
|
||||
for key, L := range pc.M {
|
||||
front := L.Front()
|
||||
if front == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if front.Value.(*vos.MetricPoint).Time < before {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
}
|
||||
pc.RUnlock()
|
||||
|
||||
pc.BatchDelete(keys)
|
||||
}
|
||||
|
||||
func (pc *PointCache) BatchDelete(keys []string) {
|
||||
count := len(keys)
|
||||
if count == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
pc.Lock()
|
||||
defer pc.Unlock()
|
||||
for i := 0; i < count; i++ {
|
||||
delete(pc.M, keys[i])
|
||||
}
|
||||
}
|
||||
|
||||
func (pc *PointCache) PutPoint(p *vos.MetricPoint, maxAliveDuration int64) *SafeLinkedList {
|
||||
linkedList, exists := pc.Get(p.PK)
|
||||
if exists {
|
||||
linkedList.PushFrontAndMaintain(p, maxAliveDuration)
|
||||
} else {
|
||||
NL := list.New()
|
||||
NL.PushFront(p)
|
||||
linkedList = &SafeLinkedList{L: NL}
|
||||
pc.Set(p.PK, linkedList)
|
||||
}
|
||||
|
||||
return linkedList
|
||||
}
|
||||
|
||||
// 这是个线程不安全的大Map,需要提前初始化好
|
||||
var PointCaches = make(map[string]*PointCache)
|
||||
var pointChars = []string{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f"}
|
||||
var pointHeadKeys = make([]string, 0, 256)
|
||||
|
||||
func initPointCaches() {
|
||||
for i := 0; i < 16; i++ {
|
||||
for j := 0; j < 16; j++ {
|
||||
pointHeadKeys = append(pointHeadKeys, pointChars[i]+pointChars[j])
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < 256; i++ {
|
||||
PointCaches[pointHeadKeys[i]] = NewPointCache()
|
||||
}
|
||||
}
|
||||
|
||||
func CleanStalePoints() {
|
||||
// 监控数据2天都没关联到任何告警策略,说明对应的告警策略已经删除了
|
||||
before := time.Now().Unix() - 3600*24*2
|
||||
for i := 0; i < 256; i++ {
|
||||
PointCaches[pointHeadKeys[i]].CleanStale(before)
|
||||
}
|
||||
}
|
118
judge/judge.go
118
judge/judge.go
|
@ -1,118 +0,0 @@
|
|||
package judge
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/didi/nightingale/v5/cache"
|
||||
"github.com/didi/nightingale/v5/config"
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
"github.com/didi/nightingale/v5/naming"
|
||||
"github.com/toolkits/pkg/container/list"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
var (
|
||||
// 这个内存Queue放到judge的包里或alert的包里感觉都可以
|
||||
// 放到judge的包里,即当前的做法,相当于把alert看做judge的一个附属小功能
|
||||
// 这个Queue的核心作用就是削峰填谷,应对突然产生的大面积事件
|
||||
EventQueue *list.SafeListLimited
|
||||
|
||||
// 上次同步全量告警规则的时间,全量同步都没做过,我这也不用处理PULL的规则了
|
||||
lastSyncTime int64
|
||||
)
|
||||
|
||||
func Start(ctx context.Context) {
|
||||
// PUSH型的告警引擎,依赖内存里缓存的数据来做告警判断,两层map减小锁粒度
|
||||
initPointCaches()
|
||||
|
||||
// 把数据库中的未恢复告警同步一份到内存中,便于后续判断告警是否应该发送
|
||||
LastEvents.Init()
|
||||
|
||||
// 默认初始化的大小是1000万,相当于内存里有1000万事件,应该够用了
|
||||
EventQueue = list.NewSafeListLimited(10000000)
|
||||
|
||||
// 开始心跳,对于PUSH型的数据我有策略了自然就可以处理了
|
||||
if err := heartbeat(config.Config.Heartbeat.LocalAddr); err != nil {
|
||||
fmt.Println(err)
|
||||
logger.Close()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// 启动心跳goroutinue,如果挂了,trans可以及时感知
|
||||
go loopHeartbeat()
|
||||
|
||||
// PULL型的策略不着急,等一段时间(等哈希环是稳态的)再开始周期性干活
|
||||
go syncPullRules(ctx)
|
||||
|
||||
// 告警策略删除之后,针对这些告警策略缓存的监控数据要被清理
|
||||
go loopCleanStalePoints()
|
||||
}
|
||||
|
||||
func syncPullRules(ctx context.Context) {
|
||||
// 先等一会再干活,等大部分judge都上报心跳过了,哈希环不变了
|
||||
time.Sleep(time.Second * 33)
|
||||
for {
|
||||
syncPullRulesOnce(ctx)
|
||||
time.Sleep(time.Second * 9)
|
||||
}
|
||||
}
|
||||
|
||||
func syncPullRulesOnce(ctx context.Context) {
|
||||
if cache.AlertRulesByMetric.LastSync == lastSyncTime {
|
||||
return
|
||||
}
|
||||
|
||||
// 根据我自己的标识,去查找属于我的PULL型告警规则
|
||||
ident := config.Config.Heartbeat.LocalAddr
|
||||
|
||||
rules := cache.AlertRules.Pulls()
|
||||
count := len(rules)
|
||||
mines := make([]models.AlertRule, 0, count)
|
||||
logger.Debugf("[got_one_pull_rule_for_all][ruleNum:%v]", count)
|
||||
for i := 0; i < count; i++ {
|
||||
|
||||
instance, err := naming.HashRing.GetNode(fmt.Sprint(rules[i].Id))
|
||||
if err != nil {
|
||||
logger.Warningf("hashring: sharding pull rule(%d) fail: %v", rules[i].Id, err)
|
||||
continue
|
||||
}
|
||||
logger.Debugf("[got_one_pull_rule_hash_result][instance:%v][ident:%v][rule:%v]", instance, ident, rules[i])
|
||||
if instance == ident {
|
||||
// 属于我的
|
||||
mines = append(mines, *rules[i])
|
||||
logger.Debugf("[got_one_pull_rule_for_me][rule:%v]", rules[i])
|
||||
}
|
||||
}
|
||||
|
||||
pullRuleManager.SyncRules(ctx, mines)
|
||||
lastSyncTime = cache.AlertRulesByMetric.LastSync
|
||||
}
|
||||
|
||||
func loopHeartbeat() {
|
||||
interval := time.Duration(config.Config.Heartbeat.Interval) * time.Millisecond
|
||||
|
||||
for {
|
||||
time.Sleep(interval)
|
||||
if err := heartbeat(config.Config.Heartbeat.LocalAddr); err != nil {
|
||||
logger.Warning(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func heartbeat(endpoint string) error {
|
||||
err := models.InstanceHeartbeat(config.EndpointName, endpoint)
|
||||
if err != nil {
|
||||
return fmt.Errorf("mysql.error: instance(service=%s, endpoint=%s) heartbeat fail: %v", config.EndpointName, endpoint, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func loopCleanStalePoints() {
|
||||
for {
|
||||
time.Sleep(time.Hour)
|
||||
CleanStalePoints()
|
||||
}
|
||||
}
|
|
@ -1,119 +0,0 @@
|
|||
package judge
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/didi/nightingale/v5/models"
|
||||
"github.com/toolkits/pkg/logger"
|
||||
)
|
||||
|
||||
// rule_id -> hash_id -> *models.AlertEvent
|
||||
type SafeEventMap struct {
|
||||
sync.RWMutex
|
||||
M map[int64]map[string]*models.AlertEvent
|
||||
}
|
||||
|
||||
var (
|
||||
LastEvents = &SafeEventMap{M: make(map[int64]map[string]*models.AlertEvent)}
|
||||
)
|
||||
|
||||
func (s *SafeEventMap) Get(ruleId int64, hashId string) (*models.AlertEvent, bool) {
|
||||
s.RLock()
|
||||
defer s.RUnlock()
|
||||
|
||||
m, has := s.M[ruleId]
|
||||
if !has {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
event, has := m[hashId]
|
||||
return event, has
|
||||
}
|
||||
|
||||
func (s *SafeEventMap) Set(event *models.AlertEvent) {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
|
||||
_, has := s.M[event.RuleId]
|
||||
if !has {
|
||||
m := make(map[string]*models.AlertEvent)
|
||||
m[event.HashId] = event
|
||||
s.M[event.RuleId] = m
|
||||
} else {
|
||||
s.M[event.RuleId][event.HashId] = event
|
||||
}
|
||||
}
|
||||
|
||||
func (s *SafeEventMap) Init() {
|
||||
aes, err := models.AlertEventGetAll()
|
||||
if err != nil {
|
||||
fmt.Println("load all alert_event fail:", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if len(aes) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
data := make(map[int64]map[string]*models.AlertEvent)
|
||||
for i := 0; i < len(aes); i++ {
|
||||
event := aes[i]
|
||||
_, has := data[event.RuleId]
|
||||
if !has {
|
||||
m := make(map[string]*models.AlertEvent)
|
||||
m[event.HashId] = event
|
||||
data[event.RuleId] = m
|
||||
} else {
|
||||
data[event.RuleId][event.HashId] = event
|
||||
}
|
||||
}
|
||||
|
||||
s.Lock()
|
||||
s.M = data
|
||||
s.Unlock()
|
||||
}
|
||||
|
||||
func (s *SafeEventMap) Del(ruleId int64, hashId string) {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
|
||||
_, has := s.M[ruleId]
|
||||
if !has {
|
||||
return
|
||||
}
|
||||
|
||||
delete(s.M[ruleId], hashId)
|
||||
}
|
||||
|
||||
func (s *SafeEventMap) DeleteOrSendRecovery(ruleId int64, toKeepKeys map[string]struct{}) {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
|
||||
m, has := s.M[ruleId]
|
||||
if !has {
|
||||
return
|
||||
}
|
||||
|
||||
for k, ev := range m {
|
||||
if _, loaded := toKeepKeys[k]; loaded {
|
||||
continue
|
||||
}
|
||||
|
||||
// 如果因为promql修改,导致本来是告警状态变成了恢复,也接受
|
||||
logger.Debugf("[to_del][ev.IsRecovery:%+v][ev.LastSend:%+v]", ev.IsRecovery, ev.LastSend)
|
||||
|
||||
// promql 没查询到结果,需要将告警标记为已恢复并发送
|
||||
// 同时需要满足 已经发送过触发信息,并且时间差满足 大于AlertDuration
|
||||
// 为了避免 发送告警后 一个点 断点了就立即发送恢复信息的case
|
||||
now := time.Now().Unix()
|
||||
if ev.IsAlert() && ev.LastSend && now-ev.TriggerTime > ev.AlertDuration {
|
||||
logger.Debugf("[prom.alert.MarkRecov][ev.RuleName:%v]", ev.RuleName)
|
||||
ev.MarkRecov()
|
||||
EventQueue.PushFront(ev)
|
||||
delete(s.M[ruleId], k)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,164 +0,0 @@
|
|||
// Copyright 2017 Xiaomi, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package judge
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"sync"
|
||||
|
||||
"github.com/didi/nightingale/v5/vos"
|
||||
)
|
||||
|
||||
// SafeLinkedList pairs a container/list list with an embedded RWMutex that
// its methods use to guard access to the list.
type SafeLinkedList struct {
	sync.RWMutex

	// L is the underlying list. It must be non-nil before any method is
	// called.
	L *list.List
}
|
||||
|
||||
func (ll *SafeLinkedList) Front() *list.Element {
|
||||
ll.RLock()
|
||||
defer ll.RUnlock()
|
||||
return ll.L.Front()
|
||||
}
|
||||
|
||||
func (ll *SafeLinkedList) Len() int {
|
||||
ll.RLock()
|
||||
defer ll.RUnlock()
|
||||
return ll.L.Len()
|
||||
}
|
||||
|
||||
func (ll *SafeLinkedList) PushFrontAndMaintain(v *vos.MetricPoint, maintainDuration int64) {
|
||||
ll.Lock()
|
||||
defer ll.Unlock()
|
||||
|
||||
sz := ll.L.Len()
|
||||
lastPointTs := ll.L.Front().Value.(*vos.MetricPoint).Time
|
||||
earliestTs := v.Time - maintainDuration
|
||||
|
||||
if sz > 0 {
|
||||
// 新push上来的数据有可能重复了,或者timestamp不对,这种数据要丢掉
|
||||
if v.Time <= lastPointTs {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
ll.L.PushFront(v)
|
||||
|
||||
sz++
|
||||
|
||||
for i := 0; i < sz; i++ {
|
||||
if ll.L.Back().Value.(*vos.MetricPoint).Time >= earliestTs {
|
||||
break
|
||||
}
|
||||
//最前面的点已经不在告警策略时间周期内,丢弃掉
|
||||
ll.L.Remove(ll.L.Back())
|
||||
}
|
||||
}
|
||||
|
||||
func (ll *SafeLinkedList) HistoryPoints(smallestTime int64) []*vos.HPoint {
|
||||
size := ll.Len()
|
||||
if size == 0 {
|
||||
return []*vos.HPoint{}
|
||||
}
|
||||
|
||||
firstElement := ll.Front()
|
||||
firstItem := firstElement.Value.(*vos.MetricPoint)
|
||||
|
||||
vs := make([]*vos.HPoint, 0)
|
||||
|
||||
if firstItem.Time < smallestTime {
|
||||
return vs
|
||||
}
|
||||
|
||||
v := &vos.HPoint{
|
||||
Timestamp: firstItem.Time,
|
||||
Value: vos.JsonFloat(firstItem.Value),
|
||||
}
|
||||
|
||||
vs = append(vs, v)
|
||||
|
||||
currentElement := firstElement
|
||||
for i := 1; i < size; i++ {
|
||||
nextElement := currentElement.Next()
|
||||
if nextElement == nil {
|
||||
return vs
|
||||
}
|
||||
|
||||
item := nextElement.Value.(*vos.MetricPoint)
|
||||
|
||||
if item.Time < smallestTime {
|
||||
return vs
|
||||
}
|
||||
|
||||
v := &vos.HPoint{
|
||||
Timestamp: item.Time,
|
||||
Value: vos.JsonFloat(item.Value),
|
||||
}
|
||||
vs = append(vs, v)
|
||||
currentElement = nextElement
|
||||
}
|
||||
|
||||
return vs
|
||||
}
|
||||
|
||||
// func (ll *SafeLinkedList) QueryDataByTS(start, end int64) []*vos.HPoint {
|
||||
// size := ll.Len()
|
||||
// if size == 0 {
|
||||
// return []*vos.HPoint{}
|
||||
// }
|
||||
|
||||
// firstElement := ll.Front()
|
||||
// firstItem := firstElement.Value.(*vos.MetricPoint)
|
||||
|
||||
// var vs []*vos.HPoint
|
||||
|
||||
// if firstItem.Time < start {
|
||||
// //最新的点也比起始时间旧,直接返回
|
||||
// return vs
|
||||
// }
|
||||
|
||||
// v := &vos.HPoint{
|
||||
// Timestamp: firstItem.Time,
|
||||
// Value: vos.JsonFloat(firstItem.Value),
|
||||
// }
|
||||
|
||||
// vs = append(vs, v)
|
||||
// currentElement := firstElement
|
||||
|
||||
// for {
|
||||
// nextElement := currentElement.Next()
|
||||
// if nextElement == nil {
|
||||
// return vs
|
||||
// }
|
||||
|
||||
// if nextElement.Value.(*vos.MetricPoint).Time < start {
|
||||
// return vs
|
||||
// }
|
||||
|
||||
// if nextElement.Value.(*vos.MetricPoint).Time > end {
|
||||
// currentElement = nextElement
|
||||
// continue
|
||||
// }
|
||||
|
||||
// v := &vos.HPoint{
|
||||
// Timestamp: nextElement.Value.(*vos.MetricPoint).Time,
|
||||
// Value: vos.JsonFloat(nextElement.Value.(*vos.MetricPoint).Value),
|
||||
// }
|
||||
|
||||
// vs = append(vs, v)
|
||||
// currentElement = nextElement
|
||||
// }
|
||||
|
||||
// return vs
|
||||
// }
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue