forked from JointCloud/pcm-coordinator
Merge remote-tracking branch 'origin/master'
# Conflicts: # go.mod
This commit is contained in:
commit
8bbb72142d
|
@ -1738,6 +1738,11 @@ PayloadCreateTrainJob{
|
|||
Card string `json:"card,optional"`
|
||||
TimeElapsed int32 `json:"elapsed,optional"`
|
||||
}
|
||||
|
||||
// TrainingTaskStatResp is the response body of GET /ai/trainingTaskStat.
TrainingTaskStatResp {
	Running int32 `json:"running"` // number of training tasks currently running
	Total   int32 `json:"total"`   // number of training tasks overall
}
|
||||
)
|
||||
|
||||
/******************create TrainIngJob end*************************/
|
||||
|
|
|
@ -88,7 +88,8 @@ type (
|
|||
// StartDeployInstanceReq carries the form parameters for starting a deploy instance.
// The stale `InstanceId string form:"id"` line has been dropped so that each
// field is declared once and each form key is bound to exactly one field.
StartDeployInstanceReq {
	AdapterId  string `form:"adapterId"`  // key into the inference adapter map
	ClusterId  string `form:"clusterId"`  // key into the adapter's cluster map
	Id         string `form:"id"`         // numeric id of the stored deploy-instance record
	InstanceId string `form:"instanceId"` // instance identifier passed to the cluster
}
|
||||
|
||||
StartDeployInstanceResp {
|
||||
|
@ -97,10 +98,26 @@ type (
|
|||
// StopDeployInstanceReq carries the form parameters for stopping a deploy instance.
// The stale `InstanceId string form:"id"` line has been dropped so that each
// field is declared once and each form key is bound to exactly one field.
StopDeployInstanceReq {
	AdapterId  string `form:"adapterId"`  // key into the inference adapter map
	ClusterId  string `form:"clusterId"`  // key into the adapter's cluster map
	Id         string `form:"id"`         // numeric id of the stored deploy-instance record
	InstanceId string `form:"instanceId"` // instance identifier passed to the cluster
}
|
||||
|
||||
// StopDeployInstanceResp is the (empty) response of POST /inference/stopDeployInstance.
StopDeployInstanceResp {}
|
||||
|
||||
// DeployInstanceStatReq is the (parameterless) request of GET /inference/deployInstanceStat.
DeployInstanceStatReq {}
|
||||
|
||||
// DeployInstanceStatResp is the response body of GET /inference/deployInstanceStat.
DeployInstanceStatResp {
	Running int32 `json:"running"` // number of deploy instances currently running
	Total   int32 `json:"total"`   // number of deploy instances overall
}
|
||||
|
||||
// InferenceTaskStatReq is the (parameterless) request of GET /inference/taskStat.
InferenceTaskStatReq {}
|
||||
|
||||
// InferenceTaskStatResp is the response body of GET /inference/taskStat.
InferenceTaskStatResp {
	Running int32 `json:"running"` // number of inference tasks currently running
	Total   int32 `json:"total"`   // number of inference tasks overall
}
|
||||
)
|
||||
|
|
10
desc/pcm.api
10
desc/pcm.api
|
@ -244,6 +244,10 @@ service pcm {
|
|||
group: ai
|
||||
)
|
||||
service pcm {
|
||||
@doc "训练任务统计"
|
||||
@handler trainingTaskStatHandler
|
||||
get /ai/trainingTaskStat returns (TrainingTaskStatResp)
|
||||
|
||||
@doc "智算中心概览"
|
||||
@handler getCenterOverviewHandler
|
||||
get /ai/getCenterOverview returns (CenterOverviewResp)
|
||||
|
@ -949,6 +953,12 @@ service pcm {
|
|||
|
||||
@handler StopDeployInstanceHandler
|
||||
post /inference/stopDeployInstance (StopDeployInstanceReq) returns (StopDeployInstanceResp)
|
||||
|
||||
@handler DeployInstanceStatHandler
|
||||
get /inference/deployInstanceStat (DeployInstanceStatReq) returns (DeployInstanceStatResp)
|
||||
|
||||
@handler InferenceTaskStatHandler
|
||||
get /inference/taskStat (InferenceTaskStatReq) returns (InferenceTaskStatResp)
|
||||
}
|
||||
|
||||
@server(
|
||||
|
|
4
go.mod
4
go.mod
|
@ -18,8 +18,8 @@ require (
|
|||
github.com/prometheus/common v0.55.0
|
||||
github.com/robfig/cron/v3 v3.0.1
|
||||
github.com/zeromicro/go-zero v1.6.6
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240722032933-7bd6ab00d249
|
||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240724095608-1727d09f030c
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240725071305-f751eec4dde1
|
||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240722092017-50d17f36d023
|
||||
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240718073732-bc5d687f6330
|
||||
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203
|
||||
gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5
|
||||
|
|
4
go.sum
4
go.sum
|
@ -495,8 +495,8 @@ github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M
|
|||
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
|
||||
github.com/zeromicro/go-zero v1.6.6 h1:nZTVYObklHiBdYJ/nPoAZ8kGVAplWSDjT7DGE7ur0uk=
|
||||
github.com/zeromicro/go-zero v1.6.6/go.mod h1:olKf1/hELbSmuIgLgJeoeNVp3tCbLqj6UmO7ATSta4A=
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240722032933-7bd6ab00d249 h1:bHJGq5P+8w4fP62PZhIiq/fvOhvDPRtkM4pcmU8OZ1w=
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240722032933-7bd6ab00d249/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY=
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240725071305-f751eec4dde1 h1:DicBXoQiC6mumMBeyqSPNrsjtqJIgk5Pv2hscu2xryw=
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240725071305-f751eec4dde1/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY=
|
||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240722092017-50d17f36d023 h1:9DNobl0gvRCSXtjyMsfUwq0w0TMvds4rqNRsEqeX4j8=
|
||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240722092017-50d17f36d023/go.mod h1:/eOmBFZKWGoabG3sRVkVvIbLwsd2631k4jkUBR6x1AA=
|
||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240724095608-1727d09f030c h1:CodJeGgTYJwA6NDHFnw6B+4pBXUl79tvAcECq39tgZI=
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
package ai
|
||||
|
||||
import (
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||
"net/http"
|
||||
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/ai"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
)
|
||||
|
||||
func TrainingTaskStatHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
l := ai.NewTrainingTaskStatLogic(r.Context(), svcCtx)
|
||||
resp, err := l.TrainingTaskStat()
|
||||
result.HttpResult(r, w, resp, err)
|
||||
}
|
||||
}
|
|
@ -1,28 +1,24 @@
|
|||
package inference
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"jcc-coordinator/internal/logic/inference"
|
||||
"jcc-coordinator/internal/svc"
|
||||
"jcc-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/inference"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func DeployInstanceListHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
var req types.DeployInstanceListReq
|
||||
if err := httpx.Parse(r, &req); err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
result.ParamErrorResult(r, w, err)
|
||||
return
|
||||
}
|
||||
|
||||
l := inference.NewDeployInstanceListLogic(r.Context(), svcCtx)
|
||||
resp, err := l.DeployInstanceList(&req)
|
||||
if err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
} else {
|
||||
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||
}
|
||||
result.HttpResult(r, w, resp, err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
package inference
|
||||
|
||||
import (
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/inference"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
)
|
||||
|
||||
func DeployInstanceStatHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
var req types.DeployInstanceStatReq
|
||||
if err := httpx.Parse(r, &req); err != nil {
|
||||
result.ParamErrorResult(r, w, err)
|
||||
return
|
||||
}
|
||||
|
||||
l := inference.NewDeployInstanceStatLogic(r.Context(), svcCtx)
|
||||
resp, err := l.DeployInstanceStat(&req)
|
||||
result.HttpResult(r, w, resp, err)
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
package inference
|
||||
|
||||
import (
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/inference"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func InferenceTaskStatHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
var req types.InferenceTaskStatReq
|
||||
if err := httpx.Parse(r, &req); err != nil {
|
||||
result.ParamErrorResult(r, w, err)
|
||||
return
|
||||
}
|
||||
|
||||
l := inference.NewInferenceTaskStatLogic(r.Context(), svcCtx)
|
||||
resp, err := l.InferenceTaskStat(&req)
|
||||
result.HttpResult(r, w, resp, err)
|
||||
|
||||
}
|
||||
}
|
|
@ -1,28 +1,24 @@
|
|||
package inference
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"jcc-coordinator/internal/logic/inference"
|
||||
"jcc-coordinator/internal/svc"
|
||||
"jcc-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/inference"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func StartDeployInstanceListHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
var req types.StartDeployInstanceReq
|
||||
if err := httpx.Parse(r, &req); err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
result.ParamErrorResult(r, w, err)
|
||||
return
|
||||
}
|
||||
|
||||
l := inference.NewStartDeployInstanceListLogic(r.Context(), svcCtx)
|
||||
resp, err := l.StartDeployInstanceList(&req)
|
||||
if err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
} else {
|
||||
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||
}
|
||||
result.HttpResult(r, w, resp, err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,28 +1,24 @@
|
|||
package inference
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"jcc-coordinator/internal/logic/inference"
|
||||
"jcc-coordinator/internal/svc"
|
||||
"jcc-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/inference"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func StopDeployInstanceHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
var req types.StopDeployInstanceReq
|
||||
if err := httpx.Parse(r, &req); err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
result.ParamErrorResult(r, w, err)
|
||||
return
|
||||
}
|
||||
|
||||
l := inference.NewStopDeployInstanceLogic(r.Context(), svcCtx)
|
||||
resp, err := l.StopDeployInstance(&req)
|
||||
if err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
} else {
|
||||
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||
}
|
||||
result.HttpResult(r, w, resp, err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -287,6 +287,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
|||
|
||||
server.AddRoutes(
|
||||
[]rest.Route{
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/ai/trainingTaskStat",
|
||||
Handler: ai.TrainingTaskStatHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/ai/getCenterOverview",
|
||||
|
@ -1198,6 +1203,16 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
|||
Path: "/inference/stopDeployInstance",
|
||||
Handler: inference.StopDeployInstanceHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/inference/deployInstanceStat",
|
||||
Handler: inference.DeployInstanceStatHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/inference/taskStat",
|
||||
Handler: inference.InferenceTaskStatHandler(serverCtx),
|
||||
},
|
||||
},
|
||||
rest.WithPrefix("/pcm/v1"),
|
||||
)
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
package ai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
)
|
||||
|
||||
type TrainingTaskStatLogic struct {
|
||||
logx.Logger
|
||||
ctx context.Context
|
||||
svcCtx *svc.ServiceContext
|
||||
}
|
||||
|
||||
func NewTrainingTaskStatLogic(ctx context.Context, svcCtx *svc.ServiceContext) *TrainingTaskStatLogic {
|
||||
return &TrainingTaskStatLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *TrainingTaskStatLogic) TrainingTaskStat() (resp *types.TrainingTaskStatResp, err error) {
|
||||
resp = &types.TrainingTaskStatResp{}
|
||||
|
||||
total, err := l.svcCtx.Scheduler.AiStorages.GetTrainingTaskTotalNum()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
running, err := l.svcCtx.Scheduler.AiStorages.GetTrainingTaskRunningNum()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if total == 0 {
|
||||
return nil, errors.New("get statistics failed")
|
||||
}
|
||||
|
||||
resp.Total = total
|
||||
resp.Running = running
|
||||
return resp, nil
|
||||
}
|
|
@ -2,11 +2,11 @@ package inference
|
|||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"jcc-coordinator/internal/svc"
|
||||
"jcc-coordinator/internal/types"
|
||||
|
||||
"errors"
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
)
|
||||
|
||||
type DeployInstanceListLogic struct {
|
||||
|
@ -24,7 +24,35 @@ func NewDeployInstanceListLogic(ctx context.Context, svcCtx *svc.ServiceContext)
|
|||
}
|
||||
|
||||
func (l *DeployInstanceListLogic) DeployInstanceList(req *types.DeployInstanceListReq) (resp *types.DeployInstanceListResp, err error) {
|
||||
// todo: add your logic here and delete this line
|
||||
limit := req.PageSize
|
||||
offset := req.PageSize * (req.PageNum - 1)
|
||||
resp = &types.DeployInstanceListResp{}
|
||||
|
||||
var list []*models.AiInferDeployInstance
|
||||
|
||||
tx := l.svcCtx.DbEngin.Raw("select * from ai_infer_deploy_instance").Scan(&list)
|
||||
if tx.Error != nil {
|
||||
logx.Errorf(tx.Error.Error())
|
||||
return nil, tx.Error
|
||||
}
|
||||
|
||||
//count total
|
||||
var total int64
|
||||
err = tx.Count(&total).Error
|
||||
tx.Limit(limit).Offset(offset)
|
||||
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
|
||||
err = tx.Order("create_time desc").Find(&list).Error
|
||||
if err != nil {
|
||||
return nil, errors.New(err.Error())
|
||||
}
|
||||
resp.List = &list
|
||||
resp.PageSize = req.PageSize
|
||||
resp.PageNum = req.PageNum
|
||||
resp.Total = total
|
||||
|
||||
return
|
||||
}
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
package inference
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
)
|
||||
|
||||
type DeployInstanceStatLogic struct {
|
||||
logx.Logger
|
||||
ctx context.Context
|
||||
svcCtx *svc.ServiceContext
|
||||
}
|
||||
|
||||
func NewDeployInstanceStatLogic(ctx context.Context, svcCtx *svc.ServiceContext) *DeployInstanceStatLogic {
|
||||
return &DeployInstanceStatLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *DeployInstanceStatLogic) DeployInstanceStat(req *types.DeployInstanceStatReq) (resp *types.DeployInstanceStatResp, err error) {
|
||||
resp = &types.DeployInstanceStatResp{}
|
||||
|
||||
total, err := l.svcCtx.Scheduler.AiStorages.GetInferDeployInstanceTotalNum()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
running, err := l.svcCtx.Scheduler.AiStorages.GetInferDeployInstanceRunningNum()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if total == 0 {
|
||||
return nil, errors.New("get statistics failed")
|
||||
}
|
||||
|
||||
resp.Total = total
|
||||
resp.Running = running
|
||||
return resp, nil
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
package inference
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
)
|
||||
|
||||
type InferenceTaskStatLogic struct {
|
||||
logx.Logger
|
||||
ctx context.Context
|
||||
svcCtx *svc.ServiceContext
|
||||
}
|
||||
|
||||
func NewInferenceTaskStatLogic(ctx context.Context, svcCtx *svc.ServiceContext) *InferenceTaskStatLogic {
|
||||
return &InferenceTaskStatLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *InferenceTaskStatLogic) InferenceTaskStat(req *types.InferenceTaskStatReq) (resp *types.InferenceTaskStatResp, err error) {
|
||||
resp = &types.InferenceTaskStatResp{}
|
||||
|
||||
total, err := l.svcCtx.Scheduler.AiStorages.GetInferenceTaskTotalNum()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
running, err := l.svcCtx.Scheduler.AiStorages.GetInferenceTaskRunningNum()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if total == 0 {
|
||||
return nil, errors.New("get statistics failed")
|
||||
}
|
||||
|
||||
resp.Total = total
|
||||
resp.Running = running
|
||||
return resp, nil
|
||||
}
|
|
@ -2,11 +2,11 @@ package inference
|
|||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"jcc-coordinator/internal/svc"
|
||||
"jcc-coordinator/internal/types"
|
||||
|
||||
"errors"
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
type StartDeployInstanceListLogic struct {
|
||||
|
@ -24,7 +24,19 @@ func NewStartDeployInstanceListLogic(ctx context.Context, svcCtx *svc.ServiceCon
|
|||
}
|
||||
|
||||
func (l *StartDeployInstanceListLogic) StartDeployInstanceList(req *types.StartDeployInstanceReq) (resp *types.StartDeployInstanceResp, err error) {
|
||||
// todo: add your logic here and delete this line
|
||||
resp = &types.StartDeployInstanceResp{}
|
||||
success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[req.AdapterId][req.ClusterId].StartInferDeployInstance(l.ctx, req.InstanceId)
|
||||
id, err := strconv.ParseInt(req.Id, 10, 64)
|
||||
ins, err := l.svcCtx.Scheduler.AiStorages.GetInferDeployInstanceById(id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return
|
||||
l.svcCtx.Scheduler.AiStorages.UpdateInferDeployInstance(ins)
|
||||
|
||||
if !success {
|
||||
return nil, errors.New("start instance failed")
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
|
|
@ -2,11 +2,11 @@ package inference
|
|||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"jcc-coordinator/internal/svc"
|
||||
"jcc-coordinator/internal/types"
|
||||
|
||||
"errors"
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
type StopDeployInstanceLogic struct {
|
||||
|
@ -24,7 +24,18 @@ func NewStopDeployInstanceLogic(ctx context.Context, svcCtx *svc.ServiceContext)
|
|||
}
|
||||
|
||||
func (l *StopDeployInstanceLogic) StopDeployInstance(req *types.StopDeployInstanceReq) (resp *types.StopDeployInstanceResp, err error) {
|
||||
// todo: add your logic here and delete this line
|
||||
resp = &types.StopDeployInstanceResp{}
|
||||
success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[req.AdapterId][req.ClusterId].StopInferDeployInstance(l.ctx, req.InstanceId)
|
||||
id, err := strconv.ParseInt(req.Id, 10, 64)
|
||||
ins, err := l.svcCtx.Scheduler.AiStorages.GetInferDeployInstanceById(id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return
|
||||
l.svcCtx.Scheduler.AiStorages.UpdateInferDeployInstance(ins)
|
||||
|
||||
if !success {
|
||||
return nil, errors.New("stop instance failed")
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
|
|
@ -373,19 +373,20 @@ func (s *AiStorage) AddNoticeInfo(adapterId string, adapterName string, clusterI
|
|||
}
|
||||
}
|
||||
|
||||
func (s *AiStorage) SaveInferDeployInstance() (int64, error) {
|
||||
func (s *AiStorage) SaveInferDeployInstance(instanceId string, instanceName string, adapterId int64,
|
||||
adapterName string, clusterId int64, clusterName string, modelName string, modelType string, inferCard string) (int64, error) {
|
||||
startTime := time.Now().Format(time.RFC3339)
|
||||
// 构建主任务结构体
|
||||
insModel := models.AiInferDeployInstance{
|
||||
InstanceId: "",
|
||||
InstanceName: "",
|
||||
AdapterId: 123,
|
||||
AdapterName: "",
|
||||
ClusterId: 123,
|
||||
ClusterName: "",
|
||||
ModelName: "",
|
||||
ModelType: "",
|
||||
InferCard: "",
|
||||
InstanceId: instanceId,
|
||||
InstanceName: instanceName,
|
||||
AdapterId: adapterId,
|
||||
AdapterName: adapterName,
|
||||
ClusterId: clusterId,
|
||||
ClusterName: clusterName,
|
||||
ModelName: modelName,
|
||||
ModelType: modelType,
|
||||
InferCard: inferCard,
|
||||
Status: constants.Saved,
|
||||
CreateTime: startTime,
|
||||
UpdateTime: startTime,
|
||||
|
@ -417,3 +418,73 @@ func (s *AiStorage) GetInferDeployInstanceById(id int64) (*models.AiInferDeployI
|
|||
}
|
||||
return &deployIns, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) GetInferDeployInstanceList() ([]*models.AiInferDeployInstance, error) {
|
||||
var list []*models.AiInferDeployInstance
|
||||
tx := s.DbEngin.Raw("select * from ai_infer_deploy_instance").Scan(&list)
|
||||
if tx.Error != nil {
|
||||
logx.Errorf(tx.Error.Error())
|
||||
return nil, tx.Error
|
||||
}
|
||||
return list, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) GetInferDeployInstanceTotalNum() (int32, error) {
|
||||
var total int32
|
||||
tx := s.DbEngin.Raw("select count(*) from ai_infer_deploy_instance").Scan(&total)
|
||||
if tx.Error != nil {
|
||||
logx.Errorf(tx.Error.Error())
|
||||
return 0, tx.Error
|
||||
}
|
||||
return total, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) GetInferDeployInstanceRunningNum() (int32, error) {
|
||||
var total int32
|
||||
tx := s.DbEngin.Raw("select count(*) from ai_infer_deploy_instance where `status` = 'running'").Scan(&total)
|
||||
if tx.Error != nil {
|
||||
logx.Errorf(tx.Error.Error())
|
||||
return 0, tx.Error
|
||||
}
|
||||
return total, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) GetInferenceTaskTotalNum() (int32, error) {
|
||||
var total int32
|
||||
tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 11 or `task_type_dict` = 12").Scan(&total)
|
||||
if tx.Error != nil {
|
||||
logx.Errorf(tx.Error.Error())
|
||||
return 0, tx.Error
|
||||
}
|
||||
return total, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) GetInferenceTaskRunningNum() (int32, error) {
|
||||
var total int32
|
||||
tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 11 and `status` = 'Running'").Scan(&total)
|
||||
if tx.Error != nil {
|
||||
logx.Errorf(tx.Error.Error())
|
||||
return 0, tx.Error
|
||||
}
|
||||
return total, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) GetTrainingTaskTotalNum() (int32, error) {
|
||||
var total int32
|
||||
tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 10").Scan(&total)
|
||||
if tx.Error != nil {
|
||||
logx.Errorf(tx.Error.Error())
|
||||
return 0, tx.Error
|
||||
}
|
||||
return total, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) GetTrainingTaskRunningNum() (int32, error) {
|
||||
var total int32
|
||||
tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 11 and `status` = 'Running'").Scan(&total)
|
||||
if tx.Error != nil {
|
||||
logx.Errorf(tx.Error.Error())
|
||||
return 0, tx.Error
|
||||
}
|
||||
return total, nil
|
||||
}
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
package updater
|
||||
|
||||
import (
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
"net/http"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
func UpdateDeployInstanceStatus(svc *svc.ServiceContext, instance *models.AiInferDeployInstance) {
|
||||
amap, found := svc.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(instance.AdapterId, 10)]
|
||||
if !found {
|
||||
return
|
||||
}
|
||||
cmap, found := amap[strconv.FormatInt(instance.ClusterId, 10)]
|
||||
if !found {
|
||||
return
|
||||
}
|
||||
h := http.Request{}
|
||||
ins, err := cmap.GetInferDeployInstance(h.Context(), instance.InstanceId)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
switch instance.ClusterType {
|
||||
case storeLink.TYPE_OCTOPUS:
|
||||
switch ins.Status {
|
||||
case "running":
|
||||
instance.Status = constants.Running
|
||||
case "stopped":
|
||||
instance.Status = constants.Stopped
|
||||
default:
|
||||
instance.Status = ins.Status
|
||||
}
|
||||
|
||||
case storeLink.TYPE_SHUGUANGAI:
|
||||
switch ins.Status {
|
||||
case "running":
|
||||
instance.Status = constants.Running
|
||||
case "Terminated":
|
||||
instance.Status = constants.Stopped
|
||||
default:
|
||||
instance.Status = ins.Status
|
||||
}
|
||||
}
|
||||
err = svc.Scheduler.AiStorages.UpdateInferDeployInstance(instance)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
|
@ -877,6 +877,7 @@ func (o *OctopusLink) GetInferUrl(ctx context.Context, option *option.InferOptio
|
|||
Platform: o.platform,
|
||||
PageIndex: o.pageIndex,
|
||||
PageSize: o.pageSize,
|
||||
SearchKey: DEPLOY_INSTANCE_PREFIEX,
|
||||
}
|
||||
list, err := o.octopusRpc.GetNotebookList(ctx, req)
|
||||
if err != nil {
|
||||
|
@ -885,9 +886,9 @@ func (o *OctopusLink) GetInferUrl(ctx context.Context, option *option.InferOptio
|
|||
|
||||
var imageUrls []*inference.InferUrl
|
||||
for _, notebook := range list.Payload.GetNotebooks() {
|
||||
if strings.Contains(notebook.AlgorithmName, option.ModelName) && notebook.Status == "running" {
|
||||
if strings.Contains(notebook.Desc, option.ModelName) && notebook.Status == "running" {
|
||||
url := strings.Replace(notebook.Tasks[0].Url, FORWARD_SLASH, "", -1)
|
||||
names := strings.Split(notebook.AlgorithmName, UNDERSCORE)
|
||||
names := strings.Split(notebook.Desc, FORWARD_SLASH)
|
||||
imageUrl := &inference.InferUrl{
|
||||
Url: DOMAIN + url,
|
||||
Card: names[2],
|
||||
|
@ -955,5 +956,23 @@ func (o *OctopusLink) StopInferDeployInstance(ctx context.Context, id string) bo
|
|||
}
|
||||
|
||||
func (o *OctopusLink) GetInferDeployInstance(ctx context.Context, id string) (*inference.DeployInstance, error) {
|
||||
return nil, nil
|
||||
ins := &inference.DeployInstance{}
|
||||
|
||||
req := &octopus.GetNotebookReq{
|
||||
Platform: o.platform,
|
||||
Id: id,
|
||||
}
|
||||
|
||||
resp, err := o.octopusRpc.GetNotebook(ctx, req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if resp.Payload == nil {
|
||||
return nil, errors.New("instance does not exist")
|
||||
}
|
||||
ins.InstanceName = resp.Payload.Notebook.Name
|
||||
ins.InstanceId = resp.Payload.Notebook.Id
|
||||
ins.ClusterName = o.platform
|
||||
ins.Status = resp.Payload.Notebook.Status
|
||||
return ins, nil
|
||||
}
|
||||
|
|
|
@ -851,5 +851,19 @@ func (s *ShuguangAi) StopInferDeployInstance(ctx context.Context, id string) boo
|
|||
}
|
||||
|
||||
func (s *ShuguangAi) GetInferDeployInstance(ctx context.Context, id string) (*inference.DeployInstance, error) {
|
||||
return nil, nil
|
||||
ins := &inference.DeployInstance{}
|
||||
req := &hpcAC.GetInstanceServiceDetailReq{
|
||||
Id: id,
|
||||
}
|
||||
resp, err := s.aCRpc.GetInstanceServiceDetail(ctx, req)
|
||||
if err != nil || resp.Code != "0" {
|
||||
return nil, err
|
||||
}
|
||||
ins.InstanceName = resp.Data.InstanceServiceName
|
||||
ins.InstanceId = resp.Data.Id
|
||||
ins.ClusterName = s.platform
|
||||
ins.Status = resp.Data.Status
|
||||
ins.InferCard = DCU
|
||||
ins.CreatedTime = resp.Data.CreateTime
|
||||
return ins, nil
|
||||
}
|
||||
|
|
|
@ -79,8 +79,8 @@ var (
|
|||
4: "制作失败",
|
||||
}
|
||||
ModelTypeMap = map[string][]string{
|
||||
"image_recognition": {"imagenet_resnet50"},
|
||||
"text_to_text": {"chatGLM_6B"},
|
||||
"image_classification": {"imagenet_resnet50"},
|
||||
"text_to_text": {"chatGLM_6B"},
|
||||
}
|
||||
AITYPE = map[string]string{
|
||||
"1": OCTOPUS,
|
||||
|
|
|
@ -2902,6 +2902,11 @@ type AiTask struct {
|
|||
TimeElapsed int32 `json:"elapsed,optional"`
|
||||
}
|
||||
|
||||
// TrainingTaskStatResp is the JSON response carrying training-task counters.
type TrainingTaskStatResp struct {
	// Running is serialized as "running".
	Running int32 `json:"running"`
	// Total is serialized as "total".
	Total int32 `json:"total"`
}
|
||||
|
||||
type ChatReq struct {
|
||||
ApiUrl string `json:"apiUrl"`
|
||||
Method string `json:"method,optional"`
|
||||
|
@ -5975,7 +5980,8 @@ type DeployInstanceListResp struct {
|
|||
// StartDeployInstanceReq holds the form parameters for starting a deploy
// instance. The stale `InstanceId string form:"id"` line has been dropped:
// it declared InstanceId twice and bound two fields to the same form key.
type StartDeployInstanceReq struct {
	AdapterId  string `form:"adapterId"`  // key into the inference adapter map
	ClusterId  string `form:"clusterId"`  // key into the adapter's cluster map
	Id         string `form:"id"`         // numeric id of the stored deploy-instance record
	InstanceId string `form:"instanceId"` // instance identifier passed to the cluster
}
|
||||
|
||||
type StartDeployInstanceResp struct {
|
||||
|
@ -5984,8 +5990,25 @@ type StartDeployInstanceResp struct {
|
|||
// StopDeployInstanceReq holds the form parameters for stopping a deploy
// instance. The stale `InstanceId string form:"id"` line has been dropped:
// it declared InstanceId twice and bound two fields to the same form key.
type StopDeployInstanceReq struct {
	AdapterId  string `form:"adapterId"`  // key into the inference adapter map
	ClusterId  string `form:"clusterId"`  // key into the adapter's cluster map
	Id         string `form:"id"`         // numeric id of the stored deploy-instance record
	InstanceId string `form:"instanceId"` // instance identifier passed to the cluster
}
|
||||
|
||||
// StopDeployInstanceResp is the response of the stop-deploy-instance endpoint;
// it carries no fields.
type StopDeployInstanceResp struct{}
|
||||
|
||||
// DeployInstanceStatReq is the request of the deploy-instance statistics
// endpoint; it carries no fields.
type DeployInstanceStatReq struct{}
|
||||
|
||||
// DeployInstanceStatResp is the JSON response carrying deploy-instance counters.
type DeployInstanceStatResp struct {
	// Running is serialized as "running".
	Running int32 `json:"running"`
	// Total is serialized as "total".
	Total int32 `json:"total"`
}
|
||||
|
||||
// InferenceTaskStatReq is the request of the inference-task statistics
// endpoint; it carries no fields.
type InferenceTaskStatReq struct{}
|
||||
|
||||
// InferenceTaskStatResp is the JSON response carrying inference-task counters.
type InferenceTaskStatResp struct {
	// Running is serialized as "running".
	Running int32 `json:"running"`
	// Total is serialized as "total".
	Total int32 `json:"total"`
}
|
||||
|
|
|
@ -48,6 +48,7 @@ type (
|
|||
Status string `db:"status"`
|
||||
CreateTime string `db:"create_time"`
|
||||
UpdateTime string `db:"update_time"`
|
||||
ClusterType string `db:"cluster_type"`
|
||||
}
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in New Issue