Merge remote-tracking branch 'origin/master'

# Conflicts: # go.mod
2024-07-25 17:19:40 +08:00 · 2024-07-25 17:19:40 +08:00 · 8bbb72142d
parent 92d809e27f 2d73079c8a
commit 8bbb72142d
25 changed files with 548 additions and 74 deletions
--- a/desc/ai/pcm-ai.api
+++ b/desc/ai/pcm-ai.api
@ -1738,6 +1738,11 @@ PayloadCreateTrainJob{
 		Card string `json:"card,optional"`
 		TimeElapsed int32 `json:"elapsed,optional"`
 	}
+
+	TrainingTaskStatResp {
+		Running int32 `json:"running"`
+		Total int32 `json:"total"`
+	}
 )

 /******************create TrainIngJob end*************************/
--- a/desc/inference/inference.api
+++ b/desc/inference/inference.api
@ -88,7 +88,8 @@ type (
     StartDeployInstanceReq {
          AdapterId          string `form:"adapterId"`
          ClusterId      string `form:"clusterId"`
-          InstanceId string `form:"id"`
+          Id      string `form:"id"`
+          InstanceId string `form:"instanceId"`
     }

     StartDeployInstanceResp {
@ -97,10 +98,26 @@ type (
     StopDeployInstanceReq {
          AdapterId          string `form:"adapterId"`
          ClusterId      string `form:"clusterId"`
-          InstanceId string `form:"id"`
+          Id      string `form:"id"`
+          InstanceId string `form:"instanceId"`
     }

     StopDeployInstanceResp {
     }

+     DeployInstanceStatReq {
+
+     }
+
+     DeployInstanceStatResp {
+          Running int32 `json:"running"`
+          Total int32 `json:"total"`
+     }
+
+     InferenceTaskStatReq {}
+
+     InferenceTaskStatResp {
+          Running int32 `json:"running"`
+          Total int32 `json:"total"`
+     }
 )
--- a/desc/pcm.api
+++ b/desc/pcm.api
@ -244,6 +244,10 @@ service pcm {
 	group: ai
 )
 service pcm {
+	@doc "训练任务统计"
+	@handler trainingTaskStatHandler
+	get /ai/trainingTaskStat returns (TrainingTaskStatResp)
+
 	@doc "智算中心概览"
 	@handler getCenterOverviewHandler
 	get /ai/getCenterOverview returns (CenterOverviewResp)
@ -949,6 +953,12 @@ service pcm {

 	@handler StopDeployInstanceHandler
 	post /inference/stopDeployInstance (StopDeployInstanceReq) returns (StopDeployInstanceResp)
+
+	@handler DeployInstanceStatHandler
+	get /inference/deployInstanceStat (DeployInstanceStatReq) returns (DeployInstanceStatResp)
+
+	@handler InferenceTaskStatHandler
+	get /inference/taskStat (InferenceTaskStatReq) returns (InferenceTaskStatResp)
 }

@server(
--- a/go.mod
+++ b/go.mod
@ -18,8 +18,8 @@ require (
 	github.com/prometheus/common v0.55.0
 	github.com/robfig/cron/v3 v3.0.1
 	github.com/zeromicro/go-zero v1.6.6
-	gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240722032933-7bd6ab00d249
-	gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240724095608-1727d09f030c
+	gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240725071305-f751eec4dde1
+	gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240722092017-50d17f36d023
 	gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240718073732-bc5d687f6330
 	gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203
 	gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5
--- a/go.sum
+++ b/go.sum
@ -495,8 +495,8 @@ github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M
 github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
 github.com/zeromicro/go-zero v1.6.6 h1:nZTVYObklHiBdYJ/nPoAZ8kGVAplWSDjT7DGE7ur0uk=
 github.com/zeromicro/go-zero v1.6.6/go.mod h1:olKf1/hELbSmuIgLgJeoeNVp3tCbLqj6UmO7ATSta4A=
-gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240722032933-7bd6ab00d249 h1:bHJGq5P+8w4fP62PZhIiq/fvOhvDPRtkM4pcmU8OZ1w=
-gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240722032933-7bd6ab00d249/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY=
+gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240725071305-f751eec4dde1 h1:DicBXoQiC6mumMBeyqSPNrsjtqJIgk5Pv2hscu2xryw=
+gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240725071305-f751eec4dde1/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY=
 gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240722092017-50d17f36d023 h1:9DNobl0gvRCSXtjyMsfUwq0w0TMvds4rqNRsEqeX4j8=
 gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240722092017-50d17f36d023/go.mod h1:/eOmBFZKWGoabG3sRVkVvIbLwsd2631k4jkUBR6x1AA=
 gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240724095608-1727d09f030c h1:CodJeGgTYJwA6NDHFnw6B+4pBXUl79tvAcECq39tgZI=
--- a/internal/handler/ai/trainingtaskstathandler.go
+++ b/internal/handler/ai/trainingtaskstathandler.go
@ -0,0 +1,17 @@
+package ai
+
+import (
+	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
+	"net/http"
+
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/ai"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
+)
+
+// TrainingTaskStatHandler builds the HTTP handler for the training-task
+// statistics endpoint. The endpoint takes no request parameters: it runs
+// TrainingTaskStatLogic with the request context and writes the result or
+// error through result.HttpResult.
+func TrainingTaskStatHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		stat, err := ai.NewTrainingTaskStatLogic(r.Context(), svcCtx).TrainingTaskStat()
+		result.HttpResult(r, w, stat, err)
+	}
+}
--- a/internal/handler/inference/deployinstancelisthandler.go
+++ b/internal/handler/inference/deployinstancelisthandler.go
@ -1,28 +1,24 @@
 package inference

 import (
-	"net/http"
-
 	"github.com/zeromicro/go-zero/rest/httpx"
-	"jcc-coordinator/internal/logic/inference"
-	"jcc-coordinator/internal/svc"
-	"jcc-coordinator/internal/types"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/inference"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
+	"net/http"
 )

+// DeployInstanceListHandler builds the HTTP handler that parses a
+// DeployInstanceListReq, runs DeployInstanceListLogic and writes the outcome.
+// Parse failures go through result.ParamErrorResult; the logic result and
+// error go through result.HttpResult.
 func DeployInstanceListHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		var req types.DeployInstanceListReq
 		if err := httpx.Parse(r, &req); err != nil {
-			httpx.ErrorCtx(r.Context(), w, err)
+			result.ParamErrorResult(r, w, err)
 			return
 		}

 		l := inference.NewDeployInstanceListLogic(r.Context(), svcCtx)
 		resp, err := l.DeployInstanceList(&req)
-		if err != nil {
-			httpx.ErrorCtx(r.Context(), w, err)
-		} else {
-			httpx.OkJsonCtx(r.Context(), w, resp)
-		}
+		result.HttpResult(r, w, resp, err)
 	}
 }
--- a/internal/handler/inference/deployinstancestathandler.go
+++ b/internal/handler/inference/deployinstancestathandler.go
@ -0,0 +1,26 @@
+package inference
+
+import (
+	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
+	"net/http"
+
+	"github.com/zeromicro/go-zero/rest/httpx"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/inference"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
+)
+
+func DeployInstanceStatHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		var req types.DeployInstanceStatReq
+		if err := httpx.Parse(r, &req); err != nil {
+			result.ParamErrorResult(r, w, err)
+			return
+		}
+
+		l := inference.NewDeployInstanceStatLogic(r.Context(), svcCtx)
+		resp, err := l.DeployInstanceStat(&req)
+		result.HttpResult(r, w, resp, err)
+
+	}
+}
--- a/internal/handler/inference/inferencetaskstathandler.go
+++ b/internal/handler/inference/inferencetaskstathandler.go
@ -0,0 +1,25 @@
+package inference
+
+import (
+	"github.com/zeromicro/go-zero/rest/httpx"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/inference"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
+	"net/http"
+)
+
+func InferenceTaskStatHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		var req types.InferenceTaskStatReq
+		if err := httpx.Parse(r, &req); err != nil {
+			result.ParamErrorResult(r, w, err)
+			return
+		}
+
+		l := inference.NewInferenceTaskStatLogic(r.Context(), svcCtx)
+		resp, err := l.InferenceTaskStat(&req)
+		result.HttpResult(r, w, resp, err)
+
+	}
+}
--- a/internal/handler/inference/startdeployinstancelisthandler.go
+++ b/internal/handler/inference/startdeployinstancelisthandler.go
@ -1,28 +1,24 @@
 package inference

 import (
-	"net/http"
-
 	"github.com/zeromicro/go-zero/rest/httpx"
-	"jcc-coordinator/internal/logic/inference"
-	"jcc-coordinator/internal/svc"
-	"jcc-coordinator/internal/types"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/inference"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
+	"net/http"
 )

+// StartDeployInstanceListHandler builds the HTTP handler that parses a
+// StartDeployInstanceReq, runs StartDeployInstanceListLogic and writes the
+// outcome. Parse failures go through result.ParamErrorResult; the logic
+// result and error go through result.HttpResult.
 func StartDeployInstanceListHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		var req types.StartDeployInstanceReq
 		if err := httpx.Parse(r, &req); err != nil {
-			httpx.ErrorCtx(r.Context(), w, err)
+			result.ParamErrorResult(r, w, err)
 			return
 		}

 		l := inference.NewStartDeployInstanceListLogic(r.Context(), svcCtx)
 		resp, err := l.StartDeployInstanceList(&req)
-		if err != nil {
-			httpx.ErrorCtx(r.Context(), w, err)
-		} else {
-			httpx.OkJsonCtx(r.Context(), w, resp)
-		}
+		result.HttpResult(r, w, resp, err)
 	}
 }
--- a/internal/handler/inference/stopdeployinstancehandler.go
+++ b/internal/handler/inference/stopdeployinstancehandler.go
@ -1,28 +1,24 @@
 package inference

 import (
-	"net/http"
-
 	"github.com/zeromicro/go-zero/rest/httpx"
-	"jcc-coordinator/internal/logic/inference"
-	"jcc-coordinator/internal/svc"
-	"jcc-coordinator/internal/types"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/inference"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
+	"net/http"
 )

+// StopDeployInstanceHandler builds the HTTP handler that parses a
+// StopDeployInstanceReq, runs StopDeployInstanceLogic and writes the outcome.
+// Parse failures go through result.ParamErrorResult; the logic result and
+// error go through result.HttpResult.
 func StopDeployInstanceHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		var req types.StopDeployInstanceReq
 		if err := httpx.Parse(r, &req); err != nil {
-			httpx.ErrorCtx(r.Context(), w, err)
+			result.ParamErrorResult(r, w, err)
 			return
 		}

 		l := inference.NewStopDeployInstanceLogic(r.Context(), svcCtx)
 		resp, err := l.StopDeployInstance(&req)
-		if err != nil {
-			httpx.ErrorCtx(r.Context(), w, err)
-		} else {
-			httpx.OkJsonCtx(r.Context(), w, resp)
-		}
+		result.HttpResult(r, w, resp, err)
 	}
 }
--- a/internal/handler/routes.go
+++ b/internal/handler/routes.go
@ -287,6 +287,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {

 	server.AddRoutes(
 		[]rest.Route{
+			{
+				Method:  http.MethodGet,
+				Path:    "/ai/trainingTaskStat",
+				Handler: ai.TrainingTaskStatHandler(serverCtx),
+			},
 			{
 				Method:  http.MethodGet,
 				Path:    "/ai/getCenterOverview",
@ -1198,6 +1203,16 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
 				Path:    "/inference/stopDeployInstance",
 				Handler: inference.StopDeployInstanceHandler(serverCtx),
 			},
+			{
+				Method:  http.MethodGet,
+				Path:    "/inference/deployInstanceStat",
+				Handler: inference.DeployInstanceStatHandler(serverCtx),
+			},
+			{
+				Method:  http.MethodGet,
+				Path:    "/inference/taskStat",
+				Handler: inference.InferenceTaskStatHandler(serverCtx),
+			},
 		},
 		rest.WithPrefix("/pcm/v1"),
 	)
--- a/internal/logic/ai/trainingtaskstatlogic.go
+++ b/internal/logic/ai/trainingtaskstatlogic.go
@ -0,0 +1,47 @@
+package ai
+
+import (
+	"context"
+	"errors"
+
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
+
+	"github.com/zeromicro/go-zero/core/logx"
+)
+
+// TrainingTaskStatLogic holds the per-request state used to compute
+// training-task statistics: an embedded logger, the request context and the
+// shared service context.
+type TrainingTaskStatLogic struct {
+	logx.Logger
+	ctx    context.Context
+	svcCtx *svc.ServiceContext
+}
+
+// NewTrainingTaskStatLogic returns a TrainingTaskStatLogic bound to ctx and
+// svcCtx, with its logger derived from ctx.
+func NewTrainingTaskStatLogic(ctx context.Context, svcCtx *svc.ServiceContext) *TrainingTaskStatLogic {
+	logic := new(TrainingTaskStatLogic)
+	logic.Logger = logx.WithContext(ctx)
+	logic.ctx = ctx
+	logic.svcCtx = svcCtx
+	return logic
+}
+
+// TrainingTaskStat returns the counts reported by
+// AiStorages.GetTrainingTaskTotalNum and AiStorages.GetTrainingTaskRunningNum.
+// A storage error from either call is returned unchanged.
+//
+// NOTE(review): a total of zero is returned as the error
+// "get statistics failed", so an empty task table looks like a failure to the
+// caller — confirm this is intended.
+func (l *TrainingTaskStatLogic) TrainingTaskStat() (resp *types.TrainingTaskStatResp, err error) {
+	totalNum, err := l.svcCtx.Scheduler.AiStorages.GetTrainingTaskTotalNum()
+	if err != nil {
+		return nil, err
+	}
+
+	runningNum, err := l.svcCtx.Scheduler.AiStorages.GetTrainingTaskRunningNum()
+	if err != nil {
+		return nil, err
+	}
+
+	if totalNum == 0 {
+		return nil, errors.New("get statistics failed")
+	}
+
+	return &types.TrainingTaskStatResp{Running: runningNum, Total: totalNum}, nil
+}
--- a/internal/logic/inference/deployinstancelistlogic.go
+++ b/internal/logic/inference/deployinstancelistlogic.go
@ -2,11 +2,11 @@ package inference

 import (
 	"context"
-
-	"jcc-coordinator/internal/svc"
-	"jcc-coordinator/internal/types"
-
+	"errors"
 	"github.com/zeromicro/go-zero/core/logx"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
 )

 type DeployInstanceListLogic struct {
@ -24,7 +24,35 @@ func NewDeployInstanceListLogic(ctx context.Context, svcCtx *svc.ServiceContext)
 }

 func (l *DeployInstanceListLogic) DeployInstanceList(req *types.DeployInstanceListReq) (resp *types.DeployInstanceListResp, err error) {
-	// todo: add your logic here and delete this line
+	limit := req.PageSize
+	offset := req.PageSize * (req.PageNum - 1)
+	resp = &types.DeployInstanceListResp{}
+
+	var list []*models.AiInferDeployInstance
+
+	tx := l.svcCtx.DbEngin.Raw("select * from ai_infer_deploy_instance").Scan(&list)
+	if tx.Error != nil {
+		logx.Errorf(tx.Error.Error())
+		return nil, tx.Error
+	}
+
+	//count total
+	var total int64
+	err = tx.Count(&total).Error
+	tx.Limit(limit).Offset(offset)
+
+	if err != nil {
+		return resp, err
+	}
+
+	err = tx.Order("create_time desc").Find(&list).Error
+	if err != nil {
+		return nil, errors.New(err.Error())
+	}
+	resp.List = &list
+	resp.PageSize = req.PageSize
+	resp.PageNum = req.PageNum
+	resp.Total = total

 	return
 }
--- a/internal/logic/inference/deployinstancestatlogic.go
+++ b/internal/logic/inference/deployinstancestatlogic.go
@ -0,0 +1,47 @@
+package inference
+
+import (
+	"context"
+	"errors"
+
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
+
+	"github.com/zeromicro/go-zero/core/logx"
+)
+
+// DeployInstanceStatLogic holds the per-request state used to compute
+// deploy-instance statistics: an embedded logger, the request context and the
+// shared service context.
+type DeployInstanceStatLogic struct {
+	logx.Logger
+	ctx    context.Context
+	svcCtx *svc.ServiceContext
+}
+
+// NewDeployInstanceStatLogic returns a DeployInstanceStatLogic bound to ctx
+// and svcCtx, with its logger derived from ctx.
+func NewDeployInstanceStatLogic(ctx context.Context, svcCtx *svc.ServiceContext) *DeployInstanceStatLogic {
+	logic := new(DeployInstanceStatLogic)
+	logic.Logger = logx.WithContext(ctx)
+	logic.ctx = ctx
+	logic.svcCtx = svcCtx
+	return logic
+}
+
+// DeployInstanceStat returns the counts reported by
+// AiStorages.GetInferDeployInstanceTotalNum and
+// AiStorages.GetInferDeployInstanceRunningNum. The request carries no fields
+// and is not read. A storage error from either call is returned unchanged.
+//
+// NOTE(review): a total of zero is returned as the error
+// "get statistics failed", so an empty instance table looks like a failure to
+// the caller — confirm this is intended.
+func (l *DeployInstanceStatLogic) DeployInstanceStat(req *types.DeployInstanceStatReq) (resp *types.DeployInstanceStatResp, err error) {
+	totalNum, err := l.svcCtx.Scheduler.AiStorages.GetInferDeployInstanceTotalNum()
+	if err != nil {
+		return nil, err
+	}
+
+	runningNum, err := l.svcCtx.Scheduler.AiStorages.GetInferDeployInstanceRunningNum()
+	if err != nil {
+		return nil, err
+	}
+
+	if totalNum == 0 {
+		return nil, errors.New("get statistics failed")
+	}
+
+	return &types.DeployInstanceStatResp{Running: runningNum, Total: totalNum}, nil
+}
--- a/internal/logic/inference/inferencetaskstatlogic.go
+++ b/internal/logic/inference/inferencetaskstatlogic.go
@ -0,0 +1,47 @@
+package inference
+
+import (
+	"context"
+	"errors"
+
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
+
+	"github.com/zeromicro/go-zero/core/logx"
+)
+
+// InferenceTaskStatLogic holds the per-request state used to compute
+// inference-task statistics: an embedded logger, the request context and the
+// shared service context.
+type InferenceTaskStatLogic struct {
+	logx.Logger
+	ctx    context.Context
+	svcCtx *svc.ServiceContext
+}
+
+// NewInferenceTaskStatLogic returns an InferenceTaskStatLogic bound to ctx and
+// svcCtx, with its logger derived from ctx.
+func NewInferenceTaskStatLogic(ctx context.Context, svcCtx *svc.ServiceContext) *InferenceTaskStatLogic {
+	logic := new(InferenceTaskStatLogic)
+	logic.Logger = logx.WithContext(ctx)
+	logic.ctx = ctx
+	logic.svcCtx = svcCtx
+	return logic
+}
+
+// InferenceTaskStat returns the counts reported by
+// AiStorages.GetInferenceTaskTotalNum and
+// AiStorages.GetInferenceTaskRunningNum. The request carries no fields and is
+// not read. A storage error from either call is returned unchanged.
+//
+// NOTE(review): a total of zero is returned as the error
+// "get statistics failed", so an empty task table looks like a failure to the
+// caller — confirm this is intended.
+func (l *InferenceTaskStatLogic) InferenceTaskStat(req *types.InferenceTaskStatReq) (resp *types.InferenceTaskStatResp, err error) {
+	totalNum, err := l.svcCtx.Scheduler.AiStorages.GetInferenceTaskTotalNum()
+	if err != nil {
+		return nil, err
+	}
+
+	runningNum, err := l.svcCtx.Scheduler.AiStorages.GetInferenceTaskRunningNum()
+	if err != nil {
+		return nil, err
+	}
+
+	if totalNum == 0 {
+		return nil, errors.New("get statistics failed")
+	}
+
+	return &types.InferenceTaskStatResp{Running: runningNum, Total: totalNum}, nil
+}
--- a/internal/logic/inference/startdeployinstancelistlogic.go
+++ b/internal/logic/inference/startdeployinstancelistlogic.go
@ -2,11 +2,11 @@ package inference

 import (
 	"context"
-
-	"jcc-coordinator/internal/svc"
-	"jcc-coordinator/internal/types"
-
+	"errors"
 	"github.com/zeromicro/go-zero/core/logx"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
+	"strconv"
 )

 type StartDeployInstanceListLogic struct {
@ -24,7 +24,19 @@ func NewStartDeployInstanceListLogic(ctx context.Context, svcCtx *svc.ServiceCon
 }

 func (l *StartDeployInstanceListLogic) StartDeployInstanceList(req *types.StartDeployInstanceReq) (resp *types.StartDeployInstanceResp, err error) {
-	// todo: add your logic here and delete this line
+	resp = &types.StartDeployInstanceResp{}
+	success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[req.AdapterId][req.ClusterId].StartInferDeployInstance(l.ctx, req.InstanceId)
+	id, err := strconv.ParseInt(req.Id, 10, 64)
+	ins, err := l.svcCtx.Scheduler.AiStorages.GetInferDeployInstanceById(id)
+	if err != nil {
+		return nil, err
+	}

-	return
+	l.svcCtx.Scheduler.AiStorages.UpdateInferDeployInstance(ins)
+
+	if !success {
+		return nil, errors.New("start instance failed")
+	}
+
+	return resp, nil
 }
--- a/internal/logic/inference/stopdeployinstancelogic.go
+++ b/internal/logic/inference/stopdeployinstancelogic.go
@ -2,11 +2,11 @@ package inference

 import (
 	"context"
-
-	"jcc-coordinator/internal/svc"
-	"jcc-coordinator/internal/types"
-
+	"errors"
 	"github.com/zeromicro/go-zero/core/logx"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
+	"strconv"
 )

 type StopDeployInstanceLogic struct {
@ -24,7 +24,18 @@ func NewStopDeployInstanceLogic(ctx context.Context, svcCtx *svc.ServiceContext)
 }

 func (l *StopDeployInstanceLogic) StopDeployInstance(req *types.StopDeployInstanceReq) (resp *types.StopDeployInstanceResp, err error) {
-	// todo: add your logic here and delete this line
+	resp = &types.StopDeployInstanceResp{}
+	success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[req.AdapterId][req.ClusterId].StopInferDeployInstance(l.ctx, req.InstanceId)
+	id, err := strconv.ParseInt(req.Id, 10, 64)
+	ins, err := l.svcCtx.Scheduler.AiStorages.GetInferDeployInstanceById(id)
+	if err != nil {
+		return nil, err
+	}

-	return
+	l.svcCtx.Scheduler.AiStorages.UpdateInferDeployInstance(ins)
+
+	if !success {
+		return nil, errors.New("stop instance failed")
+	}
+	return resp, nil
 }
--- a/internal/scheduler/database/aiStorage.go
+++ b/internal/scheduler/database/aiStorage.go
@ -373,19 +373,20 @@ func (s *AiStorage) AddNoticeInfo(adapterId string, adapterName string, clusterI
 	}
 }

-func (s *AiStorage) SaveInferDeployInstance() (int64, error) {
+func (s *AiStorage) SaveInferDeployInstance(instanceId string, instanceName string, adapterId int64,
+	adapterName string, clusterId int64, clusterName string, modelName string, modelType string, inferCard string) (int64, error) {
 	startTime := time.Now().Format(time.RFC3339)
 	// 构建主任务结构体
 	insModel := models.AiInferDeployInstance{
-		InstanceId:   "",
-		InstanceName: "",
-		AdapterId:    123,
-		AdapterName:  "",
-		ClusterId:    123,
-		ClusterName:  "",
-		ModelName:    "",
-		ModelType:    "",
-		InferCard:    "",
+		InstanceId:   instanceId,
+		InstanceName: instanceName,
+		AdapterId:    adapterId,
+		AdapterName:  adapterName,
+		ClusterId:    clusterId,
+		ClusterName:  clusterName,
+		ModelName:    modelName,
+		ModelType:    modelType,
+		InferCard:    inferCard,
 		Status:       constants.Saved,
 		CreateTime:   startTime,
 		UpdateTime:   startTime,
@ -417,3 +418,73 @@ func (s *AiStorage) GetInferDeployInstanceById(id int64) (*models.AiInferDeployI
 	}
 	return &deployIns, nil
 }
+
+// GetInferDeployInstanceList loads every row of ai_infer_deploy_instance.
+// A query error is logged and returned with a nil slice.
+func (s *AiStorage) GetInferDeployInstanceList() ([]*models.AiInferDeployInstance, error) {
+	var instances []*models.AiInferDeployInstance
+	if err := s.DbEngin.Raw("select * from ai_infer_deploy_instance").Scan(&instances).Error; err != nil {
+		logx.Errorf(err.Error())
+		return nil, err
+	}
+	return instances, nil
+}
+
+// GetInferDeployInstanceTotalNum counts all rows of ai_infer_deploy_instance.
+// A query error is logged and returned with a count of 0.
+func (s *AiStorage) GetInferDeployInstanceTotalNum() (int32, error) {
+	var count int32
+	if err := s.DbEngin.Raw("select count(*) from ai_infer_deploy_instance").Scan(&count).Error; err != nil {
+		logx.Errorf(err.Error())
+		return 0, err
+	}
+	return count, nil
+}
+
+// GetInferDeployInstanceRunningNum counts the rows of ai_infer_deploy_instance
+// whose status column equals 'running'. A query error is logged and returned
+// with a count of 0.
+func (s *AiStorage) GetInferDeployInstanceRunningNum() (int32, error) {
+	var count int32
+	if err := s.DbEngin.Raw("select count(*) from ai_infer_deploy_instance where `status` = 'running'").Scan(&count).Error; err != nil {
+		logx.Errorf(err.Error())
+		return 0, err
+	}
+	return count, nil
+}
+
+// GetInferenceTaskTotalNum counts the rows of task whose task_type_dict is 11
+// or 12. A query error is logged and returned with a count of 0.
+func (s *AiStorage) GetInferenceTaskTotalNum() (int32, error) {
+	var count int32
+	if err := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 11 or `task_type_dict` = 12").Scan(&count).Error; err != nil {
+		logx.Errorf(err.Error())
+		return 0, err
+	}
+	return count, nil
+}
+
+// GetInferenceTaskRunningNum counts the rows of task whose task_type_dict is
+// 11 and whose status equals 'Running'. A query error is logged and returned
+// with a count of 0.
+//
+// NOTE(review): GetInferenceTaskTotalNum counts task_type_dict 11 or 12, while
+// this query counts only 11 — confirm whether type 12 tasks should be
+// included here.
+func (s *AiStorage) GetInferenceTaskRunningNum() (int32, error) {
+	var count int32
+	if err := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 11 and `status` = 'Running'").Scan(&count).Error; err != nil {
+		logx.Errorf(err.Error())
+		return 0, err
+	}
+	return count, nil
+}
+
+// GetTrainingTaskTotalNum counts the rows of task whose task_type_dict is 10.
+// A query error is logged and returned with a count of 0.
+func (s *AiStorage) GetTrainingTaskTotalNum() (int32, error) {
+	var count int32
+	if err := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 10").Scan(&count).Error; err != nil {
+		logx.Errorf(err.Error())
+		return 0, err
+	}
+	return count, nil
+}
+
+// GetTrainingTaskRunningNum counts the rows of task whose task_type_dict is 10
+// (the same type GetTrainingTaskTotalNum counts) and whose status equals
+// 'Running'. A query error is logged and returned with a count of 0.
+//
+// The filter previously used task_type_dict = 11, the value the inference
+// queries use, so the training "running" figure counted inference tasks.
+func (s *AiStorage) GetTrainingTaskRunningNum() (int32, error) {
+	var running int32
+	tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 10 and `status` = 'Running'").Scan(&running)
+	if tx.Error != nil {
+		logx.Errorf(tx.Error.Error())
+		return 0, tx.Error
+	}
+	return running, nil
+}
--- a/internal/scheduler/service/updater/deployInstance.go
+++ b/internal/scheduler/service/updater/deployInstance.go
@ -0,0 +1,51 @@
+package updater
+
+import (
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
+	"net/http"
+	"strconv"
+)
+
+// UpdateDeployInstanceStatus refreshes instance.Status from the cluster that
+// hosts it and writes the record back via UpdateInferDeployInstance.
+//
+// The adapter is looked up in InferenceAdapterMap by the decimal string forms
+// of instance.AdapterId and instance.ClusterId. The function returns without
+// changing anything when no adapter is registered, when the remote lookup
+// fails, or when the adapter returns no instance. Remote status strings are
+// mapped per cluster type; unrecognised statuses are copied through verbatim,
+// and for any other cluster type the status is left unchanged.
+func UpdateDeployInstanceStatus(svc *svc.ServiceContext, instance *models.AiInferDeployInstance) {
+	amap, found := svc.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(instance.AdapterId, 10)]
+	if !found {
+		return
+	}
+	cmap, found := amap[strconv.FormatInt(instance.ClusterId, 10)]
+	if !found {
+		return
+	}
+
+	// No caller context is available here; a zero-value request yields the
+	// background context.
+	h := http.Request{}
+	ins, err := cmap.GetInferDeployInstance(h.Context(), instance.InstanceId)
+	// An adapter may return (nil, nil), e.g. ShuguangAi on a non-"0" response
+	// code, so a nil instance must be treated like an error.
+	if err != nil || ins == nil {
+		return
+	}
+
+	switch instance.ClusterType {
+	case storeLink.TYPE_OCTOPUS:
+		switch ins.Status {
+		case "running":
+			instance.Status = constants.Running
+		case "stopped":
+			instance.Status = constants.Stopped
+		default:
+			instance.Status = ins.Status
+		}
+
+	case storeLink.TYPE_SHUGUANGAI:
+		switch ins.Status {
+		case "running":
+			instance.Status = constants.Running
+		case "Terminated":
+			instance.Status = constants.Stopped
+		default:
+			instance.Status = ins.Status
+		}
+	}
+
+	// Best effort: the function has no error return, so a failed write is
+	// dropped, exactly as before.
+	_ = svc.Scheduler.AiStorages.UpdateInferDeployInstance(instance)
+}
--- a/internal/storeLink/octopus.go
+++ b/internal/storeLink/octopus.go
@ -877,6 +877,7 @@ func (o *OctopusLink) GetInferUrl(ctx context.Context, option *option.InferOptio
 		Platform:  o.platform,
 		PageIndex: o.pageIndex,
 		PageSize:  o.pageSize,
+		SearchKey: DEPLOY_INSTANCE_PREFIEX,
 	}
 	list, err := o.octopusRpc.GetNotebookList(ctx, req)
 	if err != nil {
@ -885,9 +886,9 @@ func (o *OctopusLink) GetInferUrl(ctx context.Context, option *option.InferOptio

 	var imageUrls []*inference.InferUrl
 	for _, notebook := range list.Payload.GetNotebooks() {
-		if strings.Contains(notebook.AlgorithmName, option.ModelName) && notebook.Status == "running" {
+		if strings.Contains(notebook.Desc, option.ModelName) && notebook.Status == "running" {
 			url := strings.Replace(notebook.Tasks[0].Url, FORWARD_SLASH, "", -1)
-			names := strings.Split(notebook.AlgorithmName, UNDERSCORE)
+			names := strings.Split(notebook.Desc, FORWARD_SLASH)
 			imageUrl := &inference.InferUrl{
 				Url:  DOMAIN + url,
 				Card: names[2],
@ -955,5 +956,23 @@ func (o *OctopusLink) StopInferDeployInstance(ctx context.Context, id string) bo
 }

 func (o *OctopusLink) GetInferDeployInstance(ctx context.Context, id string) (*inference.DeployInstance, error) {
-	return nil, nil
+	ins := &inference.DeployInstance{}
+
+	req := &octopus.GetNotebookReq{
+		Platform: o.platform,
+		Id:       id,
+	}
+
+	resp, err := o.octopusRpc.GetNotebook(ctx, req)
+	if err != nil {
+		return nil, err
+	}
+	if resp.Payload == nil {
+		return nil, errors.New("instance does not exist")
+	}
+	ins.InstanceName = resp.Payload.Notebook.Name
+	ins.InstanceId = resp.Payload.Notebook.Id
+	ins.ClusterName = o.platform
+	ins.Status = resp.Payload.Notebook.Status
+	return ins, nil
 }
--- a/internal/storeLink/shuguangai.go
+++ b/internal/storeLink/shuguangai.go
@ -851,5 +851,19 @@ func (s *ShuguangAi) StopInferDeployInstance(ctx context.Context, id string) boo
 }

+// GetInferDeployInstance fetches the instance service with the given id
+// through aCRpc.GetInstanceServiceDetail and maps its name, id, status and
+// create time onto a DeployInstance whose ClusterName is s.platform and whose
+// InferCard is DCU.
+//
+// NOTE(review): when the RPC succeeds but resp.Code is not "0", err is nil, so
+// this returns (nil, nil). Callers must nil-check the instance; consider
+// returning an explicit error for the non-"0" case.
 func (s *ShuguangAi) GetInferDeployInstance(ctx context.Context, id string) (*inference.DeployInstance, error) {
-	return nil, nil
+	ins := &inference.DeployInstance{}
+	req := &hpcAC.GetInstanceServiceDetailReq{
+		Id: id,
+	}
+	resp, err := s.aCRpc.GetInstanceServiceDetail(ctx, req)
+	if err != nil || resp.Code != "0" {
+		return nil, err
+	}
+	ins.InstanceName = resp.Data.InstanceServiceName
+	ins.InstanceId = resp.Data.Id
+	ins.ClusterName = s.platform
+	ins.Status = resp.Data.Status
+	ins.InferCard = DCU
+	ins.CreatedTime = resp.Data.CreateTime
+	return ins, nil
 }
--- a/internal/storeLink/storeLink.go
+++ b/internal/storeLink/storeLink.go
@ -79,8 +79,8 @@ var (
 		4: "制作失败",
 	}
 	ModelTypeMap = map[string][]string{
-		"image_recognition": {"imagenet_resnet50"},
-		"text_to_text":      {"chatGLM_6B"},
+		"image_classification": {"imagenet_resnet50"},
+		"text_to_text":         {"chatGLM_6B"},
 	}
 	AITYPE = map[string]string{
 		"1": OCTOPUS,
--- a/internal/types/types.go
+++ b/internal/types/types.go
@ -2902,6 +2902,11 @@ type AiTask struct {
 	TimeElapsed int32  `json:"elapsed,optional"`
 }

+// TrainingTaskStatResp is the response body of the training-task statistics
+// endpoint; both counters are serialized as JSON numbers.
+type TrainingTaskStatResp struct {
+	Running int32 `json:"running"`
+	Total   int32 `json:"total"`
+}
+
 type ChatReq struct {
 	ApiUrl  string                 `json:"apiUrl"`
 	Method  string                 `json:"method,optional"`
@ -5975,7 +5980,8 @@ type DeployInstanceListResp struct {
+// StartDeployInstanceReq carries the form parameters of the start-instance
+// request. Id is bound from the "id" form field and InstanceId from
+// "instanceId"; before this change InstanceId was bound from "id".
 type StartDeployInstanceReq struct {
 	AdapterId  string `form:"adapterId"`
 	ClusterId  string `form:"clusterId"`
-	InstanceId string `form:"id"`
+	Id         string `form:"id"`
+	InstanceId string `form:"instanceId"`
 }

 type StartDeployInstanceResp struct {
@ -5984,8 +5990,25 @@ type StartDeployInstanceResp struct {
+// StopDeployInstanceReq carries the form parameters of the stop-instance
+// request. Id is bound from the "id" form field and InstanceId from
+// "instanceId"; before this change InstanceId was bound from "id".
 type StopDeployInstanceReq struct {
 	AdapterId  string `form:"adapterId"`
 	ClusterId  string `form:"clusterId"`
-	InstanceId string `form:"id"`
+	Id         string `form:"id"`
+	InstanceId string `form:"instanceId"`
 }

+// StopDeployInstanceResp is the response body of the stop-instance request;
+// it has no fields.
 type StopDeployInstanceResp struct {
 }
+
+// DeployInstanceStatReq is the request of the deploy-instance statistics
+// endpoint; it has no parameters.
+type DeployInstanceStatReq struct {
+}
+
+// DeployInstanceStatResp is the response body of the deploy-instance
+// statistics endpoint; both counters are serialized as JSON numbers.
+type DeployInstanceStatResp struct {
+	Running int32 `json:"running"`
+	Total   int32 `json:"total"`
+}
+
+// InferenceTaskStatReq is the request of the inference-task statistics
+// endpoint; it has no parameters.
+type InferenceTaskStatReq struct {
+}
+
+// InferenceTaskStatResp is the response body of the inference-task statistics
+// endpoint; both counters are serialized as JSON numbers.
+type InferenceTaskStatResp struct {
+	Running int32 `json:"running"`
+	Total   int32 `json:"total"`
+}
--- a/pkg/models/aiinferdeployinstancemodel_gen.go
+++ b/pkg/models/aiinferdeployinstancemodel_gen.go
@ -48,6 +48,7 @@ type (
 		Status       string `db:"status"`
 		CreateTime   string `db:"create_time"`
 		UpdateTime   string `db:"update_time"`
+		ClusterType  string `db:"cluster_type"`
 	}
 )