Merge pull request 'updated createdeploytask logics' (#316) from tzwang/pcm-coordinator:master into master

This commit is contained in:
tzwang 2024-09-23 10:53:32 +08:00
commit f1f1697efe
3 changed files with 84 additions and 10 deletions

View File

@ -3,8 +3,10 @@ package inference
import ( import (
"context" "context"
"errors" "errors"
"fmt"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option"
"strconv" "strconv"
"sync"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
@ -40,19 +42,78 @@ func (l *CreateDeployTaskLogic) CreateDeployTask(req *types.CreateDeployTaskReq)
Cmd: "", Cmd: "",
} }
duplicated, err := l.svcCtx.Scheduler.AiStorages.IsDeployTaskNameDuplicated(req.TaskName)
if err != nil {
return nil, err
}
if duplicated {
return nil, errors.New("TaskName doesn't exist")
}
taskId, err := l.svcCtx.Scheduler.AiStorages.SaveInferDeployTask(req.TaskName, req.ModelName, req.ModelType, req.TaskDesc) taskId, err := l.svcCtx.Scheduler.AiStorages.SaveInferDeployTask(req.TaskName, req.ModelName, req.ModelType, req.TaskDesc)
if err != nil { if err != nil {
return nil, err return nil, err
} }
var clusterlen int
for _, c := range req.AdapterClusterMap {
clusterlen += len(c)
}
var errCh = make(chan interface{}, clusterlen)
var errs []interface{}
buf := make(chan bool, 2)
var wg sync.WaitGroup
for aid, v := range req.AdapterClusterMap { for aid, v := range req.AdapterClusterMap {
for _, cid := range v { for _, c := range v {
err = l.createDeployInstance(taskId, aid, cid, opt) wg.Add(1)
if err != nil { cid := c
return nil, err buf <- true
} go func() {
err = l.createDeployInstance(taskId, aid, cid, opt)
if err != nil {
e := struct {
err error
clusterId string
}{
err: err,
clusterId: cid,
}
errCh <- e
wg.Done()
<-buf
return
}
wg.Done()
<-buf
}()
} }
} }
wg.Wait()
close(errCh)
for e := range errCh {
errs = append(errs, e)
}
if len(errs) != 0 {
var msg string
for _, err := range errs {
e := (err).(struct {
err error
clusterId string
})
clusterName, err := l.svcCtx.Scheduler.AiStorages.GetClusterNameById(e.clusterId)
if err != nil {
clusterName = e.clusterId
}
msg += fmt.Sprintf("CreateInstance Failed # clusterName: %v, error: %v \n", clusterName, e.err.Error())
}
return nil, errors.New(msg)
}
return return
} }

View File

@ -103,11 +103,11 @@ func (l *DeployInstanceListLogic) GenerateDeployTasks(tasklist []*models.AiDeplo
return nil, errors.New(err.Error()) return nil, errors.New(err.Error())
} }
if len(list) == 0 { if len(list) == 0 {
err := l.svcCtx.Scheduler.AiStorages.DeleteDeployTaskById(t.Id) //err := l.svcCtx.Scheduler.AiStorages.DeleteDeployTaskById(t.Id)
if err != nil { //if err != nil {
logx.Errorf("db DeleteByDeployTaskId error") // logx.Errorf("db DeleteByDeployTaskId error")
return nil, errors.New(err.Error()) // return nil, errors.New(err.Error())
} //}
continue continue
} }
deployTask := &DeployTask{ deployTask := &DeployTask{

View File

@ -604,3 +604,16 @@ func (s *AiStorage) GetRunningDeployInstanceById(id int64, adapterId string) ([]
} }
return list, nil return list, nil
} }
func (s *AiStorage) IsDeployTaskNameDuplicated(name string) (bool, error) {
var total int32
tx := s.DbEngin.Raw("select count(*) from ai_deploy_instance_task where `name` = ?", name).Scan(&total)
if tx.Error != nil {
return false, tx.Error
}
if total == 0 {
return false, nil
}
return true, nil
}