forked from JointCloud/pcm-coordinator
updated shuguang imageinfer func
This commit is contained in:
parent
349b9660b1
commit
9c6b3801fd
3
go.mod
3
go.mod
|
@ -18,13 +18,12 @@ require (
|
||||||
github.com/prometheus/common v0.59.1
|
github.com/prometheus/common v0.59.1
|
||||||
github.com/robfig/cron/v3 v3.0.1
|
github.com/robfig/cron/v3 v3.0.1
|
||||||
github.com/zeromicro/go-zero v1.7.2
|
github.com/zeromicro/go-zero v1.7.2
|
||||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240918015229-59c579d1a437
|
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240920093406-601f283f0185
|
||||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240918011543-482dcd609877
|
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240918011543-482dcd609877
|
||||||
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240817071412-44397870b110
|
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240817071412-44397870b110
|
||||||
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203
|
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203
|
||||||
gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5
|
gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5
|
||||||
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d
|
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d
|
||||||
gitlink.org.cn/jcce-pcm/utils v0.0.1
|
|
||||||
go.opentelemetry.io/otel/trace v1.29.0
|
go.opentelemetry.io/otel/trace v1.29.0
|
||||||
gonum.org/v1/gonum v0.11.0
|
gonum.org/v1/gonum v0.11.0
|
||||||
google.golang.org/grpc v1.66.0
|
google.golang.org/grpc v1.66.0
|
||||||
|
|
6
go.sum
6
go.sum
|
@ -466,8 +466,8 @@ github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M
|
||||||
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
|
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
|
||||||
github.com/zeromicro/go-zero v1.7.2 h1:a8lyVOG3KXG4LrAy6ZmtJTJtisX4Ostc4Pst4fE704I=
|
github.com/zeromicro/go-zero v1.7.2 h1:a8lyVOG3KXG4LrAy6ZmtJTJtisX4Ostc4Pst4fE704I=
|
||||||
github.com/zeromicro/go-zero v1.7.2/go.mod h1:WFXfF92Exw0O7WECifS6r99JSzv4KEN49x9RhAfgkMc=
|
github.com/zeromicro/go-zero v1.7.2/go.mod h1:WFXfF92Exw0O7WECifS6r99JSzv4KEN49x9RhAfgkMc=
|
||||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240918015229-59c579d1a437 h1:ta6h9+FU7AQ2fNyQiXrZnMdlNBjOKdyBx4e3RF7BE84=
|
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240920093406-601f283f0185 h1:B+YBB5xHlIAS6ILuaCGQwbOpr/L6LOHAlj9PeFUCetM=
|
||||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240918015229-59c579d1a437/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY=
|
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240920093406-601f283f0185/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY=
|
||||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240918011543-482dcd609877 h1:a+1FpxqLPRojlAkJlAeRhKRbxajymXYgrM+s9bfQx0E=
|
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240918011543-482dcd609877 h1:a+1FpxqLPRojlAkJlAeRhKRbxajymXYgrM+s9bfQx0E=
|
||||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240918011543-482dcd609877/go.mod h1:/eOmBFZKWGoabG3sRVkVvIbLwsd2631k4jkUBR6x1AA=
|
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240918011543-482dcd609877/go.mod h1:/eOmBFZKWGoabG3sRVkVvIbLwsd2631k4jkUBR6x1AA=
|
||||||
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240817071412-44397870b110 h1:GaXwr5sgDh0raHjUf9IewTvnRvajYea7zbLsaerYyXo=
|
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240817071412-44397870b110 h1:GaXwr5sgDh0raHjUf9IewTvnRvajYea7zbLsaerYyXo=
|
||||||
|
@ -478,8 +478,6 @@ gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5 h1:+/5vnz
|
||||||
gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5/go.mod h1:97AlUXN13g9UN3+9/DzCHpeoU5sbdyv0IQuTEHNexzQ=
|
gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5/go.mod h1:97AlUXN13g9UN3+9/DzCHpeoU5sbdyv0IQuTEHNexzQ=
|
||||||
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d h1:DHjl/rLuH2gKYtY0MKMGNQDHFT12APg25RlMUQo+tHk=
|
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d h1:DHjl/rLuH2gKYtY0MKMGNQDHFT12APg25RlMUQo+tHk=
|
||||||
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d/go.mod h1:r/KLzUpupCV5jdxSfgDhc2pVjP0fBi3VhAWRttsBn30=
|
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d/go.mod h1:r/KLzUpupCV5jdxSfgDhc2pVjP0fBi3VhAWRttsBn30=
|
||||||
gitlink.org.cn/jcce-pcm/utils v0.0.1 h1:3PH93Z/JFTH5JRO9MFf3dD1Gnd12aGiIIViWBlQGuhE=
|
|
||||||
gitlink.org.cn/jcce-pcm/utils v0.0.1/go.mod h1:5cwaaqM0+HK5GXVbYozGlWvgwoUby0KytdvhbwQW1ks=
|
|
||||||
go.etcd.io/etcd/api/v3 v3.5.15 h1:3KpLJir1ZEBrYuV2v+Twaa/e2MdDCEZ/70H+lzEiwsk=
|
go.etcd.io/etcd/api/v3 v3.5.15 h1:3KpLJir1ZEBrYuV2v+Twaa/e2MdDCEZ/70H+lzEiwsk=
|
||||||
go.etcd.io/etcd/api/v3 v3.5.15/go.mod h1:N9EhGzXq58WuMllgH9ZvnEr7SI9pS0k0+DHZezGp7jM=
|
go.etcd.io/etcd/api/v3 v3.5.15/go.mod h1:N9EhGzXq58WuMllgH9ZvnEr7SI9pS0k0+DHZezGp7jM=
|
||||||
go.etcd.io/etcd/client/pkg/v3 v3.5.15 h1:fo0HpWz/KlHGMCC+YejpiCmyWDEuIpnTDzpJLB5fWlA=
|
go.etcd.io/etcd/client/pkg/v3 v3.5.15 h1:fo0HpWz/KlHGMCC+YejpiCmyWDEuIpnTDzpJLB5fWlA=
|
||||||
|
|
|
@ -463,7 +463,7 @@ func getInferResult(url string, file multipart.File, fileName string, clusterId
|
||||||
switch clusterType {
|
switch clusterType {
|
||||||
case storeLink.TYPE_OCTOPUS:
|
case storeLink.TYPE_OCTOPUS:
|
||||||
r := http.Request{}
|
r := http.Request{}
|
||||||
result, err := iCluster.GetInferResult(r.Context(), url, file, fileName)
|
result, err := iCluster.GetImageInferResult(r.Context(), url, file, fileName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,13 +12,17 @@ const (
|
||||||
|
|
||||||
type ICluster interface {
|
type ICluster interface {
|
||||||
GetClusterInferUrl(ctx context.Context, option *option.InferOption) (*ClusterInferUrl, error)
|
GetClusterInferUrl(ctx context.Context, option *option.InferOption) (*ClusterInferUrl, error)
|
||||||
GetInferResult(ctx context.Context, url string, file multipart.File, fileName string) (string, error)
|
|
||||||
GetInferDeployInstanceList(ctx context.Context) ([]*DeployInstance, error)
|
GetInferDeployInstanceList(ctx context.Context) ([]*DeployInstance, error)
|
||||||
StartInferDeployInstance(ctx context.Context, id string) bool
|
StartInferDeployInstance(ctx context.Context, id string) bool
|
||||||
StopInferDeployInstance(ctx context.Context, id string) bool
|
StopInferDeployInstance(ctx context.Context, id string) bool
|
||||||
GetInferDeployInstance(ctx context.Context, id string) (*DeployInstance, error)
|
GetInferDeployInstance(ctx context.Context, id string) (*DeployInstance, error)
|
||||||
CreateInferDeployInstance(ctx context.Context, option *option.InferOption) (string, error)
|
CreateInferDeployInstance(ctx context.Context, option *option.InferOption) (string, error)
|
||||||
CheckModelExistence(ctx context.Context, modelName string, modelType string) bool
|
CheckModelExistence(ctx context.Context, modelName string, modelType string) bool
|
||||||
|
InferResult
|
||||||
|
}
|
||||||
|
|
||||||
|
type InferResult interface {
|
||||||
|
GetImageInferResult(ctx context.Context, url string, file multipart.File, fileName string) (string, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
type IInference interface {
|
type IInference interface {
|
||||||
|
|
|
@ -767,7 +767,7 @@ func (m *ModelArtsLink) GetInferDeployInstance(ctx context.Context, id string) (
|
||||||
return ins, nil
|
return ins, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *ModelArtsLink) GetInferResult(ctx context.Context, url string, file multipart.File, fileName string) (string, error) {
|
func (m *ModelArtsLink) GetImageInferResult(ctx context.Context, url string, file multipart.File, fileName string) (string, error) {
|
||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1183,7 +1183,7 @@ func (o *OctopusLink) GetInferDeployInstance(ctx context.Context, id string) (*i
|
||||||
return ins, nil
|
return ins, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *OctopusLink) GetInferResult(ctx context.Context, url string, file multipart.File, fileName string) (string, error) {
|
func (o *OctopusLink) GetImageInferResult(ctx context.Context, url string, file multipart.File, fileName string) (string, error) {
|
||||||
stream, err := o.octopusRpc.GetInferResult(ctx)
|
stream, err := o.octopusRpc.GetInferResult(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
|
|
|
@ -47,49 +47,60 @@ const (
|
||||||
PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py"
|
PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py"
|
||||||
DATASETS_DIR = "/work/home/acgnnmfbwo/pcmv1/dataset"
|
DATASETS_DIR = "/work/home/acgnnmfbwo/pcmv1/dataset"
|
||||||
ALGORITHM_DIR = "/work/home/acgnnmfbwo/pcmv1/algorithm"
|
ALGORITHM_DIR = "/work/home/acgnnmfbwo/pcmv1/algorithm"
|
||||||
|
KUNSHAN_DIR = "/public/home/acgnnmfbwo/pcmv1"
|
||||||
TRAIN_FILE = "train.py"
|
TRAIN_FILE = "train.py"
|
||||||
CPUCOREPRICEPERHOUR = 0.09
|
CPUCOREPRICEPERHOUR = 0.09
|
||||||
DCUPRICEPERHOUR = 2.0
|
DCUPRICEPERHOUR = 2.0
|
||||||
KB = 1024
|
KB = 1024
|
||||||
TIMEOUT = 20
|
TIMEOUT = 20
|
||||||
DEPLOY_INSTANCE_LIMIT = 100
|
DEPLOY_INSTANCE_LIMIT = 100
|
||||||
|
ProtocolType = "HTTP"
|
||||||
|
ContainerPort = 8881
|
||||||
|
JUPYTER = "jupyter"
|
||||||
)
|
)
|
||||||
|
|
||||||
var RESOURCESGAIMAP = map[string]ResourceSpecSGAI{
|
var (
|
||||||
"WodTB2rJ8SobMgQ1nrtR245jxOrsovFi": {
|
RESOURCESGAIMAP = map[string]ResourceSpecSGAI{
|
||||||
CPU: 1,
|
"WodTB2rJ8SobMgQ1nrtR245jxOrsovFi": {
|
||||||
GPU: 1,
|
CPU: 1,
|
||||||
RAM: 2 * RAM_SIZE_1G,
|
GPU: 1,
|
||||||
},
|
RAM: 2 * RAM_SIZE_1G,
|
||||||
"6d41v1XV53MQPmQOJ5kNatIck9yl8nWZ": {
|
},
|
||||||
CPU: 1,
|
"6d41v1XV53MQPmQOJ5kNatIck9yl8nWZ": {
|
||||||
GPU: 2,
|
CPU: 1,
|
||||||
RAM: 2 * RAM_SIZE_1G,
|
GPU: 2,
|
||||||
},
|
RAM: 2 * RAM_SIZE_1G,
|
||||||
"OBtVaaXAv9n9FbLR7pWAoa3yR13jXwNc": {
|
},
|
||||||
CPU: 2,
|
"OBtVaaXAv9n9FbLR7pWAoa3yR13jXwNc": {
|
||||||
GPU: 3,
|
CPU: 2,
|
||||||
RAM: 4 * RAM_SIZE_1G,
|
GPU: 3,
|
||||||
},
|
RAM: 4 * RAM_SIZE_1G,
|
||||||
"sBWfpkntUzsWYly11kdwEHZOYYIsFmve": {
|
},
|
||||||
CPU: 4,
|
"sBWfpkntUzsWYly11kdwEHZOYYIsFmve": {
|
||||||
GPU: 4,
|
CPU: 4,
|
||||||
RAM: 8 * RAM_SIZE_1G,
|
GPU: 4,
|
||||||
},
|
RAM: 8 * RAM_SIZE_1G,
|
||||||
"jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2": {
|
},
|
||||||
CPU: 5,
|
"jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2": {
|
||||||
GPU: 5,
|
CPU: 5,
|
||||||
RAM: 10 * RAM_SIZE_1G,
|
GPU: 5,
|
||||||
},
|
RAM: 10 * RAM_SIZE_1G,
|
||||||
}
|
},
|
||||||
|
}
|
||||||
|
|
||||||
var RESOURCESPECSAI = map[string]string{
|
RESOURCESPECSAI = map[string]string{
|
||||||
"WodTB2rJ8SobMgQ1nrtR245jxOrsovFi": "CPU:1, DCU:1, RAM:2G",
|
"WodTB2rJ8SobMgQ1nrtR245jxOrsovFi": "CPU:1, DCU:1, RAM:2G",
|
||||||
"6d41v1XV53MQPmQOJ5kNatIck9yl8nWZ": "CPU:1, DCU:2, RAM:2G",
|
"6d41v1XV53MQPmQOJ5kNatIck9yl8nWZ": "CPU:1, DCU:2, RAM:2G",
|
||||||
"OBtVaaXAv9n9FbLR7pWAoa3yR13jXwNc": "CPU:2, DCU:3, RAM:4G",
|
"OBtVaaXAv9n9FbLR7pWAoa3yR13jXwNc": "CPU:2, DCU:3, RAM:4G",
|
||||||
"sBWfpkntUzsWYly11kdwEHZOYYIsFmve": "CPU:4, DCU:4, RAM:8G",
|
"sBWfpkntUzsWYly11kdwEHZOYYIsFmve": "CPU:4, DCU:4, RAM:8G",
|
||||||
"jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2": "CPU:5, DCU:5, RAM:10G",
|
"jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2": "CPU:5, DCU:5, RAM:10G",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ModelNameCmdMap = map[string]string{
|
||||||
|
"blip-image-captioning-base": "pip install transformers python-multipart fastapi uvicorn[standard]; python /public/home/acgnnmfbwo/pcmv1/inference/pytorch/blip_image_captioning_base/infer.py",
|
||||||
|
"imagenet_resnet50": "pip install fastapi uvicorn[standard] python-multipart; python /public/home/acgnnmfbwo/pcmv1/inference/pytorch/imagenet_resnet50/infer.py",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
type ResourceSpecSGAI struct {
|
type ResourceSpecSGAI struct {
|
||||||
CPU int64
|
CPU int64
|
||||||
|
@ -905,15 +916,81 @@ func (s *ShuguangAi) GetInferDeployInstance(ctx context.Context, id string) (*in
|
||||||
return ins, nil
|
return ins, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *ShuguangAi) GetInferResult(ctx context.Context, url string, file multipart.File, fileName string) (string, error) {
|
func (s *ShuguangAi) GetImageInferResult(ctx context.Context, url string, file multipart.File, fileName string) (string, error) {
|
||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *ShuguangAi) CreateInferDeployInstance(ctx context.Context, option *option.InferOption) (string, error) {
|
func (s *ShuguangAi) CreateInferDeployInstance(ctx context.Context, option *option.InferOption) (string, error) {
|
||||||
|
containerPortInfoList := []*hpcAC.ContainerPortInfoList{
|
||||||
|
{
|
||||||
|
ProtocolType: ProtocolType,
|
||||||
|
ContainerPort: ContainerPort,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
return "", nil
|
desc := option.ModelType + FORWARD_SLASH + option.ModelName + FORWARD_SLASH + strings.ToLower(DCU)
|
||||||
|
instanceServiceName := "infer_instance" + UNDERSCORE + utils.RandomString(15)
|
||||||
|
resourceGroup := "kshdtest"
|
||||||
|
|
||||||
|
script, ok := ModelNameCmdMap[option.ModelName]
|
||||||
|
if !ok {
|
||||||
|
return "", errors.New("failed to set cmd, ModelName not exist")
|
||||||
|
}
|
||||||
|
|
||||||
|
param := &hpcAC.CreateParams{
|
||||||
|
AcceleratorType: strings.ToLower(DCU),
|
||||||
|
ContainerPortInfoList: containerPortInfoList,
|
||||||
|
CpuNumber: 8,
|
||||||
|
Description: desc,
|
||||||
|
//env
|
||||||
|
GpuNumber: 1,
|
||||||
|
ImagePath: "11.11.100.6:5000/dcu/admin/base/jupyterlab-pytorch:1.13.1-py3.7-dtk23.04-centos7.6",
|
||||||
|
InstanceServiceName: instanceServiceName,
|
||||||
|
MountInfoList: make([]*hpcAC.MountInfoList, 0),
|
||||||
|
//originalVersion
|
||||||
|
RamSize: 10 * RAM_SIZE_1G,
|
||||||
|
//rdma
|
||||||
|
ResourceGroup: resourceGroup,
|
||||||
|
StartScriptActionScope: "all",
|
||||||
|
StartScriptContent: script,
|
||||||
|
//startServiceCommand
|
||||||
|
//taskClassification: "interactive"
|
||||||
|
TaskNumber: 1,
|
||||||
|
TaskType: JUPYTER,
|
||||||
|
TimeoutLimit: "01:00:00",
|
||||||
|
UseStartScript: true,
|
||||||
|
//useStartServiceCommand: false
|
||||||
|
Version: "jupyterlab-pytorch:1.13.1-py3.7-dtk23.04-centos7.6",
|
||||||
|
}
|
||||||
|
|
||||||
|
req := &hpcacclient.CreateInstanceServiceReq{
|
||||||
|
Data: param,
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := s.aCRpc.CreateInstanceService(ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
if resp.Code != "0" {
|
||||||
|
return "", errors.New(resp.Msg)
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp.Data, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *ShuguangAi) CheckModelExistence(ctx context.Context, name string, mtype string) bool {
|
func (s *ShuguangAi) CheckModelExistence(ctx context.Context, name string, mtype string) bool {
|
||||||
return false
|
modelPath := "model" + FORWARD_SLASH + name
|
||||||
|
req := &hpcAC.IsExistFileReq{
|
||||||
|
Path: KUNSHAN_DIR + FORWARD_SLASH + modelPath,
|
||||||
|
}
|
||||||
|
resp, err := s.aCRpc.IsExistFile(ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.Code != "0" || resp.Data == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp.Data.Exist
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue