forked from mindspore-Ecosystem/mindspore
!6728 [Ascend][DynamicShape] Dynamic shape feature
Merge pull request !6728 from caifubi/dynamic_shape_share_2
This commit is contained in:
commit
c951d42c2c
|
@ -18,6 +18,7 @@ import os
|
|||
import sys
|
||||
from te.platform.cce_conf import te_set_version
|
||||
from te.platform.fusion_util import fusion_op
|
||||
import te
|
||||
from common import check_kernel_info, get_args, get_build_in_impl_path
|
||||
|
||||
build_in_impl_path = get_build_in_impl_path()
|
||||
|
@ -38,6 +39,16 @@ def _initialize(impl_path):
|
|||
|
||||
sys.path.insert(0, op_module_name)
|
||||
|
||||
def _replace_range(args):
|
||||
for arg in args:
|
||||
if not arg.__contains__('range'):
|
||||
continue
|
||||
shape_range = arg["range"]
|
||||
for range_item in shape_range:
|
||||
for index, value in enumerate(range_item):
|
||||
if value < 0:
|
||||
range_item[index] = None
|
||||
|
||||
def build_op(build_type, json_str):
|
||||
"""
|
||||
call op functions with function name and input args json_str
|
||||
|
@ -71,11 +82,18 @@ def build_op(build_type, json_str):
|
|||
outputs_args = get_args(kernel_info['op_info'], 'outputs')
|
||||
attrs_args = get_args(kernel_info['op_info'], 'attrs')
|
||||
kernel_name = kernel_info['op_info']['kernel_name']
|
||||
is_dynamic_shape = kernel_info['op_info']['is_dynamic_shape']
|
||||
if is_dynamic_shape:
|
||||
_replace_range(inputs_args)
|
||||
_replace_range(outputs_args)
|
||||
|
||||
if custom_flag:
|
||||
op_module = __import__(op_name)
|
||||
else:
|
||||
op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0)
|
||||
if is_dynamic_shape:
|
||||
op_module = __import__("impl.dynamic."+op_name, globals(), locals(), [op_name], 0)
|
||||
else:
|
||||
op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0)
|
||||
# get function
|
||||
if build_type == op_build:
|
||||
if custom_flag:
|
||||
|
@ -92,7 +110,12 @@ def build_op(build_type, json_str):
|
|||
if kernel_name[0:19] == "bounding_box_encode":
|
||||
return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name_val=kernel_name)
|
||||
|
||||
return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
|
||||
if is_dynamic_shape:
|
||||
with te.op.dynamic():
|
||||
op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
|
||||
return te.op.get_compile_info()
|
||||
else:
|
||||
return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
|
||||
|
||||
except Exception as e:
|
||||
raise RuntimeError(e)
|
||||
|
|
|
@ -78,6 +78,7 @@ def _check_supported(kernel_info):
|
|||
"""
|
||||
try:
|
||||
op_name = kernel_info['op_info']['name']
|
||||
is_dynamic_shape = kernel_info['op_info']['is_dynamic_shape']
|
||||
impl_path = build_in_impl_path
|
||||
custom_flag = False
|
||||
if 'impl_path' in kernel_info and kernel_info['impl_path'] is not None:
|
||||
|
@ -92,8 +93,11 @@ def _check_supported(kernel_info):
|
|||
|
||||
if custom_flag:
|
||||
op_module = __import__(op_name)
|
||||
elif is_dynamic_shape:
|
||||
op_module = __import__("impl.dynamic." + op_name, globals(), locals(), [op_name], 0)
|
||||
else:
|
||||
op_module = __import__("impl." + op_name, globals(), locals(), [op_name], 0)
|
||||
|
||||
# get function
|
||||
if not hasattr(op_module, "check_supported"):
|
||||
return ""
|
||||
|
|
|
@ -219,6 +219,7 @@ if (ENABLE_D)
|
|||
set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common)
|
||||
set(ASCEND_DRIVER_BACK_PATH ${ASCEND_PATH}/driver/lib64/driver)
|
||||
set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64)
|
||||
set(ASCEND_OPP_PATH ${ASCEND_PATH}/opp/op_impl/built-in/ai_core/tbe/op_tiling)
|
||||
endif()
|
||||
|
||||
MESSAGE("USE DAV LIB PATH: ${ASCEND_PATH}")
|
||||
|
@ -228,7 +229,8 @@ if (ENABLE_D)
|
|||
find_library(TSDCLIENT tsdclient HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
|
||||
find_library(DATATRANSFER datatransfer HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
|
||||
find_library(PROFILING msprofiler ${ASCEND_RUNTIME_PATH})
|
||||
target_link_libraries(mindspore ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${HCCL} ${DATATRANSFER})
|
||||
find_library(OPTILING optiling ${ASCEND_OPP_PATH})
|
||||
target_link_libraries(mindspore ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${HCCL} ${DATATRANSFER} ${OPTILING})
|
||||
target_link_libraries(mindspore -Wl,--start-group proto_input ${PROFILING} mindspore::protobuf -Wl,--end-group)
|
||||
elseif (CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
target_link_libraries(mindspore -Wl,--start-group proto_input mindspore::protobuf mindspore::sentencepiece -Wl,--end-group)
|
||||
|
@ -258,6 +260,7 @@ if (ENABLE_D)
|
|||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/fwkacllib/lib64)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/add-ons)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe/op_tiling)
|
||||
elseif (ENABLE_GPU)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/cuda/lib64)
|
||||
endif ()
|
||||
|
@ -315,6 +318,8 @@ add_library(inference SHARED
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/backend/session/infer_session.cc
|
||||
${LOAD_ONNX_SRC}
|
||||
)
|
||||
|
||||
set_target_properties(inference PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
|
||||
target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY}
|
||||
-Wl,--whole-archive mindspore proto_input -Wl,--no-whole-archive mindspore_gvar)
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@ if (ENABLE_D)
|
|||
"akg/akg_kernel_attrs_process.cc"
|
||||
"akg/akg_kernel_metadata.cc"
|
||||
"tbe/*.cc"
|
||||
"host/*.cc"
|
||||
"aicpu/*.cc"
|
||||
"rts/*.cc"
|
||||
"hccl/*.cc"
|
||||
|
|
|
@ -289,51 +289,25 @@ bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
|
|||
return true;
|
||||
}
|
||||
|
||||
bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
|
||||
if (!anf_node->isa<CNode>()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!AnfAlgo::IsDynamicShape(anf_node)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "CreateExtInfo start, " << anf_node->fullname_with_scope();
|
||||
|
||||
int32_t unknown_shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
|
||||
uint64_t ext_info_head_len = kExtInfoHeadSize;
|
||||
std::string ext_info;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
|
||||
|
||||
// 1.addr:unknown shape type
|
||||
uint64_t ext_info_len = ext_info.size();
|
||||
ext_info_len += ext_info_head_len + sizeof(int32_t);
|
||||
|
||||
// 2.addr:input ShapeAndType
|
||||
ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType);
|
||||
|
||||
// 3.addr:output ShapeAndType
|
||||
ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType);
|
||||
|
||||
uint64_t ext_info_offset = ext_info.size();
|
||||
ext_info.resize(ext_info_len, 0);
|
||||
char *ext_info_buf = ext_info.data();
|
||||
|
||||
uint64_t SetExtInfoShapeType(char *ext_info_buf, uint64_t ext_info_offset) {
|
||||
// deal1: unknown shape type
|
||||
ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
|
||||
info->infoType = FWK_ADPT_EXT_SHAPE_TYPE;
|
||||
info->infoLen = sizeof(int32_t);
|
||||
ext_info_offset += ext_info_head_len;
|
||||
ext_info_offset += kExtInfoHeadSize;
|
||||
int32_t *shape_type = reinterpret_cast<int32_t *>(ext_info_buf + ext_info_offset);
|
||||
*shape_type = unknown_shape_type;
|
||||
*shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
|
||||
ext_info_offset += info->infoLen;
|
||||
return ext_info_offset;
|
||||
}
|
||||
|
||||
uint64_t SetExtInfoInputShapeType(char *ext_info_buf, uint64_t ext_info_offset,
|
||||
const std::shared_ptr<AnfNode> &anf_node, size_t input_num) {
|
||||
// deal2:input ShapeAndType
|
||||
info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
|
||||
ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
|
||||
info->infoType = FWK_ADPT_EXT_INPUT_SHAPE;
|
||||
info->infoLen = input_num * sizeof(ShapeAndType);
|
||||
ext_info_offset += ext_info_head_len;
|
||||
ext_info_offset += kExtInfoHeadSize;
|
||||
|
||||
ShapeAndType *inputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
|
||||
for (size_t input_index = 0; input_index < input_num; input_index++) {
|
||||
|
@ -364,12 +338,16 @@ bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_p
|
|||
}
|
||||
}
|
||||
ext_info_offset += info->infoLen;
|
||||
return ext_info_offset;
|
||||
}
|
||||
|
||||
uint64_t SetExtInfoOutputShapeType(char *ext_info_buf, uint64_t ext_info_offset,
|
||||
const std::shared_ptr<AnfNode> &anf_node, size_t output_num) {
|
||||
// deal3:output ShapeAndType
|
||||
info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
|
||||
ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
|
||||
info->infoType = FWK_ADPT_EXT_OUTPUT_SHAPE;
|
||||
info->infoLen = output_num * sizeof(ShapeAndType);
|
||||
ext_info_offset += ext_info_head_len;
|
||||
ext_info_offset += kExtInfoHeadSize;
|
||||
|
||||
ShapeAndType *outputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
|
||||
for (size_t output_index = 0; output_index < output_num; output_index++) {
|
||||
|
@ -387,6 +365,47 @@ bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_p
|
|||
}
|
||||
}
|
||||
|
||||
ext_info_offset += info->infoLen;
|
||||
return ext_info_offset;
|
||||
}
|
||||
|
||||
bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
|
||||
if (!anf_node->isa<CNode>()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!AnfAlgo::IsDynamicShape(anf_node)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "CreateExtInfo start, " << anf_node->fullname_with_scope();
|
||||
|
||||
uint64_t ext_info_head_len = kExtInfoHeadSize;
|
||||
std::string ext_info;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
|
||||
|
||||
// 1.addr:unknown shape type
|
||||
uint64_t ext_info_len = ext_info.size();
|
||||
ext_info_len += ext_info_head_len + sizeof(int32_t);
|
||||
|
||||
// 2.addr:input ShapeAndType
|
||||
ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType);
|
||||
|
||||
// 3.addr:output ShapeAndType
|
||||
ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType);
|
||||
|
||||
uint64_t ext_info_offset = ext_info.size();
|
||||
ext_info.resize(ext_info_len, 0);
|
||||
char *ext_info_buf = ext_info.data();
|
||||
|
||||
ext_info_offset = SetExtInfoShapeType(ext_info_buf, ext_info_offset);
|
||||
ext_info_offset = SetExtInfoInputShapeType(ext_info_buf, ext_info_offset, anf_node, input_num);
|
||||
ext_info_offset = SetExtInfoOutputShapeType(ext_info_buf, ext_info_offset, anf_node, output_num);
|
||||
|
||||
MS_LOG(INFO) << "Check ext_info_len:" << ext_info_len << " ext_info_offset:" << ext_info_offset;
|
||||
// set ext info
|
||||
kernel_mod_ptr->SetExtInfo(ext_info);
|
||||
return true;
|
||||
|
|
|
@ -26,8 +26,13 @@
|
|||
#include "utils/convert_utils.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "runtime/device/ascend/executor/ai_cpu_dynamic_kernel.h"
|
||||
#include "runtime/device/kernel_runtime.h"
|
||||
#include "runtime/device/ascend/executor/host_dynamic_kernel.h"
|
||||
|
||||
using AicpuTaskInfoPtr = std::shared_ptr<ge::model_runner::AicpuTaskInfo>;
|
||||
using AicpuDynamicKernel = mindspore::device::ascend::AiCpuDynamicKernel;
|
||||
using HostDynamicKernel = mindspore::device::ascend::HostDynamicKernel;
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
@ -93,7 +98,7 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs
|
|||
param_len += node_def_len;
|
||||
param_len += sizeof(uint32_t);
|
||||
|
||||
AicpuParamHead aicpu_param_head;
|
||||
AicpuParamHead aicpu_param_head{};
|
||||
aicpu_param_head.length = param_len;
|
||||
aicpu_param_head.ioAddrNum = io_addrs_num;
|
||||
|
||||
|
@ -178,5 +183,15 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
|
|||
MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
|
||||
return {task_info_ptr};
|
||||
}
|
||||
|
||||
device::DynamicKernelPtr AicpuOpKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
|
||||
AddressPtrList kernel_inputs;
|
||||
AddressPtrList kernel_workspaces;
|
||||
AddressPtrList kernel_outputs;
|
||||
device::KernelRuntime::GenLaunchArgs(*this, cnode_ptr, &kernel_inputs, &kernel_workspaces, &kernel_outputs);
|
||||
|
||||
CreateCpuKernelInfo(kernel_inputs, kernel_outputs);
|
||||
return std::make_shared<AicpuDynamicKernel>(stream_ptr, cnode_ptr, args_, ext_info_, node_so_, node_name_);
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -31,6 +31,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
|
|||
|
||||
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
|
||||
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
|
||||
|
||||
void SetInputList(const std::vector<int64_t> &inputList);
|
||||
void SetOutputList(const std::vector<int64_t> &outputList);
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
static std::map<int32_t, int32_t> MS_PROTO_DATA_TYPE_MAP = {
|
||||
static const std::map<int32_t, int32_t> kMsProtoDataTypeMap = {
|
||||
{mindspore::TypeId::kTypeUnknown, mindspore::DataType::MS_UNKNOWN},
|
||||
{mindspore::TypeId::kNumberTypeBool, mindspore::DataType::MS_BOOL},
|
||||
{mindspore::TypeId::kNumberTypeInt, mindspore::DataType::MS_INT32},
|
||||
|
@ -39,14 +39,38 @@ static std::map<int32_t, int32_t> MS_PROTO_DATA_TYPE_MAP = {
|
|||
{mindspore::TypeId::kNumberTypeFloat64, mindspore::DataType::MS_FLOAT64},
|
||||
};
|
||||
|
||||
static const std::map<int32_t, int32_t> kProtoDataTypeToMsDataTypeMap = {
|
||||
{mindspore::DataType::MS_UNKNOWN, mindspore::TypeId::kTypeUnknown},
|
||||
{mindspore::DataType::MS_BOOL, mindspore::TypeId::kNumberTypeBool},
|
||||
{mindspore::DataType::MS_INT32, mindspore::TypeId::kNumberTypeInt32},
|
||||
{mindspore::DataType::MS_INT8, mindspore::TypeId::kNumberTypeInt8},
|
||||
{mindspore::DataType::MS_INT16, mindspore::TypeId::kNumberTypeInt16},
|
||||
{mindspore::DataType::MS_INT64, mindspore::TypeId::kNumberTypeInt64},
|
||||
{mindspore::DataType::MS_UINT8, mindspore::TypeId::kNumberTypeUInt8},
|
||||
{mindspore::DataType::MS_UINT16, mindspore::TypeId::kNumberTypeUInt16},
|
||||
{mindspore::DataType::MS_UINT32, mindspore::TypeId::kNumberTypeUInt32},
|
||||
{mindspore::DataType::MS_UINT64, mindspore::TypeId::kNumberTypeUInt64},
|
||||
{mindspore::DataType::MS_FLOAT16, mindspore::TypeId::kNumberTypeFloat16},
|
||||
{mindspore::DataType::MS_FLOAT32, mindspore::TypeId::kNumberTypeFloat32},
|
||||
{mindspore::DataType::MS_FLOAT64, mindspore::TypeId::kNumberTypeFloat64},
|
||||
};
|
||||
|
||||
int AicpuOpUtil::MsTypeToProtoType(TypeId ms_type) {
|
||||
auto iter = MS_PROTO_DATA_TYPE_MAP.find(ms_type);
|
||||
if (iter != MS_PROTO_DATA_TYPE_MAP.end()) {
|
||||
return MS_PROTO_DATA_TYPE_MAP[ms_type];
|
||||
} else {
|
||||
auto iter = kMsProtoDataTypeMap.find(ms_type);
|
||||
if (iter == kMsProtoDataTypeMap.end()) {
|
||||
MS_LOG(ERROR) << "UnSupported ms_type value" << static_cast<int>(ms_type);
|
||||
return -1;
|
||||
}
|
||||
return iter->second;
|
||||
}
|
||||
|
||||
int AicpuOpUtil::ProtoTypeToMsType(int proto_type) {
|
||||
auto iter = kProtoDataTypeToMsDataTypeMap.find(proto_type);
|
||||
if (iter == kProtoDataTypeToMsDataTypeMap.end()) {
|
||||
MS_LOG(ERROR) << "UnSupported proto_type value:" << proto_type;
|
||||
return -1;
|
||||
}
|
||||
return iter->second;
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -55,13 +55,6 @@ struct AicpuParamHead {
|
|||
uint64_t extInfoAddr; // extInfo address
|
||||
} __attribute__((packed));
|
||||
|
||||
const uint32_t kExtInfoHeadSize = 8;
|
||||
struct ExtInfo {
|
||||
int32_t infoType; // extend type
|
||||
uint32_t infoLen; // length for infoMsg
|
||||
char infoMsg[0]; // extend value
|
||||
} __attribute__((packed));
|
||||
|
||||
// Extent info ShapeAndType
|
||||
const uint32_t kMaxShapeDims = 8;
|
||||
struct ShapeAndType {
|
||||
|
@ -69,6 +62,14 @@ struct ShapeAndType {
|
|||
int64_t dims[kMaxShapeDims];
|
||||
} __attribute__((packed));
|
||||
|
||||
// Extend info structure for extInfoAddr
|
||||
const uint32_t kExtInfoHeadSize = 8;
|
||||
struct ExtInfo {
|
||||
int32_t infoType; // extend type
|
||||
uint32_t infoLen; // length for infoMsg
|
||||
char infoMsg[0]; // extend value
|
||||
} __attribute__((packed));
|
||||
|
||||
// Extend Info type for task
|
||||
enum FWKTaskExtInfoType {
|
||||
FWK_ADPT_EXT_SHAPE_TYPE = 0,
|
||||
|
@ -88,6 +89,7 @@ enum UnknowShapeOpType {
|
|||
class AicpuOpUtil {
|
||||
public:
|
||||
static int MsTypeToProtoType(TypeId ms_type);
|
||||
static int ProtoTypeToMsType(int proto_type);
|
||||
|
||||
private:
|
||||
// kernel id
|
||||
|
|
|
@ -15,15 +15,34 @@
|
|||
*/
|
||||
|
||||
#include "backend/kernel_compiler/hccl/hccl_kernel.h"
|
||||
|
||||
#include <map>
|
||||
#include "runtime/device/ascend/tasksink/runtime_utils.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "utils/utils.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "runtime/device/kernel_runtime.h"
|
||||
#include "runtime/device/ascend/executor/hccl_dynamic_kernel.h"
|
||||
|
||||
using HcclTaskInfoPtr = std::shared_ptr<ge::model_runner::HcclTaskInfo>;
|
||||
using ge::model_runner::HcclTaskInfo;
|
||||
using mindspore::device::ascend::tasksink::RuntimeUtils;
|
||||
|
||||
namespace {
|
||||
static std::map<std::string, std::string> kMsOpNameToHcomHcclType = {
|
||||
{mindspore::kAllReduceOpName, mindspore::kHcomOpTypeAllReduce},
|
||||
{mindspore::kAllGatherOpName, mindspore::kHcomOpTypeAllGather},
|
||||
{mindspore::kBroadcastOpName, mindspore::kHcomOpTypeBroadcast},
|
||||
{mindspore::kReduceScatterOpName, mindspore::kHcomOpTypeReduceScatter}};
|
||||
std::string MsOpNameToHcomOpType(const std::string &ms_op_type) {
|
||||
auto iter = kMsOpNameToHcomHcclType.find(ms_op_type);
|
||||
if (iter == kMsOpNameToHcomHcclType.end()) {
|
||||
MS_LOG(EXCEPTION) << "Invalid MsOpType:" << ms_op_type;
|
||||
}
|
||||
return iter->second;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
void HcclKernelFactory::Registe(const std::string &name, HcclKernelCreater &&fun) {
|
||||
|
@ -156,5 +175,30 @@ std::vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inpu
|
|||
MS_EXCEPTION_IF_NULL(task_info_ptr);
|
||||
return {task_info_ptr};
|
||||
}
|
||||
|
||||
device::DynamicKernelPtr HcclKernel::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
|
||||
AddressPtrList inputs;
|
||||
AddressPtrList workspaces;
|
||||
AddressPtrList outputs;
|
||||
device::KernelRuntime::GenLaunchArgs(*this, cnode_ptr, &inputs, &workspaces, &outputs);
|
||||
|
||||
std::string hccl_type = MsOpNameToHcomOpType(AnfAlgo::GetCNodeName(anf_node_));
|
||||
|
||||
if (inputs.empty()) {
|
||||
MS_LOG(EXCEPTION) << "Hccl kernel input is empty";
|
||||
}
|
||||
if (hccl_data_type_list_.empty()) {
|
||||
MS_LOG(EXCEPTION) << "Hccl data type list is empty";
|
||||
}
|
||||
MS_EXCEPTION_IF_NULL(inputs.at(0));
|
||||
auto input_data_addr = inputs.at(0)->addr;
|
||||
MS_EXCEPTION_IF_NULL(outputs.at(0));
|
||||
auto output_data_addr = outputs.at(0)->addr;
|
||||
HcclDataType data_type = hccl_data_type_list_[0];
|
||||
|
||||
auto executor = std::make_shared<device::ascend::HcclDynamicKernel>(
|
||||
hccl_type, input_data_addr, output_data_addr, hccl_count_, data_type, op_type_, root_id_, stream_ptr, cnode_ptr);
|
||||
return executor;
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -41,6 +41,7 @@ class HcclKernel : public AscendKernelMod {
|
|||
const std::vector<size_t> &GetWorkspaceSizeList() const override;
|
||||
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
|
||||
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
|
||||
|
||||
protected:
|
||||
std::vector<std::vector<size_t>> hccl_kernel_input_shape_list_;
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/host/dynamic_shape_kernel.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
void DynamicShapeKernel::Execute() {
|
||||
MS_LOG(INFO) << "Execute DynamicShapeKernel Start";
|
||||
auto input_num = AnfAlgo::GetInputTensorNum(cnode_ptr_);
|
||||
if (input_num != 1) {
|
||||
MS_LOG(EXCEPTION) << "Invalid Input Num:" << input_num;
|
||||
}
|
||||
|
||||
auto prev_output_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode_ptr_, 0);
|
||||
auto output_shape = std::vector<int>(SizeToInt(prev_output_shape.size()));
|
||||
|
||||
auto output_type = TypeId::kNumberTypeInt32;
|
||||
|
||||
auto output_tensor_for_sync = std::make_shared<tensor::Tensor>(output_type, output_shape);
|
||||
auto data_ptr = static_cast<int32_t *>(output_tensor_for_sync->data_c());
|
||||
for (size_t i = 0; i < prev_output_shape.size(); ++i) {
|
||||
MS_LOG(INFO) << "DEBUG prev_output_shape[" << i << "]:" << prev_output_shape[i];
|
||||
*(data_ptr + i) = prev_output_shape[i];
|
||||
}
|
||||
|
||||
auto output_addr = AnfAlgo::GetOutputAddr(cnode_ptr_, 0);
|
||||
MS_EXCEPTION_IF_NULL(output_addr);
|
||||
output_addr->SyncHostToDevice(output_shape, LongToSize(output_tensor_for_sync->data().nbytes()),
|
||||
output_tensor_for_sync->data_type(), output_tensor_for_sync->data_c());
|
||||
MS_LOG(INFO) << "Execute DynamicShapeKernel End";
|
||||
}
|
||||
|
||||
device::DynamicKernelPtr DynamicShapeKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
|
||||
return std::make_shared<DynamicShapeKernel>(stream_ptr, cnode_ptr);
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,43 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_DYNAMIC_SHAPE_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_DYNAMIC_SHAPE_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "runtime/device/ascend/executor/host_dynamic_kernel.h"
|
||||
#include "backend/kernel_compiler/host/host_kernel_mod.h"
|
||||
using HostDynamicKernel = mindspore::device::ascend::HostDynamicKernel;
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class DynamicShapeKernel : public HostDynamicKernel {
|
||||
public:
|
||||
DynamicShapeKernel(void *stream, const CNodePtr &cnode_ptr) : HostDynamicKernel(stream, cnode_ptr) {}
|
||||
~DynamicShapeKernel() override = default;
|
||||
void Execute() override;
|
||||
};
|
||||
|
||||
class DynamicShapeKernelMod : public HostKernelMod {
|
||||
public:
|
||||
DynamicShapeKernelMod() = default;
|
||||
~DynamicShapeKernelMod() override = default;
|
||||
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
|
||||
};
|
||||
MS_HOST_REG_KERNEL(DynamicShape, DynamicShapeKernelMod);
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_DYNAMIC_SHAPE_KERNEL_H_
|
|
@ -0,0 +1,42 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "backend/kernel_compiler/host/host_kernel_build.h"
|
||||
#include <string>
|
||||
#include "runtime/device/kernel_runtime.h"
|
||||
#include "backend/kernel_compiler/host/host_kernel_mod.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "backend/session/kernel_graph.h"
|
||||
#include "backend/kernel_compiler/common_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
KernelModPtr HostOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
std::string opname = AnfAlgo::GetCNodeName(anf_node);
|
||||
MS_LOG(INFO) << "Host op [" << opname << "]";
|
||||
auto kerPtr = HostKernelFactory::Get(opname);
|
||||
if (kerPtr == nullptr) {
|
||||
MS_LOG(ERROR) << "Host can't find Kernel[" << opname << "]";
|
||||
return nullptr;
|
||||
}
|
||||
if (!kerPtr->Init(anf_node)) {
|
||||
MS_LOG(ERROR) << "Host Kernel initialize failed!";
|
||||
return nullptr;
|
||||
}
|
||||
return kerPtr;
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,27 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_BUILD_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_BUILD_H_
|
||||
#include <memory>
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
KernelModPtr HostOpBuild(const std::shared_ptr<AnfNode> &anf_node);
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_BUILD_H_
|
|
@ -0,0 +1,59 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/host/host_kernel_metadata.h"
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "backend/kernel_compiler/oplib/oplib.h"
|
||||
#include "backend/kernel_compiler/common_utils.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
constexpr auto kDynamicShape = "DynamicShape";
|
||||
|
||||
void HostMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list) {
|
||||
MS_LOG(INFO) << "HostMetadataInfo.";
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
MS_EXCEPTION_IF_NULL(kernel_info_list);
|
||||
std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
if (op_name != kDynamicShape) {
|
||||
MS_LOG(DEBUG) << "Host does not have op [" << op_name << "]";
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<std::string> inputs_format{};
|
||||
std::vector<TypeId> inputs_type{};
|
||||
for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(kernel_node); ++input_index) {
|
||||
inputs_format.emplace_back(kOpFormat_DEFAULT);
|
||||
inputs_type.push_back(AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index));
|
||||
}
|
||||
std::vector<std::string> outputs_format;
|
||||
std::vector<TypeId> outputs_type;
|
||||
for (size_t output_index = 0; output_index < AnfAlgo::GetOutputTensorNum(kernel_node); ++output_index) {
|
||||
outputs_format.emplace_back(kOpFormat_DEFAULT);
|
||||
outputs_type.push_back(AnfAlgo::GetOutputInferDataType(kernel_node, output_index));
|
||||
}
|
||||
auto builder = KernelBuildInfo::KernelBuildInfoBuilder();
|
||||
builder.SetInputsFormat(inputs_format);
|
||||
builder.SetInputsDeviceType(inputs_type);
|
||||
builder.SetOutputsFormat(outputs_format);
|
||||
builder.SetOutputsDeviceType(outputs_type);
|
||||
builder.SetKernelType(HOST_KERNEL);
|
||||
kernel_info_list->push_back(builder.Build());
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,30 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_META_DATA_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_META_DATA_H_

#include <string>
#include <vector>
#include <memory>
#include "backend/kernel_compiler/kernel_build_info.h"

namespace mindspore {
namespace kernel {
// Collects the supported kernel build infos (formats and data types) for a
// host-side kernel node and appends them to *kernel_info_list.
void HostMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list);
}  // namespace kernel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_META_DATA_H_
|
|
@ -0,0 +1,98 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/host/host_kernel_mod.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "runtime/mem.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "runtime/device/kernel_runtime.h"
|
||||
#include "runtime/device/ascend/executor/host_dynamic_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
// Records `fun` as the creator callable for kernels named `name`. Repeated
// registration under the same name keeps the first creator (map::emplace is
// a no-op for an existing key).
void HostKernelFactory::Registe(const std::string &name, HostKernelCreater &&fun) {
  (void)hostKernelMap_.emplace(name, std::move(fun));
}
|
||||
|
||||
std::shared_ptr<HostKernelMod> HostKernelFactory::Get(const std::string &name) {
|
||||
const auto &map = Get().hostKernelMap_;
|
||||
auto it = map.find(name);
|
||||
if (it != map.end() && it->second) {
|
||||
return (it->second)();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Returns the process-wide factory instance (Meyers singleton; the local
// static guarantees thread-safe one-time initialization since C++11).
HostKernelFactory &HostKernelFactory::Get() {
  static HostKernelFactory instance;
  return instance;
}
|
||||
|
||||
// Byte sizes of each input tensor, filled by Init().
const std::vector<size_t> &HostKernelMod::GetInputSizeList() const { return input_size_list_; }
// Byte sizes of each output tensor, filled by Init().
const std::vector<size_t> &HostKernelMod::GetOutputSizeList() const { return output_size_list_; }
// Workspace byte sizes; not populated by Init() in this class.
const std::vector<size_t> &HostKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
|
||||
bool HostKernelMod::Init(const AnfNodePtr &anf_node) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
|
||||
|
||||
for (size_t i = 0; i < input_num; i++) {
|
||||
std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
|
||||
TypePtr type_ptr = TypeIdToType(AnfAlgo::GetInputDeviceDataType(anf_node, i));
|
||||
MS_EXCEPTION_IF_NULL(type_ptr);
|
||||
int64_t size_i = 1;
|
||||
for (size_t j = 0; j < shape_i.size(); j++) {
|
||||
size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
|
||||
}
|
||||
size_t type_byte = GetTypeByte(type_ptr);
|
||||
if (type_byte == 0) {
|
||||
return false;
|
||||
}
|
||||
size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
|
||||
input_size_list_.push_back(LongToSize(size_i));
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < output_num; i++) {
|
||||
std::vector<size_t> shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
|
||||
TypePtr type_ptr = TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, i));
|
||||
MS_EXCEPTION_IF_NULL(type_ptr);
|
||||
int64_t size_i = 1;
|
||||
for (size_t j = 0; j < shape_i.size(); j++) {
|
||||
size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
|
||||
}
|
||||
size_t type_byte = GetTypeByte(type_ptr);
|
||||
if (type_byte == 0) {
|
||||
return false;
|
||||
}
|
||||
size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
|
||||
output_size_list_.push_back(LongToSize(size_i));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// Intentionally a no-op that reports success: host kernels run through the
// dynamic-kernel path (GenDynamicKernel is pure virtual on this class), not
// through the static Launch interface.
bool HostKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                           const std::vector<AddressPtr> &outputs, void *stream_ptr) {
  return true;
}
|
||||
// Host kernels generate no device task; always returns an empty task list.
std::vector<TaskInfoPtr> HostKernelMod::GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
                                                const std::vector<AddressPtr> &, uint32_t) {
  return {};
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,86 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_MOD_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_MOD_H_
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include "backend/kernel_compiler/ascend_kernel_mod.h"
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
// Base class for kernels executed on the host rather than on the Ascend
// device. Concrete subclasses must implement GenDynamicKernel; Launch and
// GenTask are effectively no-ops for this kernel type (see the .cc).
class HostKernelMod : public AscendKernelMod {
 public:
  HostKernelMod() = default;
  ~HostKernelMod() override = default;
  const std::vector<size_t> &GetInputSizeList() const override;
  const std::vector<size_t> &GetOutputSizeList() const override;
  const std::vector<size_t> &GetWorkspaceSizeList() const override;
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
  std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
                                   const std::vector<AddressPtr> &, uint32_t) override;
  // Pure virtual here: every host kernel must supply its dynamic kernel.
  device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override = 0;
  // Computes input/output byte-size lists from the node's device shapes and
  // data types; returns false on a zero-width element type.
  bool Init(const AnfNodePtr &anf_node);

 protected:
  AnfNodePtr anf_node_;
  std::string op_name_;
  std::vector<size_t> input_size_list_;
  std::vector<size_t> output_size_list_;
  std::vector<size_t> workspace_size_list_;
};
|
||||
|
||||
// Convenience aliases for the host-kernel registry.
// NOTE: "Creater" is a historical spelling kept for source compatibility.
using HostKernelModPtr = std::shared_ptr<HostKernelMod>;
using HostKernelModPtrList = std::vector<HostKernelModPtr>;
using HostKernelCreater = std::function<std::shared_ptr<HostKernelMod>()>;
|
||||
|
||||
class HostKernelFactory {
|
||||
HostKernelFactory() = default;
|
||||
~HostKernelFactory() = default;
|
||||
|
||||
public:
|
||||
static HostKernelFactory &Get();
|
||||
void Registe(const string &name, HostKernelCreater &&fun);
|
||||
static std::shared_ptr<HostKernelMod> Get(const string &name);
|
||||
|
||||
private:
|
||||
std::map<string, HostKernelCreater> hostKernelMap_;
|
||||
};
|
||||
|
||||
// Helper whose construction side effect registers a kernel creator with the
// factory; instantiated at file scope by the MS_HOST_REG_KERNEL macro below.
class _HostKernelRegister {
 public:
  _HostKernelRegister(const string &name, HostKernelCreater &&fun) {
    HostKernelFactory::Get().Registe(name, std::move(fun));
  }
  ~_HostKernelRegister() = default;
};
|
||||
|
||||
// Registers `clazz` as the host-kernel implementation for op KNAME: defines a
// file-scope _HostKernelRegister whose constructor inserts a creator lambda
// into HostKernelFactory. The static_assert rejects non-HostKernelMod types
// at compile time.
#define _MS_HOST_REG_KERNEL_REG(KNAME, clazz)                                                     \
  static_assert(std::is_base_of<HostKernelMod, clazz>::value, " must be base of HostKernelMod"); \
  static const _HostKernelRegister g_##KNAME##_##_kernel_reg(#KNAME, []() {                      \
    std::shared_ptr<clazz> ptr = nullptr;                                                        \
    ptr = std::make_shared<clazz>();                                                             \
    MS_EXCEPTION_IF_NULL(ptr);                                                                   \
    return ptr;                                                                                  \
  });

// Public registration entry point; the extra indirection lets KNAME itself be
// a macro that expands before token pasting.
#define MS_HOST_REG_KERNEL(KNAME, clazz) _MS_HOST_REG_KERNEL_REG(KNAME, clazz)
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_MOD_H_
|
|
@ -174,6 +174,9 @@ void KernelPack::ParseKernelJson(const nlohmann::json &js) {
|
|||
kernel_json_info_.block_dim = js["blockDim"];
|
||||
kernel_json_info_.kernel_name = js["kernelName"];
|
||||
kernel_json_info_.magic = js["magic"];
|
||||
if (js.contains("opParaSize")) {
|
||||
kernel_json_info_.op_para_size = js["opParaSize"];
|
||||
}
|
||||
if (js.find("parameters") != js.end()) {
|
||||
if (!js.at("parameters").is_array()) {
|
||||
MS_LOG(DEBUG) << "Format error!,parameters should be array.";
|
||||
|
|
|
@ -25,9 +25,18 @@
|
|||
#include "ir/tensor.h"
|
||||
#include "abstract/dshape.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "runtime/device/executor/dynamic_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
enum KernelType : int { UNKNOWN_KERNEL_TYPE = 0, AKG_KERNEL, AICPU_KERNEL, RT_KERNEL, HCCL_KERNEL, TBE_KERNEL };
|
||||
enum KernelType : int {
|
||||
UNKNOWN_KERNEL_TYPE = 0,
|
||||
AKG_KERNEL,
|
||||
AICPU_KERNEL,
|
||||
RT_KERNEL,
|
||||
HCCL_KERNEL,
|
||||
TBE_KERNEL,
|
||||
HOST_KERNEL
|
||||
};
|
||||
|
||||
namespace kernel {
|
||||
// Supported fusion type
|
||||
|
@ -69,7 +78,8 @@ struct KernelJsonInfo {
|
|||
std::vector<size_t> parameters;
|
||||
std::string sha256;
|
||||
std::vector<size_t> workspaces;
|
||||
KernelJsonInfo() : block_dim(0) {}
|
||||
uint32_t op_para_size;
|
||||
KernelJsonInfo() : block_dim(0), op_para_size(0) {}
|
||||
};
|
||||
|
||||
class KernelPack {
|
||||
|
@ -118,6 +128,7 @@ class KernelMod {
|
|||
virtual const std::vector<size_t> &GetWorkspaceSizeList() const = 0;
|
||||
virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) = 0;
|
||||
virtual device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) { return nullptr; }
|
||||
virtual std::vector<size_t> GenParameters() { return {}; }
|
||||
virtual void ReleaseResource() {}
|
||||
|
||||
|
|
|
@ -83,8 +83,8 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
|
|||
while (!build_manger->IsAllTaskFinish()) {
|
||||
int task_id = -1;
|
||||
std::string task_result;
|
||||
std::string pre_build_result;
|
||||
auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
|
||||
std::string build_result;
|
||||
auto ret = build_manger->WaitOne(&task_id, &task_result, &build_result);
|
||||
if (!ret) {
|
||||
MS_EXCEPTION(ArgumentError) << "Build Failed. wait one ret:" << ret << ", task id:" << task_id;
|
||||
}
|
||||
|
@ -94,7 +94,7 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
|
|||
<< " change to single op build.";
|
||||
build_failed_num++;
|
||||
}
|
||||
auto kernel_mod_item = build_manger->TaskFinishProcess(task_id, false);
|
||||
auto kernel_mod_item = build_manger->TaskFinishProcess(task_id, build_result, false);
|
||||
if (kernel_mod_item.second != nullptr) {
|
||||
(void)kernel_mod_ret.emplace(kernel_mod_item);
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <memory>
|
||||
#include <algorithm>
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h"
|
||||
#include "backend/kernel_compiler/host/host_kernel_metadata.h"
|
||||
#include "backend/kernel_compiler/rts/rt_kernel_info.h"
|
||||
#include "backend/kernel_compiler/hccl/hccl_kernel_metadata.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h"
|
||||
|
@ -86,6 +87,9 @@ void KernelQueryAll(const CNodePtr &kernel_node,
|
|||
if (kernel_info_list->empty()) {
|
||||
HcclMetadataInfo(kernel_node, kernel_info_list);
|
||||
}
|
||||
if (kernel_info_list->empty()) {
|
||||
HostMetadataInfo(kernel_node, kernel_info_list);
|
||||
}
|
||||
if (kernel_info_list->empty()) {
|
||||
MS_EXCEPTION(NotExistsError)
|
||||
<< "Failed to obtain operator info, Please check whether the operator info is registered, Op full name:"
|
||||
|
|
|
@ -102,6 +102,7 @@ class OpInfo {
|
|||
kernel_name_ = opinfo.kernel_name();
|
||||
partial_flag_ = opinfo.partial_flag_;
|
||||
dynamic_format_ = opinfo.dynamic_format_;
|
||||
dynamic_shape_ = opinfo.dynamic_shape_;
|
||||
op_pattern_ = opinfo.op_pattern();
|
||||
processor_ = opinfo.processor_;
|
||||
for (const auto &attr : opinfo.attrs_ptr()) {
|
||||
|
@ -122,12 +123,14 @@ class OpInfo {
|
|||
std::string fusion_type() const { return fusion_type_; }
|
||||
std::string kernel_name() const { return kernel_name_; }
|
||||
OpPattern op_pattern() const { return op_pattern_; }
|
||||
bool dynamic_shape() const { return dynamic_shape_; }
|
||||
std::string processor() const { return processor_; }
|
||||
std::vector<std::shared_ptr<OpAttr>> attrs_ptr() const { return attrs_ptr_; }
|
||||
std::vector<std::shared_ptr<OpIOInfo>> inputs_ptr() const { return inputs_ptr_; }
|
||||
std::vector<std::shared_ptr<OpIOInfo>> outputs_ptr() const { return outputs_ptr_; }
|
||||
const std::unordered_map<size_t, size_t> &ref_infos() const { return ref_infos_; }
|
||||
|
||||
void set_dynamic_shape(bool dynamic_shape) { dynamic_shape_ = dynamic_shape; }
|
||||
void set_op_name(const std::string &op_name) { op_name_ = op_name; }
|
||||
void set_imply_type(const OpImplyType imply_type) { imply_type_ = imply_type; }
|
||||
void set_impl_path(const std::string &impl_path) { impl_path_ = impl_path; }
|
||||
|
@ -149,7 +152,8 @@ class OpInfo {
|
|||
void ClearOutputs() { (void)outputs_ptr_.clear(); }
|
||||
bool equals_to(const std::shared_ptr<OpInfo> &other_info) const {
|
||||
return this->op_name_ == other_info->op_name_ && this->imply_type_ == other_info->imply_type_ &&
|
||||
this->processor_ == other_info->processor_;
|
||||
this->processor_ == other_info->processor_ && this->op_pattern_ == other_info->op_pattern_ &&
|
||||
this->dynamic_shape_ == other_info->dynamic_shape_;
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -163,6 +167,7 @@ class OpInfo {
|
|||
std::string kernel_name_;
|
||||
bool partial_flag_ = false;
|
||||
bool dynamic_format_ = false;
|
||||
bool dynamic_shape_ = false;
|
||||
OpPattern op_pattern_ = kCommonPattern;
|
||||
std::string processor_;
|
||||
std::vector<std::shared_ptr<OpAttr>> attrs_ptr_;
|
||||
|
|
|
@ -38,6 +38,7 @@ constexpr auto kDynamicFormat = "dynamicFormat";
|
|||
constexpr auto kFormatAgnostic = "formatAgnostic";
|
||||
constexpr auto kBroadcast = "broadcast";
|
||||
constexpr auto kReduce = "reduce";
|
||||
constexpr auto kDynamicShape = "dynamic_shape";
|
||||
constexpr auto kDtypeFormat = "dtype_format";
|
||||
constexpr auto kAttr = "attr";
|
||||
constexpr auto kIputs = "inputs";
|
||||
|
@ -111,6 +112,10 @@ void OpLib::DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_p
|
|||
op_info->set_kernel_name(obj.at(kKernelName));
|
||||
op_info->set_partial_flag(obj.at(kPartialFlag));
|
||||
|
||||
if (obj.find(kDynamicShape) != obj.end()) {
|
||||
op_info->set_dynamic_shape(obj.at(kDynamicShape));
|
||||
}
|
||||
|
||||
if (obj.find(kOpPattern) != obj.end()) {
|
||||
std::string op_pattern = obj.at(kOpPattern);
|
||||
auto find_iter = kOpPatternMap.find(op_pattern);
|
||||
|
@ -322,7 +327,7 @@ bool OpLib::DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply
|
|||
return ret;
|
||||
}
|
||||
|
||||
std::shared_ptr<OpInfo> OpLib::FindOp(const std::string &op_name, OpImplyType imply_type) {
|
||||
std::shared_ptr<OpInfo> OpLib::FindOp(const std::string &op_name, OpImplyType imply_type, bool is_dynamic_shape) {
|
||||
if (!OpLib::RegOpFromLocalInfo()) {
|
||||
MS_LOG(INFO) << "Warning reg local op info failed.";
|
||||
}
|
||||
|
@ -338,16 +343,20 @@ std::shared_ptr<OpInfo> OpLib::FindOp(const std::string &op_name, OpImplyType im
|
|||
for (auto [iter, end] = op_info_.equal_range(op_name); iter != end; ++iter) {
|
||||
auto &op_info = iter->second;
|
||||
MS_EXCEPTION_IF_NULL(op_info);
|
||||
|
||||
if (op_info->imply_type() != imply_type) {
|
||||
continue;
|
||||
}
|
||||
if (imply_type == kAKG && op_info->processor() != target_processor) {
|
||||
continue;
|
||||
}
|
||||
if (is_dynamic_shape && !op_info->dynamic_shape()) {
|
||||
continue;
|
||||
}
|
||||
return op_info;
|
||||
}
|
||||
MS_LOG(INFO) << "FindOp failed: opname: " << op_name << ", imply_type: " << ImplTypeToStr(imply_type)
|
||||
<< ", current op num: " << op_info_.size();
|
||||
<< ", current op num: " << op_info_.size() << " is_dynamic_shape:" << is_dynamic_shape;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
|
|
@ -32,7 +32,8 @@ class OpLib {
|
|||
virtual ~OpLib() = default;
|
||||
static bool RegOp(const std::string &json_string, const std::string &impl_path);
|
||||
static void RegOpInfo(const std::shared_ptr<OpInfo> &opinfo) { op_info_.emplace(opinfo->op_name(), opinfo); }
|
||||
static std::shared_ptr<OpInfo> FindOp(const std::string &op_name, OpImplyType imply_type);
|
||||
static std::shared_ptr<OpInfo> FindOp(const std::string &op_name, OpImplyType imply_type,
|
||||
bool is_dynamic_shape = false);
|
||||
static const std::multimap<std::string, std::shared_ptr<OpInfo>> &GetAllOpsInfo() { return op_info_; }
|
||||
|
||||
protected:
|
||||
|
|
|
@ -21,9 +21,14 @@
|
|||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "common/trans.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "runtime/device/kernel_runtime.h"
|
||||
#include "runtime/device/ascend/executor/rts/memcpy_rts_dynamic_kernel.h"
|
||||
|
||||
using ge::model_runner::MemcpyAsyncTaskInfo;
|
||||
using MemcpyAsyncTaskInfoPtr = std::shared_ptr<MemcpyAsyncTaskInfo>;
|
||||
using AddressPtrList = std::vector<mindspore::kernel::AddressPtr>;
|
||||
using mindspore::device::ascend::MemcpyRtsDynamicKernel;
|
||||
using MemcpyRtsDynamicKernelPtr = std::shared_ptr<MemcpyRtsDynamicKernel>;
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
@ -122,6 +127,32 @@ std::vector<TaskInfoPtr> MemCpyAsyncKernel::GenTask(const std::vector<AddressPtr
|
|||
MS_EXCEPTION_IF_NULL(task_info_ptr);
|
||||
return {task_info_ptr};
|
||||
}
|
||||
device::DynamicKernelPtr MemCpyAsyncKernel::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
|
||||
AddressPtrList kernel_inputs;
|
||||
AddressPtrList kernel_workspaces;
|
||||
AddressPtrList kernel_outputs;
|
||||
device::KernelRuntime::GenLaunchArgs(*this, cnode_ptr, &kernel_inputs, &kernel_workspaces, &kernel_outputs);
|
||||
|
||||
if (kernel_inputs.size() != 1) {
|
||||
MS_LOG(EXCEPTION) << "MemCpyAsync op inputs is not one";
|
||||
}
|
||||
|
||||
if (kernel_outputs.size() != 1) {
|
||||
MS_LOG(EXCEPTION) << "MemCpyAsync op output is not one";
|
||||
}
|
||||
|
||||
if (kernel_outputs[0]->size < kernel_inputs[0]->size) {
|
||||
MS_LOG(EXCEPTION) << "Check rtMemcpyAsync destMax < src size";
|
||||
}
|
||||
// input x -> memcpy_async -> AllReduce
|
||||
if (kernel_outputs[0]->size > kernel_inputs[0]->size) {
|
||||
MS_LOG(WARNING) << "Check rtMemcpyAsync destMax > src size";
|
||||
}
|
||||
|
||||
return std::make_shared<MemcpyRtsDynamicKernel>(stream_ptr, cnode_ptr, kernel_outputs[0]->addr,
|
||||
kernel_outputs[0]->size, kernel_inputs[0]->addr,
|
||||
kernel_inputs[0]->size);
|
||||
}
|
||||
|
||||
const std::vector<TypeId> data_type_list{kNumberTypeInt, kNumberTypeInt8, kNumberTypeInt16, kNumberTypeInt32,
|
||||
kNumberTypeInt64, kNumberTypeUInt, kNumberTypeUInt8, kNumberTypeUInt16,
|
||||
|
|
|
@ -34,6 +34,7 @@ class MemCpyAsyncKernel : public RtKernel {
|
|||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
|
||||
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
|
||||
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
|
||||
|
||||
private:
|
||||
void GetInputOutputDataType(const AnfNodePtr &anf_node);
|
||||
|
|
|
@ -21,8 +21,10 @@
|
|||
#include "framework/ge_runtime/task_info.h"
|
||||
#include "runtime/device/ascend/profiling/profiling_utils.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "runtime/device/ascend/executor/rts/profiling_rts_dynamic_kernel.h"
|
||||
|
||||
using ProfilerTraceTaskInfo = ge::model_runner::ProfilerTraceTaskInfo;
|
||||
using mindspore::device::ascend::ProfilingRtsDynamicKernel;
|
||||
using mindspore::device::ascend::ProfilingUtils;
|
||||
|
||||
namespace mindspore {
|
||||
|
@ -64,5 +66,9 @@ std::vector<TaskInfoPtr> ProfilingKernelMod::GenTask(const std::vector<AddressPt
|
|||
std::make_shared<ProfilerTraceTaskInfo>(kernel_name_, stream_id, log_id_, notify_, flags_);
|
||||
return {task_info_ptr};
|
||||
}
|
||||
|
||||
device::DynamicKernelPtr ProfilingKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
|
||||
return std::make_shared<ProfilingRtsDynamicKernel>(stream_ptr, cnode_ptr, log_id_, notify_, flags_);
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -27,6 +27,7 @@ class ProfilingKernelMod : public RtKernel {
|
|||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
|
||||
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
|
||||
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
|
||||
bool Init(const AnfNodePtr &anf_node) override;
|
||||
|
||||
private:
|
||||
|
|
|
@ -29,157 +29,6 @@
|
|||
namespace mindspore {
|
||||
namespace kernel {
|
||||
namespace tbe {
|
||||
static std::map<string, string> tbe_func_adapter_map = {
|
||||
{"softmax", "softmax_v2"},
|
||||
{"log_softmax", "log_softmax_v2"},
|
||||
{"apply_momentum", "apply_momentum_d"},
|
||||
{"apply_ftrl", "apply_ftrl_d"},
|
||||
{"re_lu6", "relu6"},
|
||||
{"re_lu6_grad", "relu6_grad"},
|
||||
{"re_lu", "relu"},
|
||||
{"reverse_v2", "reverse_v2_d"},
|
||||
{"re_luv2", "relu_v2"},
|
||||
{"p_re_lu", "prelu"},
|
||||
{"p_re_lu_grad", "prelu_grad"},
|
||||
{"tensor_add", "add"},
|
||||
{"reduce_mean", "reduce_mean_d"},
|
||||
{"reduce_max", "reduce_max_d"},
|
||||
{"reduce_min", "reduce_min_d"},
|
||||
{"avg_pool_grad", "avg_pool_grad_d"},
|
||||
{"avg_pool_grad_vm", "avg_pool_grad_d"},
|
||||
{"conv2d_backprop_filter", "conv2d_backprop_filter_d"},
|
||||
{"conv2d_backprop_input", "conv2d_backprop_input_d"},
|
||||
{"depthwise_conv2d_native", "depthwise_conv2d"},
|
||||
{"depthwise_conv2d_native_backprop_filter", "depthwise_conv2d_backprop_filter_d"},
|
||||
{"depthwise_conv2d_native_backprop_input", "depthwise_conv2d_backprop_input_d"},
|
||||
{"scatter_nd", "scatter_nd_d"},
|
||||
{"tile", "tile_d"},
|
||||
{"gather_v2", "gather_v2_d"},
|
||||
{"sparse_gather_v2", "gather_v2_d"},
|
||||
{"batch_mat_mul", "batch_matmul"},
|
||||
{"b_n_training_reduce", "bn_training_reduce"},
|
||||
{"b_n_training_update", "bn_training_update"},
|
||||
{"b_n_training_update_v2", "bn_training_update_v2"},
|
||||
{"b_n_training_update_v3", "bn_training_update_v3"},
|
||||
{"b_n_training_reduce_grad", "bn_training_reduce_grad"},
|
||||
{"b_n_training_update_grad", "bn_training_update_grad"},
|
||||
{"b_n_infer", "bn_infer"},
|
||||
{"b_n_infer_grad", "bn_infer_grad"},
|
||||
{"b_n_inference", "bninference_d"},
|
||||
{"n_pu_clear_float_status", "n_p_u_clear_float_status"},
|
||||
{"n_pu_get_float_status", "n_p_u_get_float_status"},
|
||||
{"n_pu_alloc_float_status", "n_p_u_alloc_float_status"},
|
||||
{"dropout_do_mask", "drop_out_do_mask"},
|
||||
{"strided_slice", "strided_slice_d"},
|
||||
{"strided_slice_grad", "strided_slice_grad_d"},
|
||||
{"sparse_apply_ftrl", "sparse_apply_ftrl_d"},
|
||||
{"sparse_apply_ftrl_v2", "sparse_apply_ftrl_v2_d"},
|
||||
{"apply_ada_max", "apply_ada_max_d"},
|
||||
{"apply_adadelta", "apply_adadelta_d"},
|
||||
{"apply_adagrad", "apply_adagrad_d"},
|
||||
{"apply_adagrad_v2", "apply_adagradv2_d"},
|
||||
{"sparse_apply_adagrad", "sparse_apply_adagrad_d"},
|
||||
{"sparse_apply_adagrad_v2", "sparse_apply_adagrad_v2_d"},
|
||||
{"apply_proximal_adagrad", "apply_proximal_adagrad_d"},
|
||||
{"sparse_apply_proximal_adagrad", "sparse_apply_proximal_adagrad_d"},
|
||||
{"apply_add_sign", "apply_add_sign_d"},
|
||||
{"apply_power_sign", "apply_power_sign_d"},
|
||||
{"apply_centered_rms_prop", "apply_centered_rms_prop_d"},
|
||||
{"transpose", "transpose_d"},
|
||||
{"fill", "fill_d"},
|
||||
{"unsorted_segment_sum", "unsorted_segment_sum_d"},
|
||||
{"unsorted_segment_prod", "unsorted_segment_prod_d"},
|
||||
{"concat", "concat_d"},
|
||||
{"slice", "slice_d"},
|
||||
{"reduce_sum", "reduce_sum_d"},
|
||||
{"inplace_add", "inplace_add_d"},
|
||||
{"inplace_sub", "inplace_sub_d"},
|
||||
{"one_hot", "one_hot_d"},
|
||||
{"sum", "reduce_sum_d"},
|
||||
{"lamb_next_mv_with_decay", "lamb_next_m_v_with_decay"},
|
||||
{"lamb_next_mv", "lamb_next_m_v"},
|
||||
{"split", "split_d"},
|
||||
{"split_v", "split_v_d"},
|
||||
{"resize_nearest_neighbor", "resize_nearest_neighbor_v2_d"},
|
||||
{"resize_nearest_neighbor_grad", "resize_nearest_neighbor_v2_grad_d"},
|
||||
{"pad", "pad_d"},
|
||||
{"argmax", "arg_max_d"},
|
||||
{"argmin", "arg_min_d"},
|
||||
{"space_to_batch", "space_to_batch_d"},
|
||||
{"batch_to_space", "batch_to_space_d"},
|
||||
{"space_to_batch_nd", "space_to_batch_nd_d"},
|
||||
{"batch_to_space_nd", "batch_to_space_nd_d"},
|
||||
{"resize_bilinear", "resize_bilinear_v2_d"},
|
||||
{"resize_bilinear_grad", "resize_bilinear_v2_grad"},
|
||||
{"adam", "apply_adam_d"},
|
||||
{"r_oi_align", "roi_align"},
|
||||
{"r_oi_align_grad", "roi_align_grad"},
|
||||
{"i_ou", "iou"},
|
||||
{"s_gd", "sgd"},
|
||||
{"l_rn", "lrn"},
|
||||
{"l_rn_grad", "lrn_grad"},
|
||||
{"l_ars_update", "lars_v2_update"},
|
||||
{"n_ms_with_mask", "nms_with_mask"},
|
||||
{"square_sum_all", "square_sum_all"},
|
||||
{"cum_sum", "cumsum_d"},
|
||||
{"range", "range_d"},
|
||||
{"lin_space", "lin_space_d"},
|
||||
{"inv_grad", "inv_grad"},
|
||||
{"apply_rms_prop", "apply_rms_prop_d"},
|
||||
{"cum_prod", "cumprod_d"},
|
||||
{"reduce_all", "reduce_all_d"},
|
||||
{"reduce_any", "reduce_any_d"},
|
||||
{"sparse_apply_adagrad", "sparse_apply_adagrad_d"},
|
||||
{"unsorted_segment_min", "unsorted_segment_min_d"},
|
||||
{"reduce_prod", "reduce_prod_d"},
|
||||
{"a_cos", "acos"},
|
||||
{"a_cos_grad", "acos_grad"},
|
||||
{"histogram_fixed_width", "histogram_fixed_width_d"},
|
||||
{"broadcast_to", "broadcast_to_d"},
|
||||
{"inplace_update", "inplace_update_d"},
|
||||
{"i_fmr", "ifmr"},
|
||||
{"matrix_diag", "matrix_diag_d"},
|
||||
{"matrix_diag_part", "matrix_diag_part_d"},
|
||||
{"matrix_set_diag", "matrix_set_diag_d"},
|
||||
{"l_stm_input_grad", "lstm_input_grad"}};
|
||||
|
||||
// Converts a MindSpore op name from CamelCase to the lower snake_case form
// used by TBE (e.g. "LSTMInputGrad" -> "l_stm_input_grad"), then maps it
// through tbe_func_adapter_map when an explicit adapter entry exists
// (e.g. "softmax" -> "softmax_v2"). Mutates *func_name in place.
void TbeAdapter::NormalizeFuncName(std::string *func_name) {
  if (func_name == nullptr) {
    MS_LOG(EXCEPTION) << "func_name is null";
  }
  std::string name_tmp;
  // sub_head is true while we are inside a segment that has already been
  // separated (a run of capitals or digits); it suppresses extra underscores.
  bool sub_head = false;
  for (string::iterator iter = func_name->begin(); iter != func_name->end(); ++iter) {
    if (islower(*iter)) {
      sub_head = false;
    }
    if (isdigit(*iter)) {
      sub_head = true;
    }
    if (isupper(*iter) && iter != func_name->begin()) {
      if (!sub_head) {
        // First capital after lowercase: start a new word.
        (void)name_tmp.insert(name_tmp.end(), '_');
        sub_head = true;
      } else {
        // Within a run of capitals: split only before a capital that begins a
        // lowercase word (so "STMInput" becomes "stm_input").
        string::iterator iter_next = iter + 1;
        if (iter_next != func_name->end()) {
          if (islower(*iter_next)) {
            (void)name_tmp.insert(name_tmp.end(), '_');
          }
        }
      }
    }
    (void)name_tmp.insert(name_tmp.end(), *iter);
  }
  (void)transform(name_tmp.begin(), name_tmp.end(), name_tmp.begin(), ::tolower);
  *func_name = name_tmp;
  // Apply the explicit me->tbe rename table, if this op has an entry.
  auto iter = tbe_func_adapter_map.find(*func_name);
  if (iter != tbe_func_adapter_map.end()) {
    MS_LOG(INFO) << "Map actual op from me: " << *func_name << " to tbe op: " << iter->second;
    *func_name = iter->second;
  }
}
|
||||
|
||||
// Ops whose TBE implementation expects inputs in a different order than the
// MindSpore graph provides; presumably consumed by TbeAdapter::InputOrderPass
// (declared in tbe_adapter.h) — verify against that implementation.
std::unordered_set<std::string> input_order_adjusted_ops = {
  "Conv2DBackpropInput", "Conv2DBackpropFilter", "LogSoftmaxGrad", "LayerNormGrad", "LayerNormXBackprop",
  "LayerNormBetaGammaBackprop", "MinimumGrad", "MaximumGrad", "ApplyCenteredRMSProp"};
|
||||
|
|
|
@ -35,7 +35,6 @@ class TbeAdapter {
|
|||
public:
|
||||
TbeAdapter() = default;
|
||||
~TbeAdapter() = default;
|
||||
static void NormalizeFuncName(std::string *func_name);
|
||||
|
||||
static void InputOrderPass(const std::string &op_name, std::vector<std::vector<nlohmann::json>> const &inputs_list,
|
||||
nlohmann::json *inputs_json);
|
||||
|
|
|
@ -0,0 +1,139 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.h"
#include "backend/session/anf_runtime_algorithm.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
namespace tbe {
|
||||
|
||||
bool TbeDynamicShapeUtil::IsDynamicShapeNode(const CNodePtr &cnode) {
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto input_num = AnfAlgo ::GetInputTensorNum(cnode);
|
||||
for (size_t i = 0; i < input_num; ++i) {
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(cnode, i);
|
||||
if (std::any_of(input_shape.begin(), input_shape.end(), [](const size_t &dim) { return dim < 0; })) {
|
||||
MS_LOG(INFO) << "Node(" << cnode->fullname_with_scope() << ") is dynamic shape node.";
|
||||
return true;
|
||||
}
|
||||
}
|
||||
auto output_num = AnfAlgo ::GetOutputTensorNum(cnode);
|
||||
for (size_t i = 0; i < output_num; ++i) {
|
||||
auto output_shape = AnfAlgo::GetOutputInferShape(cnode, i);
|
||||
if (std::any_of(output_shape.begin(), output_shape.end(), [](const size_t &dim) { return dim < 0; })) {
|
||||
MS_LOG(INFO) << "Node(" << cnode->fullname_with_scope() << ") is dynamic shape node.";
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool TbeDynamicShapeUtil::IsDynamicShapeNode(const AnfNodePtr &anf_node) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
if (anf_node->isa<CNode>()) {
|
||||
auto cnode = anf_node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
return IsDynamicShapeNode(cnode);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void TbeDynamicShapeUtil::SetDynamicShapeAttr(const CNodePtr &cnode) {
|
||||
auto is_dyanmic_shape = IsDynamicShapeNode(cnode);
|
||||
AnfAlgo::SetNodeAttr(kAttrIsDynamicShape, MakeValue(is_dyanmic_shape), cnode);
|
||||
}
|
||||
|
||||
bool TbeDynamicShapeUtil::GetDynamicShapeAttr(const AnfNodePtr &anf_node) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
if (anf_node->isa<CNode>()) {
|
||||
auto cnode = anf_node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
return GetDynamicShapeAttr(cnode);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool TbeDynamicShapeUtil::GetDynamicShapeAttr(const CNodePtr &cnode) {
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto is_dynamic_shape = AnfAlgo::HasNodeAttr(kAttrIsDynamicShape, cnode);
|
||||
if (!is_dynamic_shape) {
|
||||
MS_LOG(INFO) << "Node(" << cnode->fullname_with_scope() << ") does not has is_dynamic_shape attribute.";
|
||||
return false;
|
||||
}
|
||||
is_dynamic_shape = AnfAlgo::GetNodeAttr<bool>(cnode, kAttrIsDynamicShape);
|
||||
return is_dynamic_shape;
|
||||
}
|
||||
|
||||
std::shared_ptr<OpInfo> TbeDynamicShapeUtil::FindOp(const std::string &op_name, const AnfNodePtr &anf_node) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
if (anf_node->isa<CNode>()) {
|
||||
auto cnode = anf_node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
return FindOp(op_name, cnode);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<OpInfo> TbeDynamicShapeUtil::FindOp(const std::string &op_name, const CNodePtr &cnode) {
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto is_dynamic_shape = GetDynamicShapeAttr(cnode);
|
||||
return mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kTBE, is_dynamic_shape);
|
||||
}
|
||||
|
||||
std::vector<std::pair<int, int>> TbeDynamicShapeUtil::GetInputDynamicRange(const AnfNodePtr &anf_node, size_t index) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
auto input_range_min = AnfAlgo::GetInputMinShape(anf_node, index);
|
||||
auto input_range_max = AnfAlgo::GetInputMaxShape(anf_node, index);
|
||||
if (input_range_min.size() != input_range_max.size()) {
|
||||
MS_EXCEPTION(ArgumentError) << "Input range size is not equal, min size: " << input_range_min.size()
|
||||
<< "max size: " << input_range_max.size();
|
||||
}
|
||||
if (input_range_min.empty() && input_range_max.empty()) {
|
||||
return {{1, 1}};
|
||||
}
|
||||
std::vector<std::pair<int, int>> ret;
|
||||
for (size_t i = 0; i < input_range_min.size(); ++i) {
|
||||
ret.emplace_back(input_range_min[i], input_range_max[i]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::vector<std::pair<int, int>> TbeDynamicShapeUtil::GetOutputDynamicRange(const AnfNodePtr &anf_node, size_t index) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
auto output_range_min = AnfAlgo::GetOutputMinShape(anf_node, index);
|
||||
auto output_range_max = AnfAlgo::GetOutputMaxShape(anf_node, index);
|
||||
if (output_range_min.size() != output_range_max.size()) {
|
||||
MS_EXCEPTION(ArgumentError) << "Onput range size is not equal, min size: " << output_range_min.size()
|
||||
<< "max size: " << output_range_max.size();
|
||||
}
|
||||
if (output_range_max.empty() && output_range_min.empty()) {
|
||||
return {{1, 1}};
|
||||
}
|
||||
std::vector<std::pair<int, int>> ret;
|
||||
for (size_t i = 0; i < output_range_min.size(); ++i) {
|
||||
ret.emplace_back(output_range_min[i], output_range_max[i]);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // namespace tbe
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,49 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TBE_TBE_DYNAMINC_SHAPE_UTIL_H
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TBE_TBE_DYNAMINC_SHAPE_UTIL_H
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "mindspore/core/ir/anf.h"
|
||||
#include "backend/kernel_compiler/oplib/oplib.h"
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
namespace tbe {
|
||||
|
||||
// Helper utilities for TBE dynamic-shape support: detecting dynamic-shape
// nodes, tagging them with the is_dynamic_shape attribute, looking up the
// matching (static vs. dynamic) TBE op info, and querying per-dim shape
// ranges used when generating kernel json.
class TbeDynamicShapeUtil {
 public:
  TbeDynamicShapeUtil() = default;
  ~TbeDynamicShapeUtil() = default;
  // True when any input/output infer shape of the node contains a dynamic dim.
  static bool IsDynamicShapeNode(const CNodePtr &cnode);
  // AnfNode overload: non-CNode nodes are never considered dynamic.
  static bool IsDynamicShapeNode(const AnfNodePtr &anf_node);
  // Runs detection and stores the result on the node (kAttrIsDynamicShape).
  static void SetDynamicShapeAttr(const CNodePtr &cnode);
  // Reads kAttrIsDynamicShape; a missing attribute means "not dynamic".
  static bool GetDynamicShapeAttr(const CNodePtr &cnode);
  static bool GetDynamicShapeAttr(const AnfNodePtr &anf_node);
  // Looks up the registered TBE op info, selecting the dynamic implementation
  // when the node carries the dynamic-shape attribute; nullptr for non-CNodes.
  static std::shared_ptr<OpInfo> FindOp(const std::string &op_name, const AnfNodePtr &anf_node);
  static std::shared_ptr<OpInfo> FindOp(const std::string &op_name, const CNodePtr &cnode);
  // Per-dimension (min, max) shape range for the given input/output index;
  // returns {{1, 1}} for scalar (rank-0) shapes.
  static std::vector<std::pair<int, int>> GetInputDynamicRange(const AnfNodePtr &anf_node, size_t index);
  static std::vector<std::pair<int, int>> GetOutputDynamicRange(const AnfNodePtr &anf_node, size_t index);
};
|
||||
|
||||
} // namespace tbe
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TBE_TBE_DYNAMINC_SHAPE_UTIL_H
|
|
@ -23,6 +23,7 @@
|
|||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_adapter.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_utils.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "runtime/dev.h"
|
||||
|
@ -61,6 +62,7 @@ constexpr auto kJDataType = "data_type";
|
|||
constexpr auto kJOutputIndex = "output_index";
|
||||
constexpr auto kJOutputDesc = "output_desc";
|
||||
constexpr auto kJInputDesc = "input_desc";
|
||||
constexpr auto kJRange = "range";
|
||||
constexpr auto kVTypeInt = "int";
|
||||
constexpr auto kVTypeStr = "str";
|
||||
constexpr auto kVTypeBool = "bool";
|
||||
|
@ -89,24 +91,21 @@ constexpr auto kJKwdArgs = "kwds_args";
|
|||
constexpr auto kJListArgs = "list_args";
|
||||
constexpr auto kJSocVersion = "socVersion";
|
||||
constexpr auto kSOC_VERSION = "SOC_VERSION";
|
||||
constexpr auto kJIsDynamicShape = "is_dynamic_shape";
|
||||
|
||||
bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspore::AnfNode> &anf_node,
|
||||
nlohmann::json *kernel_json) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
MS_EXCEPTION_IF_NULL(kernel_json);
|
||||
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
|
||||
auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kTBE);
|
||||
auto op_info_ptr = mindspore::kernel::tbe::TbeDynamicShapeUtil::FindOp(op_name, anf_node);
|
||||
MS_EXCEPTION_IF_NULL(op_info_ptr);
|
||||
(*kernel_json)[kPlatform] = kPlatTBE;
|
||||
(*kernel_json)[kGenModel] = kSingle;
|
||||
(*kernel_json)[kImplPath] = op_info_ptr->impl_path();
|
||||
nlohmann::json op_info_json;
|
||||
if (op_info_ptr->impl_path().empty()) {
|
||||
tbe::TbeAdapter::NormalizeFuncName(&op_name);
|
||||
} else {
|
||||
op_name = op_info_ptr->kernel_name();
|
||||
}
|
||||
op_info_json[kJName] = op_name;
|
||||
op_info_json[kJIsDynamicShape] = tbe::TbeDynamicShapeUtil::GetDynamicShapeAttr(anf_node->cast<CNodePtr>());
|
||||
op_info_json[kJName] = op_info_ptr->kernel_name();
|
||||
// generate inputs json
|
||||
nlohmann::json inputs_json;
|
||||
if (!GenTbeInputsJson(anf_node, op_info_ptr, &inputs_json)) {
|
||||
|
@ -180,6 +179,7 @@ bool TbeKernelJsonCreator::GenInputDescJson(const std::shared_ptr<AnfNode> &anf_
|
|||
input_desc_json[kJFormat] = format;
|
||||
input_desc_json[kJValid] = value;
|
||||
input_desc_json[kJParamType] = input_ptr->param_type();
|
||||
input_desc_json[kJRange] = tbe::TbeDynamicShapeUtil::GetInputDynamicRange(anf_node, real_input_index);
|
||||
input_list->emplace_back(input_desc_json);
|
||||
}
|
||||
return true;
|
||||
|
@ -359,8 +359,13 @@ void TbeKernelJsonCreator::GenOutputList(const std::shared_ptr<AnfNode> &anf_nod
|
|||
for (size_t i = 0; i < output_obj_num; i++) {
|
||||
auto dtype = GetDeviceOutputType(anf_node, *output_idx);
|
||||
auto format = GetDeviceOutputFormat(anf_node, *output_idx);
|
||||
auto shape = GetDeviceOutputShape(anf_node, *output_idx);
|
||||
std::vector<size_t> ori_shape = AnfAlgo::GetOutputInferShape(anf_node, *output_idx);
|
||||
|
||||
std::vector<int64_t> shape;
|
||||
AnfAlgo::GetRealDynamicShape(GetDeviceOutputShape(anf_node, *output_idx), NOT_NULL(&shape));
|
||||
|
||||
std::vector<int64_t> ori_shape;
|
||||
AnfAlgo::GetRealDynamicShape(AnfAlgo::GetOutputInferShape(anf_node, *output_idx), NOT_NULL(&ori_shape));
|
||||
// std::vector<size_t> ori_shape = AnfAlgo::GetOutputInferShape(anf_node, *output_idx);
|
||||
if (ori_shape.empty()) {
|
||||
ori_shape.emplace_back(1);
|
||||
}
|
||||
|
@ -373,6 +378,7 @@ void TbeKernelJsonCreator::GenOutputList(const std::shared_ptr<AnfNode> &anf_nod
|
|||
output_obj[kJName] = output_ptr->name();
|
||||
output_obj[kJValid] = true;
|
||||
output_obj[kJParamType] = output_ptr->param_type();
|
||||
output_obj[kJRange] = tbe::TbeDynamicShapeUtil::GetOutputDynamicRange(anf_node, *output_idx);
|
||||
output_list->emplace_back(output_obj);
|
||||
(*output_idx)++;
|
||||
}
|
||||
|
@ -575,48 +581,76 @@ std::string TbeKernelJsonCreator::GetDeviceOutputFormat(const AnfNodePtr &anf_no
|
|||
return format;
|
||||
}
|
||||
|
||||
// Computes the byte size of every valid input tensor described by the kernel
// json and appends each size to *input_size_list. For a -1 (dynamic) dim the
// node's max shape is substituted so the buffer is sized for the worst case.
void GetInputSizeList(const nlohmann::json &input_json, std::vector<size_t> *input_size_list,
                      const AnfNodePtr &anf_node) {
  for (size_t i = 0; i < input_json.size(); i++) {
    for (size_t m = 0; m < input_json[i].size(); m++) {
      size_t size_i = 1;
      // Optional inputs marked invalid contribute no size entry.
      if (input_json[i][m][kJValid] == false) {
        std::string input_name = input_json[i][m][kJName];
        MS_LOG(INFO) << "Input name:" << input_name << "is optional, valid is false.";
        continue;
      }
      for (size_t j = 0; j < input_json[i][m][kJShape].size(); ++j) {
        if (input_json[i][m][kJShape][j] == -1) {
          // NOTE(review): the max shape is fetched with the outer index `i`,
          // not a flat input index accounting for `m` — confirm this matches
          // how AnfAlgo::GetInputMaxShape numbers inputs when m > 0.
          auto input_max_shape = AnfAlgo::GetInputMaxShape(anf_node, i);
          if (j >= input_max_shape.size()) {
            MS_LOG(EXCEPTION) << "Invalid Dynamic Shape Max Shape";
          }
          MS_LOG(INFO) << "Change -1 Shape to Max Shape:" << input_max_shape[j];
          size_i *= input_max_shape[j];
          continue;
        }
        size_i *= static_cast<size_t>(input_json[i][m][kJShape][j]);
      }
      // Multiply the element count by the dtype width to get bytes.
      std::string dtype = input_json[i][m][kJDtype];
      size_t nbyte = tbe::GetDtypeNbyte(dtype);
      size_i *= nbyte;
      input_size_list->push_back(size_i);
    }
  }
}
|
||||
|
||||
// Computes the byte size of every valid output tensor described by the kernel
// json and appends each size to *output_size_list. For a -1 (dynamic) dim the
// node's max shape is substituted so the buffer is sized for the worst case.
void GetOutputSizeList(const nlohmann::json &output_json, std::vector<size_t> *output_size_list,
                       const AnfNodePtr &anf_node) {
  for (size_t i = 0; i < output_json.size(); i++) {
    for (size_t m = 0; m < output_json[i].size(); m++) {
      size_t size_i = 1;
      // Optional outputs marked invalid contribute no size entry.
      if (output_json[i][m][kJValid] == false) {
        std::string output_name = output_json[i][m][kJName];
        MS_LOG(INFO) << "Output name:" << output_name << " is optional, valid is false.";
        continue;
      }
      for (size_t j = 0; j < output_json[i][m][kJShape].size(); ++j) {
        if (output_json[i][m][kJShape][j] == -1) {
          // NOTE(review): the max shape is fetched with the outer index `i`,
          // not a flat output index accounting for `m` — confirm this matches
          // how AnfAlgo::GetOutputMaxShape numbers outputs when m > 0.
          auto output_max_shape = AnfAlgo::GetOutputMaxShape(anf_node, i);
          if (j >= output_max_shape.size()) {
            MS_LOG(EXCEPTION) << "Invalid Dynamic Shape Max Shape";
          }
          MS_LOG(INFO) << "Change -1 Shape to Max Shape:" << output_max_shape[j];
          size_i *= output_max_shape[j];
          continue;
        }
        size_i *= static_cast<size_t>(output_json[i][m][kJShape][j]);
      }
      // Multiply the element count by the dtype width to get bytes.
      std::string dtype = output_json[i][m][kJDtype];
      size_t nbyte = tbe::GetDtypeNbyte(dtype);
      size_i *= nbyte;
      output_size_list->push_back(size_i);
    }
  }
}
|
||||
|
||||
bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector<size_t> *input_size_list,
|
||||
std::vector<size_t> *output_size_list) {
|
||||
std::vector<size_t> *output_size_list, const AnfNodePtr &anf_node) {
|
||||
if (input_size_list == nullptr || output_size_list == nullptr) {
|
||||
MS_LOG(ERROR) << "Input size or output size is nullptr";
|
||||
return false;
|
||||
}
|
||||
input_size_list->clear();
|
||||
output_size_list->clear();
|
||||
for (size_t i = 0; i < kernel_json[kJOpInfo][kJInputs].size(); i++) {
|
||||
for (size_t m = 0; m < kernel_json[kJOpInfo][kJInputs][i].size(); m++) {
|
||||
size_t size_i = 1;
|
||||
if (kernel_json[kJOpInfo][kJInputs][i][m][kJValid] == false) {
|
||||
std::string input_name = kernel_json[kJOpInfo][kJInputs][i][m][kJName];
|
||||
MS_LOG(INFO) << "Input name:" << input_name << "is optional, valid is false.";
|
||||
continue;
|
||||
}
|
||||
for (const auto &j : kernel_json[kJOpInfo][kJInputs][i][m][kJShape]) {
|
||||
size_i *= static_cast<size_t>(j);
|
||||
}
|
||||
std::string dtype = kernel_json[kJOpInfo][kJInputs][i][m][kJDtype];
|
||||
size_t nbyte = tbe::GetDtypeNbyte(dtype);
|
||||
size_i *= nbyte;
|
||||
input_size_list->push_back(size_i);
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < kernel_json[kJOpInfo][kJOutputs].size(); i++) {
|
||||
for (size_t m = 0; m < kernel_json[kJOpInfo][kJOutputs][i].size(); m++) {
|
||||
size_t size_i = 1;
|
||||
if (kernel_json[kJOpInfo][kJOutputs][i][m][kJValid] == false) {
|
||||
std::string output_name = kernel_json[kJOpInfo][kJOutputs][i][m][kJName];
|
||||
MS_LOG(INFO) << "Output name:" << output_name << " is optional, valid is false.";
|
||||
continue;
|
||||
}
|
||||
for (const auto &j : kernel_json[kJOpInfo][kJOutputs][i][m][kJShape]) {
|
||||
size_i *= static_cast<size_t>(j);
|
||||
}
|
||||
std::string dtype = kernel_json[kJOpInfo][kJOutputs][i][m][kJDtype];
|
||||
size_t nbyte = tbe::GetDtypeNbyte(dtype);
|
||||
size_i *= nbyte;
|
||||
output_size_list->push_back(size_i);
|
||||
}
|
||||
}
|
||||
GetInputSizeList(kernel_json[kJOpInfo][kJInputs], input_size_list, anf_node);
|
||||
GetOutputSizeList(kernel_json[kJOpInfo][kJOutputs], output_size_list, anf_node);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -678,17 +712,18 @@ void TbeKernelBuild::GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode
|
|||
MS_EXCEPTION_IF_NULL(fusion_kernel_name);
|
||||
// gen others
|
||||
auto origin_type = AnfAlgo::GetCNodeName(cnode);
|
||||
auto op_info_ptr = tbe::TbeDynamicShapeUtil::FindOp(origin_type, cnode);
|
||||
// replace special op type for buffer fusion op
|
||||
auto type = GetRealOpType(origin_type);
|
||||
(*compute_op_str)[kJtype] = type;
|
||||
tbe::TbeAdapter::NormalizeFuncName(&type);
|
||||
(*compute_op_str)[kJFuncName] = type;
|
||||
auto kernel_name = op_info_ptr->kernel_name();
|
||||
(*compute_op_str)[kJFuncName] = kernel_name;
|
||||
(*compute_op_str)[kJModuleName] = std::string("impl.") + type;
|
||||
(*compute_op_str)[kJName] = cnode->fullname_with_scope();
|
||||
(*compute_op_str)[kJPattern] = GetNodeFusionType(cnode);
|
||||
(*compute_op_str)[kJPyModulePath] = "/usr/local/Ascend/opp/op_impl/build_in/ai_core/tbe";
|
||||
(void)(*fusion_kernel_name).append("_");
|
||||
(void)(*fusion_kernel_name).append(type);
|
||||
(void)(*fusion_kernel_name).append(kernel_name);
|
||||
}
|
||||
|
||||
void TbeKernelBuild::GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str) {
|
||||
|
@ -952,7 +987,7 @@ size_t TbeKernelBuild::GetOptionalInput(const mindspore::CNodePtr &cnode, bool i
|
|||
}
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto node_name = AnfAlgo::GetCNodeName(cnode);
|
||||
auto op_info = OpLib::FindOp(node_name, kTBE);
|
||||
auto op_info = tbe::TbeDynamicShapeUtil::FindOp(node_name, cnode);
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
if (op_info->inputs_ptr().size() < (cnode->inputs().size() - 1)) {
|
||||
MS_EXCEPTION(ArgumentError) << "op info error, node name:" << cnode->fullname_with_scope();
|
||||
|
|
|
@ -38,7 +38,7 @@ class TbeKernelBuild {
|
|||
|
||||
public:
|
||||
static bool GetIOSize(const nlohmann::json &kernel_json, std::vector<size_t> *input_size_list,
|
||||
std::vector<size_t> *output_size_list);
|
||||
std::vector<size_t> *output_size_list, const AnfNodePtr &anf_node);
|
||||
// Ub Fuison
|
||||
static bool GenFusionScopeJson(const std::vector<AnfNodePtr> &input_nodes,
|
||||
const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_json,
|
||||
|
|
|
@ -19,11 +19,14 @@
|
|||
#include "runtime/rt.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "graphengine/inc/framework/ge_runtime/task_info.h"
|
||||
#include "runtime/device/ascend/executor/ai_core_dynamic_kernel.h"
|
||||
#include "runtime/device/kernel_runtime.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
using TbeTaskInfoPtr = std::shared_ptr<ge::model_runner::TbeTaskInfo>;
|
||||
using tbe::KernelManager;
|
||||
using AddressPtrList = std::vector<mindspore::kernel::AddressPtr>;
|
||||
bool TbeKernelMod::Launch(const std::vector<mindspore::kernel::AddressPtr> &inputs,
|
||||
const std::vector<mindspore::kernel::AddressPtr> &workspace,
|
||||
const std::vector<mindspore::kernel::AddressPtr> &outputs, void *stream_ptr) {
|
||||
|
@ -105,6 +108,49 @@ std::vector<TaskInfoPtr> TbeKernelMod::GenTask(const std::vector<AddressPtr> &in
|
|||
return {task_info_ptr};
|
||||
}
|
||||
|
||||
// Builds an AiCoreDynamicKernel executor for a dynamic-shape TBE node:
// gathers launch addresses, resolves the kernel stub function, flattens the
// argument list (inputs, outputs, workspaces), and optionally allocates a
// device buffer for runtime tiling data.
device::DynamicKernelPtr TbeKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
  AddressPtrList kernel_inputs;
  AddressPtrList kernel_workspaces;
  AddressPtrList kernel_outputs;
  device::KernelRuntime::GenLaunchArgs(*this, cnode_ptr, &kernel_inputs, &kernel_workspaces, &kernel_outputs);

  // Get para_size from json
  auto kernel_json_info = kernel_pack_->kernel_json_info();
  auto op_para_size = kernel_json_info.op_para_size;

  // Get stub_function
  uint32_t block_dim = 1;  // default blockdim equal to 1.
  auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim);
  if (func_stub == 0) {
    MS_LOG(EXCEPTION) << "GenFuncStub failed.";
  }
  const void *stub_func_ptr = reinterpret_cast<void *>(func_stub);

  // Generate args: device addresses in the order the kernel expects —
  // inputs first, then outputs, then workspaces (if any).
  std::vector<void *> runtime_args;
  (void)std::transform(std::begin(kernel_inputs), std::end(kernel_inputs), std::back_inserter(runtime_args),
                       [](const AddressPtr &input) -> void * { return input->addr; });
  (void)std::transform(std::begin(kernel_outputs), std::end(kernel_outputs), std::back_inserter(runtime_args),
                       [](const AddressPtr &output) -> void * { return output->addr; });
  if (!kernel_workspaces.empty()) {
    (void)std::transform(std::begin(kernel_workspaces), std::end(kernel_workspaces), std::back_inserter(runtime_args),
                         [](const AddressPtr &addr) -> void * { return addr->addr; });
  }

  // A non-zero op_para_size means the kernel takes a tiling-data buffer as a
  // trailing argument; allocate it on device HBM here.
  // NOTE(review): ownership of tiling_data_ptr appears to pass to the
  // AiCoreDynamicKernel below — confirm it frees the buffer, else this leaks.
  void *tiling_data_ptr = nullptr;
  if (op_para_size > 0) {
    auto ret = rtMalloc(&tiling_data_ptr, op_para_size, RT_MEMORY_HBM);
    if (ret != RT_ERROR_NONE) {
      MS_LOG(EXCEPTION) << "rtMalloc tiling data failed";
    }
    runtime_args.push_back(tiling_data_ptr);
  }

  auto executor = std::make_shared<device::ascend::AiCoreDynamicKernel>(
    stub_func_ptr, block_dim, tiling_data_ptr, op_para_size, stream_ptr, cnode_ptr, runtime_args);
  return executor;
}
|
||||
|
||||
vector<size_t> TbeKernelMod::GenParameters() {
|
||||
auto kernel_json_info = kernel_pack_->kernel_json_info();
|
||||
return kernel_json_info.parameters;
|
||||
|
|
|
@ -42,6 +42,7 @@ class TbeKernelMod : public AscendKernelMod {
|
|||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
|
||||
std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspaces,
|
||||
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
|
||||
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
|
||||
std::vector<size_t> GenParameters() override;
|
||||
|
||||
private:
|
||||
|
|
|
@ -15,13 +15,11 @@
|
|||
*/
|
||||
|
||||
#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h"
|
||||
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "utils/ms_context.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_adapter.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_kernel_build.h"
|
||||
|
@ -29,6 +27,7 @@
|
|||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_utils.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
@ -52,15 +51,18 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
|
|||
// get size
|
||||
std::vector<size_t> input_size_list;
|
||||
std::vector<size_t> output_size_list;
|
||||
(void)TbeKernelBuild::GetIOSize(kernel_json, &input_size_list, &output_size_list);
|
||||
(void)TbeKernelBuild::GetIOSize(kernel_json, &input_size_list, &output_size_list, anf_node);
|
||||
// search cache
|
||||
const std::string &json_name = creator.json_name();
|
||||
if (build_manger->SearchInCache(json_name, processor, input_size_list, output_size_list, anf_node.get())) {
|
||||
MS_LOG(INFO) << "Use cached kernel, kernel json name:." << json_name;
|
||||
auto IsDynamicShape = tbe::TbeDynamicShapeUtil::GetDynamicShapeAttr(anf_node);
|
||||
if (build_manger->SearchInCache(json_name, processor, input_size_list, output_size_list, anf_node.get()) &&
|
||||
!IsDynamicShape) {
|
||||
MS_LOG(INFO) << "Node:" << anf_node->fullname_with_scope() << " Use cached kernel, kernel json name:."
|
||||
<< json_name;
|
||||
continue;
|
||||
}
|
||||
// same op not need build, but need wait build finish to set kernel mode
|
||||
if (processed_kernel.find(json_name) != processed_kernel.end()) {
|
||||
if (processed_kernel.find(json_name) != processed_kernel.end() && !IsDynamicShape) {
|
||||
build_manger->SaveSameOpInfo(anf_node, json_name, input_size_list, output_size_list);
|
||||
continue;
|
||||
}
|
||||
|
@ -72,8 +74,8 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
|
|||
while (!build_manger->IsAllTaskFinish()) {
|
||||
int task_id = -1;
|
||||
std::string task_result;
|
||||
std::string pre_build_result;
|
||||
auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
|
||||
std::string build_result;
|
||||
auto ret = build_manger->WaitOne(&task_id, &task_result, &build_result);
|
||||
if (!ret) {
|
||||
MS_EXCEPTION(ArgumentError) << "Build Failed. wait one ret:" << ret << ", task id:" << task_id;
|
||||
}
|
||||
|
@ -81,7 +83,7 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
|
|||
if (task_result != "Success") {
|
||||
MS_EXCEPTION(ArgumentError) << "task compile Failed, task id:" << task_id << ", cause:" << task_result;
|
||||
}
|
||||
(void)build_manger->TaskFinishProcess(task_id);
|
||||
(void)build_manger->TaskFinishProcess(task_id, build_result);
|
||||
}
|
||||
return build_manger->GenSameOpKernelMod();
|
||||
}
|
||||
|
@ -93,7 +95,7 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod
|
|||
const std::vector<size_t> &output_size_list, int32_t scope_id) {
|
||||
MS_LOG(INFO) << "SaveTaskInfo, task id: " << task_id;
|
||||
struct KernelBuildTaskInfo task_info;
|
||||
task_info.node = anf_node.get();
|
||||
task_info.node = anf_node;
|
||||
task_info.json_name = json_name;
|
||||
if (anf_node == nullptr) {
|
||||
task_info.processor = tbe::kProcessorAiCore;
|
||||
|
@ -111,7 +113,38 @@ bool ParallelBuildManager::IsAllTaskFinish() const {
|
|||
return task_map_.empty();
|
||||
}
|
||||
|
||||
std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) {
|
||||
void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) {
|
||||
auto task_iter = pre_task_map_.find(task_id);
|
||||
if (task_iter == pre_task_map_.end()) {
|
||||
MS_EXCEPTION(ArgumentError) << "can find pre task_id:" << task_id;
|
||||
}
|
||||
auto node = task_iter->second;
|
||||
auto builder =
|
||||
std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node));
|
||||
std::string start_flag = "fusion_pattern_start";
|
||||
std::string end_flag = "fusion_pattern_end";
|
||||
int start = pre_build_result.find(start_flag);
|
||||
int end = pre_build_result.find(end_flag);
|
||||
if (start != -1 && end != -1 && end >= start) {
|
||||
std::string result = pre_build_result.substr(start + start_flag.size(), end - start - start_flag.size());
|
||||
if (result.empty()) {
|
||||
(void)pre_task_map_.erase(task_iter);
|
||||
return;
|
||||
}
|
||||
transform(result.begin(), result.end(), result.begin(), ::toupper);
|
||||
AnfAlgo::SetNodeAttr(kAttrFusionType, MakeValue(result), node);
|
||||
FusionType fusion_type = tbe::GetFusionType(result);
|
||||
builder->SetFusionType(fusion_type);
|
||||
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get());
|
||||
}
|
||||
(void)pre_task_map_.erase(task_iter);
|
||||
}
|
||||
|
||||
std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t task_id, const std::string &build_ret,
|
||||
bool set_kernel_mod) {
|
||||
auto compile_info = ProcessBuildRetStr(build_ret);
|
||||
MS_LOG(DEBUG) << "Tbe build ret:" << compile_info;
|
||||
|
||||
auto task_iter = task_map_.find(task_id);
|
||||
if (task_iter == task_map_.end()) {
|
||||
MS_EXCEPTION(ArgumentError) << "can find task_id:" << task_id;
|
||||
|
@ -133,7 +166,9 @@ std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t
|
|||
task_iter->second.output_size_list, kernel_pack);
|
||||
MS_EXCEPTION_IF_NULL(kernel_mod);
|
||||
if (set_kernel_mod) {
|
||||
AnfAlgo::SetKernelMod(kernel_mod, task_iter->second.node);
|
||||
AnfAlgo::SetKernelMod(kernel_mod, task_iter->second.node.get());
|
||||
AnfAlgo::SetNodeAttr(kAttrCompileInfo, MakeValue(compile_info), task_iter->second.node);
|
||||
MS_LOG(DEBUG) << "Set Node Attr compile_info:" << compile_info;
|
||||
}
|
||||
auto ret = std::make_pair(task_iter->second.scope_id, kernel_mod);
|
||||
(void)task_map_.erase(task_iter);
|
||||
|
@ -145,7 +180,7 @@ void ParallelBuildManager::SaveSameOpInfo(const mindspore::AnfNodePtr &anf_node,
|
|||
const std::vector<size_t> &input_size_list,
|
||||
const std::vector<size_t> &output_size_list) {
|
||||
struct KernelBuildTaskInfo task_info;
|
||||
task_info.node = anf_node.get();
|
||||
task_info.node = anf_node;
|
||||
task_info.json_name = json_name;
|
||||
task_info.processor = tbe::GetProcessor(anf_node);
|
||||
task_info.input_size_list.assign(input_size_list.begin(), input_size_list.end());
|
||||
|
@ -156,7 +191,7 @@ void ParallelBuildManager::SaveSameOpInfo(const mindspore::AnfNodePtr &anf_node,
|
|||
bool ParallelBuildManager::GenSameOpKernelMod() const {
|
||||
for (const auto &task_info : same_op_list_) {
|
||||
bool ret = SearchInCache(task_info.json_name, task_info.processor, task_info.input_size_list,
|
||||
task_info.output_size_list, task_info.node);
|
||||
task_info.output_size_list, task_info.node.get());
|
||||
if (!ret) {
|
||||
MS_LOG(INFO) << "can't find " << task_info.json_name << " in cache.";
|
||||
return false;
|
||||
|
@ -212,5 +247,20 @@ void ParallelBuildManager::ResetTaskInfo() {
|
|||
same_op_list_.clear();
|
||||
AscendKernelBuildClient::Instance().TbeReset();
|
||||
}
|
||||
|
||||
std::string ParallelBuildManager::ProcessBuildRetStr(const std::string &build_result) {
  // Extracts the compile-info payload embedded in the build output between
  // the fusion-pattern marker strings; returns "" when the markers are
  // missing, malformed, or the payload is empty.
  const std::string start_flag = "fusion_pattern_start";
  const std::string end_flag = "fusion_pattern_end";
  // BUG FIX: std::string::find returns size_t; storing it in int and
  // comparing against -1 relied on implementation-defined npos truncation.
  // Compare against std::string::npos directly instead.
  auto start = build_result.find(start_flag);
  auto end = build_result.find(end_flag);
  if (start != std::string::npos && end != std::string::npos && end >= start) {
    std::string result = build_result.substr(start + start_flag.size(), end - start - start_flag.size());
    if (!result.empty()) {
      return result;
    }
  }
  return "";
}
|
||||
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -31,7 +31,7 @@ namespace kernel {
|
|||
bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes);
|
||||
|
||||
struct KernelBuildTaskInfo {
|
||||
AnfNode *node;
|
||||
AnfNodePtr node;
|
||||
std::string processor;
|
||||
std::string json_name;
|
||||
std::vector<size_t> input_size_list;
|
||||
|
@ -53,16 +53,21 @@ class ParallelBuildManager {
|
|||
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
|
||||
AnfNode *node) const;
|
||||
bool IsAllTaskFinish() const;
|
||||
std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true);
|
||||
void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result);
|
||||
std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, const std::string &build_ret,
|
||||
bool set_kernel_mod = true);
|
||||
KernelModPtr GenKernelMod(const string &json_name, const string &processor,
|
||||
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
|
||||
const KernelPackPtr &kernel_pack) const;
|
||||
|
||||
// Interactive with real backend, who could be implemented by Python.
|
||||
int StartCompileOp(const nlohmann::json &kernel_json);
|
||||
bool WaitOne(int *task_id, std::string *task_result, std::string *pre_build_result);
|
||||
static int StartCompileOp(const nlohmann::json &kernel_json);
|
||||
static bool WaitOne(int *task_id, std::string *task_result, std::string *build_result);
|
||||
void ResetTaskInfo();
|
||||
|
||||
private:
|
||||
std::string ProcessBuildRetStr(const std::string &build_result);
|
||||
|
||||
private:
|
||||
std::map<int32_t, AnfNodePtr> pre_task_map_;
|
||||
std::map<int32_t, KernelBuildTaskInfo> task_map_;
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_reduce_selecter.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_kernel_select/common_utils.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_property_checker.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.h"
|
||||
#include "backend/session/kernel_build_client.h"
|
||||
|
||||
namespace mindspore {
|
||||
|
@ -54,7 +55,8 @@ void TbeKernelSelect::TbeMetadataInfoEx() {
|
|||
MS_EXCEPTION_IF_NULL(cnode_ptr_);
|
||||
MS_EXCEPTION_IF_NULL(kernel_info_list_);
|
||||
node_name_ = AnfAlgo::GetCNodeName(cnode_ptr_);
|
||||
auto op_info_ptr = OpLib::FindOp(node_name_, kTBE);
|
||||
|
||||
auto op_info_ptr = tbe::TbeDynamicShapeUtil::FindOp(node_name_, cnode_ptr_);
|
||||
if (!op_info_ptr) {
|
||||
MS_LOG(INFO) << "Warning: Cann't find tbe core opinfo, node type: " << node_name_;
|
||||
return;
|
||||
|
@ -81,6 +83,7 @@ void TbeKernelSelect::TbeMetadataInfoEx() {
|
|||
}
|
||||
// check support
|
||||
FilterInVaildKernelInfo();
|
||||
MS_LOG(INFO) << "End get kernel build info size: " << kernel_info_list_->size() << ", after tbe select.";
|
||||
}
|
||||
|
||||
void TbeKernelSelect::GetCommonPatternKernelInfo(const OpInfo &op_info) {
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "backend/kernel_compiler/kernel_query.h"
|
||||
#include "backend/kernel_compiler/oplib/oplib.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace opt {
|
||||
|
@ -62,7 +63,7 @@ class KernelQuery {
|
|||
if (!node->isa<CNode>()) {
|
||||
return false;
|
||||
}
|
||||
auto op_info = mindspore::kernel::OpLib::FindOp(AnfAlgo::GetCNodeName(node), kernel::kTBE);
|
||||
auto op_info = mindspore::kernel::tbe::TbeDynamicShapeUtil::FindOp(AnfAlgo::GetCNodeName(node), node);
|
||||
if (op_info != nullptr) {
|
||||
return op_info->is_ref();
|
||||
}
|
||||
|
@ -75,8 +76,8 @@ class OpFinder {
|
|||
public:
|
||||
OpFinder() = default;
|
||||
virtual ~OpFinder() = default;
|
||||
virtual int GetOpRegisteredOutputNum(const std::string &op_name) {
|
||||
auto op_info = kernel::OpLib::FindOp(op_name, kernel::kTBE);
|
||||
virtual int GetOpRegisteredOutputNum(const std::string &op_name, const CNodePtr &cnode) {
|
||||
auto op_info = kernel::tbe::TbeDynamicShapeUtil::FindOp(op_name, cnode);
|
||||
if (op_info == nullptr) {
|
||||
return -1;
|
||||
}
|
||||
|
|
|
@ -46,6 +46,9 @@ const AnfNodePtr InsertPadForNMSWithMask::Process(const FuncGraphPtr &func_graph
|
|||
const EquivPtr &) const {
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (AnfAlgo::IsDynamicShape(node)) {
|
||||
return nullptr;
|
||||
}
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@ session::KernelWithIndex FindRefOriginNode(const AnfNodePtr &node) {
|
|||
auto cnode = cur_node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
std::string op_name = AnfAlgo::GetCNodeName(cnode);
|
||||
auto op_info = mindspore::kernel::OpLib::FindOp(op_name, kernel::kTBE);
|
||||
auto op_info = mindspore::kernel::tbe::TbeDynamicShapeUtil::FindOp(op_name, cnode);
|
||||
// deal ref op
|
||||
if (op_info != nullptr && op_info->is_ref()) {
|
||||
auto ref_infos = op_info->ref_infos();
|
||||
|
@ -223,7 +223,7 @@ const AnfNodePtr DealRefTransAndCast::Process(const FuncGraphPtr &graph, const A
|
|||
DealBroadCastAsRef(graph, cnode);
|
||||
|
||||
auto op_name = AnfAlgo::GetCNodeName(cnode);
|
||||
auto op_info = mindspore::kernel::OpLib::FindOp(op_name, kernel::kTBE);
|
||||
auto op_info = mindspore::kernel::tbe::TbeDynamicShapeUtil::FindOp(op_name, cnode);
|
||||
if (op_info == nullptr || !op_info->is_ref()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
@ -65,6 +65,9 @@ const AnfNodePtr ConcatFission::Process(const FuncGraphPtr &func_graph, const An
|
|||
const EquivPtr &) const {
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (AnfAlgo::IsDynamicShape(node)) {
|
||||
return nullptr;
|
||||
}
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
// The real input begins with index 1.
|
||||
|
|
|
@ -86,6 +86,9 @@ const AnfNodePtr LayerNormGradSplit::Process(const FuncGraphPtr &graph, const An
|
|||
const EquivPtr &) const {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (AnfAlgo::IsDynamicShape(node)) {
|
||||
return nullptr;
|
||||
}
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
if (cnode->inputs().size() != kLayerNormGradInputNum) {
|
||||
return nullptr;
|
||||
|
|
|
@ -72,6 +72,9 @@ const BaseRef PackFission::DefinePattern() const {
|
|||
const AnfNodePtr PackFission::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const {
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (AnfAlgo::IsDynamicShape(node)) {
|
||||
return nullptr;
|
||||
}
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
// The real input begins with index 1.
|
||||
|
|
|
@ -105,6 +105,9 @@ const AnfNodePtr ReduceMinFission::Process(const FuncGraphPtr &graph, const AnfN
|
|||
if (graph == nullptr || node == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
if (AnfAlgo::IsDynamicShape(node)) {
|
||||
return nullptr;
|
||||
}
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
CheckCNodeInputSize(cnode, 2);
|
||||
|
|
|
@ -174,6 +174,9 @@ const BaseRef SplitFission::DefinePattern() const {
|
|||
|
||||
const AnfNodePtr SplitFission::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const {
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (AnfAlgo::IsDynamicShape(node)) {
|
||||
return nullptr;
|
||||
}
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
// Check output num
|
||||
|
|
|
@ -127,6 +127,9 @@ const BaseRef TopKSplit::DefinePattern() const {
|
|||
const AnfNodePtr TopKSplit::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const {
|
||||
MS_EXCEPTION_IF_NULL(func_graph);
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (AnfAlgo::IsDynamicShape(node)) {
|
||||
return nullptr;
|
||||
}
|
||||
auto kernel_graph = func_graph->cast<KernelGraphPtr>();
|
||||
// set value node as topk's input
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
|
|
|
@ -86,7 +86,7 @@ const AnfNodePtr AddInputToOutput::Process(const FuncGraphPtr &func_graph, const
|
|||
if (!InputToOutputRegistry::Instance().GetRegisterByOpName(op_name, ®)) {
|
||||
return nullptr;
|
||||
}
|
||||
int output_num = op_finder_->GetOpRegisteredOutputNum(op_name);
|
||||
int output_num = op_finder_->GetOpRegisteredOutputNum(op_name, cnode);
|
||||
// No need add output when it is not a tbe op.
|
||||
if (output_num == -1) {
|
||||
return nullptr;
|
||||
|
|
|
@ -84,6 +84,9 @@ bool QuitFusion(const FuncGraphPtr &graph, const AnfNodePtr &mul0_anf, const Anf
|
|||
MS_LOG(INFO) << "mul's second input is not addn";
|
||||
return true;
|
||||
}
|
||||
if (AnfAlgo::IsDynamicShape(addn)) {
|
||||
return true;
|
||||
}
|
||||
std::vector<size_t> shape = AnfAlgo::GetOutputInferShape(addn, 0);
|
||||
if (shape.size() != 2 || !(shape[1] == 1024 || shape[1] == 768)) {
|
||||
MS_LOG(INFO) << "Addn's infer shape is not equal [x,1024] or [x,768]";
|
||||
|
|
|
@ -53,6 +53,9 @@ CNodePtr CreateReluV2(const FuncGraphPtr &graph, const CNodePtr &relu) {
|
|||
|
||||
// ReluV2's 2rd output is mask whose data type is uint8
|
||||
TypeId mask_dtype = kNumberTypeUInt8;
|
||||
if (AnfAlgo::IsDynamicShape(relu)) {
|
||||
return nullptr;
|
||||
}
|
||||
std::vector<size_t> mask_shape = AnfAlgo::GetOutputInferShape(relu, 0);
|
||||
if (mask_shape.size() != 4) {
|
||||
MS_LOG(DEBUG) << "relu's infer shape size not equal 4";
|
||||
|
|
|
@ -29,6 +29,9 @@ bool CheckValueNodeInputOfMul(const AnfNodePtr &node) {
|
|||
if (!node->isa<ValueNode>()) {
|
||||
return false;
|
||||
}
|
||||
if (AnfAlgo::IsDynamicShape(node)) {
|
||||
return false;
|
||||
}
|
||||
std::vector<size_t> mul_input_shape = AnfAlgo::GetOutputInferShape(node, 0);
|
||||
return mul_input_shape.empty() || (mul_input_shape.size() == 1 && mul_input_shape[0] == 1);
|
||||
}
|
||||
|
|
|
@ -85,6 +85,9 @@ const AnfNodePtr MulAddNFusion::Process(const FuncGraphPtr &graph, const AnfNode
|
|||
break;
|
||||
}
|
||||
}
|
||||
if (AnfAlgo::IsDynamicShape(mul->input(lossscale_input_index))) {
|
||||
return nullptr;
|
||||
}
|
||||
auto constant_shape = AnfAlgo::GetOutputInferShape(mul->input(lossscale_input_index), 0);
|
||||
if (!(constant_shape.size() == 0 || (constant_shape.size() == 1 && constant_shape[0] == 1))) {
|
||||
MS_LOG(DEBUG) << "The const input of Mul node must be scalar or shape=(1,), but shape size is "
|
||||
|
|
|
@ -45,6 +45,10 @@ const AnfNodePtr RemoveReshapePair::Process(const FuncGraphPtr &func_graph, cons
|
|||
if (IsUsedByOthers(func_graph, in_reshape)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (AnfAlgo::IsDynamicShape(out_reshape) || AnfAlgo::IsDynamicShape(in_reshape)) {
|
||||
return nullptr;
|
||||
}
|
||||
auto output_shape = AnfAlgo::GetOutputDeviceShape(out_reshape, 0);
|
||||
auto input_shape = AnfAlgo::GetInputDeviceShape(in_reshape, 0);
|
||||
if (kernel::IsSameShape(input_shape, output_shape)) {
|
||||
|
|
|
@ -50,6 +50,9 @@ const AnfNodePtr ReshapeTransposeFusion::Process(const FuncGraphPtr &func_graph,
|
|||
MS_EXCEPTION_IF_NULL(transpose_cnode);
|
||||
auto reshape_cnode = CheckAnfNodeIfCNodeAndInputSize(transpose_cnode->input(1), kBackendReshapeInputNum);
|
||||
MS_EXCEPTION_IF_NULL(reshape_cnode);
|
||||
if (AnfAlgo::IsDynamicShape(transpose_cnode) || AnfAlgo::IsDynamicShape(reshape_cnode)) {
|
||||
return nullptr;
|
||||
}
|
||||
std::vector<size_t> reshape_input0_shape = AnfAlgo::GetPrevNodeOutputInferShape(reshape_cnode, 0);
|
||||
std::vector<size_t> transpose_output0_shape = AnfAlgo::GetOutputInferShape(transpose_cnode, 0);
|
||||
if (!CheckShapeDimInfo(reshape_input0_shape) || !CheckShapeDimInfo(transpose_output0_shape)) {
|
||||
|
|
|
@ -50,6 +50,9 @@ const AnfNodePtr TransposeReshapeFusion::Process(const FuncGraphPtr &func_graph,
|
|||
MS_EXCEPTION_IF_NULL(reshape_cnode);
|
||||
auto transpose_cnode = CheckAnfNodeIfCNodeAndInputSize(reshape_cnode->input(1), kBackendReshapeInputNum);
|
||||
MS_EXCEPTION_IF_NULL(transpose_cnode);
|
||||
if (AnfAlgo::IsDynamicShape(transpose_cnode) || AnfAlgo::IsDynamicShape(reshape_cnode)) {
|
||||
return nullptr;
|
||||
}
|
||||
std::vector<size_t> reshape_output0_shape = AnfAlgo::GetOutputInferShape(reshape_cnode, 0);
|
||||
std::vector<size_t> transpose_input0_shape = AnfAlgo::GetPrevNodeOutputInferShape(transpose_cnode, 0);
|
||||
if (!CheckShapeDimInfo(reshape_output0_shape) || !CheckShapeDimInfo(transpose_input0_shape)) {
|
||||
|
|
|
@ -26,6 +26,8 @@
|
|||
#include "base/base_ref.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "base/core_ops.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.h"
|
||||
#include "frontend/operator/ops.h"
|
||||
#include "utils/ms_utils.h"
|
||||
#include "runtime/device/kernel_info.h"
|
||||
#include "utils/ms_context.h"
|
||||
|
@ -394,6 +396,7 @@ bool IsNopNode(const AnfNodePtr &node) {
|
|||
context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET) != kGPUDevice) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static std::unordered_set<std::string> nop_nodes = {prim::kPrimReshape->name(), kExpandDimsOpName,
|
||||
prim::kPrimSqueeze->name(), prim::kPrimFlatten->name(),
|
||||
kFlattenGradOpName};
|
||||
|
|
|
@ -55,6 +55,10 @@ const AnfNodePtr ConvertConstInputToAttr::Process(const FuncGraphPtr &, const An
|
|||
continue;
|
||||
}
|
||||
}
|
||||
if (AnfAlgo::IsDynamicShape(cnode)) {
|
||||
MS_LOG(INFO) << "current node is dynamic shape " << cnode->fullname_with_scope();
|
||||
continue;
|
||||
}
|
||||
ConstInputToAttr(cnode, reg.GetConstInputAttrInfo());
|
||||
}
|
||||
return node;
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "backend/kernel_compiler/kernel.h"
|
||||
#include "backend/kernel_compiler/kernel_build_info.h"
|
||||
#include "common/trans.h"
|
||||
#include "abstract/param_validator.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace session {
|
||||
|
@ -42,12 +43,27 @@ namespace {
|
|||
constexpr size_t kNopNodeInputSize = 2;
|
||||
constexpr size_t kNopNodeRealInputIndex = 1;
|
||||
|
||||
bool IsShapeDynamic(const abstract::ShapePtr &shape) {
|
||||
MS_EXCEPTION_IF_NULL(shape);
|
||||
return std::any_of(shape->shape().begin(), shape->shape().end(), [](int s) { return s < 0; });
|
||||
}
|
||||
|
||||
std::vector<size_t> TransShapeToSizet(const abstract::ShapePtr &shape) {
|
||||
MS_EXCEPTION_IF_NULL(shape);
|
||||
std::vector<size_t> shape_size_t;
|
||||
std::transform(shape->shape().begin(), shape->shape().end(), std::back_inserter(shape_size_t), IntToSize);
|
||||
if (IsShapeDynamic(shape)) {
|
||||
if (std::all_of(shape->max_shape().begin(), shape->max_shape().end(), [](int s) { return s >= 0; })) {
|
||||
std::transform(shape->max_shape().begin(), shape->max_shape().end(), std::back_inserter(shape_size_t), IntToSize);
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Invalid Max Shape";
|
||||
}
|
||||
} else {
|
||||
std::transform(shape->shape().begin(), shape->shape().end(), std::back_inserter(shape_size_t), IntToSize);
|
||||
}
|
||||
return shape_size_t;
|
||||
}
|
||||
|
||||
enum ShapeType { kMaxShape, kMinShape };
|
||||
} // namespace
|
||||
|
||||
AnfNodePtr AnfRuntimeAlgorithm::GetTupleGetItemRealInput(const CNodePtr &tuple_get_item) {
|
||||
|
@ -1206,19 +1222,6 @@ TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputPrecision(const AnfNodePtr &node, s
|
|||
return GetCNodeOutputPrecision(kernel_with_index.first);
|
||||
}
|
||||
|
||||
bool AnfRuntimeAlgorithm::IsDynamicShape(const AnfNodePtr &node) {
|
||||
if (!node->isa<CNode>()) {
|
||||
return false;
|
||||
}
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto has_attr = AnfAlgo::HasNodeAttr(kAttrIsDynamicShape, cnode);
|
||||
if (!has_attr) {
|
||||
return false;
|
||||
}
|
||||
return AnfAlgo::GetNodeAttr<bool>(node, kAttrIsDynamicShape);
|
||||
}
|
||||
|
||||
bool AnfRuntimeAlgorithm::IsCondControlKernel(const CNodePtr &node) {
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (node->inputs().empty()) {
|
||||
|
@ -1252,5 +1255,96 @@ bool AnfRuntimeAlgorithm::IsIndependentNode(const CNodePtr &node) {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AnfRuntimeAlgorithm::GetBooleanAttr(const AnfNodePtr &node, const std::string &attr) {
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (!node->isa<CNode>()) {
|
||||
return false;
|
||||
}
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto has_attr = AnfAlgo::HasNodeAttr(attr, cnode);
|
||||
if (!has_attr) {
|
||||
return false;
|
||||
}
|
||||
return AnfAlgo::GetNodeAttr<bool>(node, attr);
|
||||
}
|
||||
|
||||
bool AnfRuntimeAlgorithm::IsDynamicShape(const AnfNodePtr &node) {
|
||||
return GetBooleanAttr(node, kAttrInputIsDynamicShape) || GetBooleanAttr(node, kAttrOutputIsDynamicShape);
|
||||
}
|
||||
|
||||
void AnfRuntimeAlgorithm::GetRealDynamicShape(const std::vector<size_t> &shape,
|
||||
NotNull<std::vector<int64_t> *> dynamic_shape) {
|
||||
for (auto size : shape) {
|
||||
if (size == SIZE_MAX) {
|
||||
dynamic_shape->push_back(-1);
|
||||
} else {
|
||||
dynamic_shape->push_back(SizeToLong(size));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int> GetShapeFromSequeueShape(const abstract::SequeueShapePtr &sequeue_shape_ptr, size_t index,
|
||||
ShapeType type) {
|
||||
MS_EXCEPTION_IF_NULL(sequeue_shape_ptr);
|
||||
auto shape_list = sequeue_shape_ptr->shape();
|
||||
if (index >= shape_list.size()) {
|
||||
MS_LOG(EXCEPTION) << "Output Index:" << index << " >= " << shape_list.size();
|
||||
}
|
||||
|
||||
auto shape = shape_list[index];
|
||||
MS_EXCEPTION_IF_NULL(shape);
|
||||
if (shape->isa<abstract::Shape>()) {
|
||||
auto shape_ptr = shape->cast<abstract::ShapePtr>();
|
||||
if (type == kMaxShape) {
|
||||
return shape_ptr->max_shape().empty() ? shape_ptr->shape() : shape_ptr->max_shape();
|
||||
} else {
|
||||
return shape_ptr->min_shape().empty() ? shape_ptr->shape() : shape_ptr->min_shape();
|
||||
}
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Invalid Shape Type In Shape List";
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int> AnfRuntimeAlgorithm::GetInputMaxShape(const AnfNodePtr &anf_node, size_t index) {
|
||||
auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(anf_node, index);
|
||||
return GetOutputMaxShape(input_node_with_index.first, input_node_with_index.second);
|
||||
}
|
||||
|
||||
std::vector<int> AnfRuntimeAlgorithm::GetInputMinShape(const AnfNodePtr &anf_node, size_t index) {
|
||||
auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(anf_node, index);
|
||||
return GetOutputMinShape(input_node_with_index.first, input_node_with_index.second);
|
||||
}
|
||||
|
||||
std::vector<int> AnfRuntimeAlgorithm::GetOutputMaxShape(const AnfNodePtr &anf_node, size_t index) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
auto shape = anf_node->Shape();
|
||||
MS_EXCEPTION_IF_NULL(shape);
|
||||
if (shape->isa<abstract::Shape>()) {
|
||||
auto shape_ptr = shape->cast<abstract::ShapePtr>();
|
||||
return shape_ptr->max_shape().empty() ? shape_ptr->shape() : shape_ptr->max_shape();
|
||||
} else if (shape->isa<abstract::SequeueShape>()) {
|
||||
auto shape_ptr = shape->cast<abstract::SequeueShapePtr>();
|
||||
return GetShapeFromSequeueShape(shape_ptr, index, kMaxShape);
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Invalid Shape Type";
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int> AnfRuntimeAlgorithm::GetOutputMinShape(const AnfNodePtr &anf_node, size_t index) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
auto shape = anf_node->Shape();
|
||||
MS_EXCEPTION_IF_NULL(shape);
|
||||
if (shape->isa<abstract::Shape>()) {
|
||||
auto shape_ptr = shape->cast<abstract::ShapePtr>();
|
||||
return shape_ptr->min_shape().empty() ? shape_ptr->shape() : shape_ptr->min_shape();
|
||||
} else if (shape->isa<abstract::SequeueShape>()) {
|
||||
auto shape_ptr = shape->cast<abstract::SequeueShapePtr>();
|
||||
return GetShapeFromSequeueShape(shape_ptr, index, kMinShape);
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Invalid Shape Type";
|
||||
}
|
||||
}
|
||||
} // namespace session
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -221,6 +221,12 @@ class AnfRuntimeAlgorithm {
|
|||
static bool IsDynamicShape(const AnfNodePtr &node);
|
||||
static bool IsCondControlKernel(const CNodePtr &node);
|
||||
static bool IsIndependentNode(const CNodePtr &node);
|
||||
static bool GetBooleanAttr(const AnfNodePtr &node, const std::string &attr);
|
||||
static void GetRealDynamicShape(const std::vector<size_t> &shape, NotNull<std::vector<int64_t> *> dynamic_shape);
|
||||
static std::vector<int> GetInputMaxShape(const AnfNodePtr &anf_node, size_t index);
|
||||
static std::vector<int> GetInputMinShape(const AnfNodePtr &anf_node, size_t index);
|
||||
static std::vector<int> GetOutputMaxShape(const AnfNodePtr &anf_node, size_t index);
|
||||
static std::vector<int> GetOutputMinShape(const AnfNodePtr &anf_node, size_t index);
|
||||
};
|
||||
} // namespace session
|
||||
using AnfAlgo = session::AnfRuntimeAlgorithm;
|
||||
|
|
|
@ -127,6 +127,9 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
|
|||
MS_LOG(INFO) << "Start";
|
||||
std::vector<KernelGraphPtr> all_graphs;
|
||||
auto root_graph = ConstructKernelGraph(func_graph, &all_graphs);
|
||||
// Update Graph Dynamic Shape Attr
|
||||
UpdateGraphDynamicShapeAttr(NOT_NULL(root_graph));
|
||||
root_graph->UpdateGraphDynamicAttr();
|
||||
BackendOptimization(all_graphs);
|
||||
// empty graph dont entry to backend
|
||||
if (root_graph->execution_order().empty()) {
|
||||
|
@ -136,6 +139,7 @@ GraphId AscendSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
|
|||
InitRuntimeResource();
|
||||
return root_graph->graph_id();
|
||||
}
|
||||
|
||||
// create parameter for multiple branch
|
||||
std::set<KernelGraphPtr> memo;
|
||||
CreateMultiBranchOutput(NOT_NULL(root_graph), NOT_NULL(&memo));
|
||||
|
|
|
@ -1201,6 +1201,17 @@ void KernelGraph::RemoveNodeFromGraph(const AnfNodePtr &node) {
|
|||
}
|
||||
}
|
||||
|
||||
void KernelGraph::UpdateGraphDynamicAttr() {
|
||||
for (const auto &cnode : execution_order_) {
|
||||
if (AnfAlgo::IsDynamicShape(cnode)) {
|
||||
MS_LOG(INFO) << "Update Graph Dynamic Attr";
|
||||
is_dynamic_shape_ = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
is_dynamic_shape_ = false;
|
||||
}
|
||||
|
||||
std::string KernelGraph::ToString() const { return std::string("kernel_graph_").append(std::to_string(graph_id_)); }
|
||||
|
||||
KernelGraph::~KernelGraph() {
|
||||
|
|
|
@ -37,7 +37,13 @@ namespace session {
|
|||
using AnfWithOutIndex = std::pair<AnfNodePtr, size_t>;
|
||||
class KernelGraph : public FuncGraph {
|
||||
public:
|
||||
KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), null_output_(false), current_epoch_(0) {
|
||||
KernelGraph()
|
||||
: graph_id_(0),
|
||||
start_label_(nullptr),
|
||||
end_goto_(nullptr),
|
||||
null_output_(false),
|
||||
current_epoch_(0),
|
||||
is_dynamic_shape_(false) {
|
||||
inputs_ = std::make_shared<std::vector<AnfNodePtr>>();
|
||||
execution_order_ = {};
|
||||
executable_ = true;
|
||||
|
@ -161,6 +167,7 @@ class KernelGraph : public FuncGraph {
|
|||
void set_child_graph_result(const std::vector<AnfNodePtr> &child_graph_result) {
|
||||
child_graph_result_ = child_graph_result;
|
||||
}
|
||||
|
||||
void InsertTupleParameterToMakeTupleMap(const AnfNodePtr ¶m, const AnfNodePtr &make_tuple) {
|
||||
if (tuple_parameter_to_make_tuple_map_.find(param) != tuple_parameter_to_make_tuple_map_.end()) {
|
||||
return;
|
||||
|
@ -176,6 +183,9 @@ class KernelGraph : public FuncGraph {
|
|||
}
|
||||
void RemoveNodeFromGraph(const AnfNodePtr &node);
|
||||
|
||||
void UpdateGraphDynamicAttr();
|
||||
bool is_dynamic_shape() const { return is_dynamic_shape_; }
|
||||
|
||||
private:
|
||||
// remove value node form graph
|
||||
bool RemoveValueNodeFromGraph(const ValueNodePtr &value_node);
|
||||
|
@ -247,10 +257,10 @@ class KernelGraph : public FuncGraph {
|
|||
std::unordered_map<AnfNodePtr, std::unordered_map<int, tensor::TensorPtr>> internal_outputs_tensor_map_;
|
||||
uint32_t current_epoch_;
|
||||
std::unordered_map<AnfNodePtr, AnfNodePtr> tuple_parameter_to_make_tuple_map_;
|
||||
|
||||
std::set<AnfNodePtr> visited_nodes_;
|
||||
std::map<AnfNodePtr, AnfNodePtr> edge_to_;
|
||||
std::stack<AnfNodePtr> loop_nodes_;
|
||||
bool is_dynamic_shape_;
|
||||
};
|
||||
} // namespace session
|
||||
using KernelGraphPtr = std::shared_ptr<session::KernelGraph>;
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include "ir/dtype.h"
|
||||
#include "ir/anf.h"
|
||||
#include "ir/func_graph_cloner.h"
|
||||
#include "utils/utils.h"
|
||||
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
|
||||
#include "ps/worker.h"
|
||||
#include "ps/common.h"
|
||||
|
@ -1405,6 +1406,97 @@ void SessionBasic::RunGraphAsync(const GraphId &graph_id, const std::vector<tens
|
|||
executor_->RunGraphAsync(shared_from_this(), graph_id, inputs, outputs);
|
||||
}
|
||||
|
||||
bool IsDynamicShape(const NotNull<abstract::ShapePtr> &shape) {
|
||||
return !std::all_of(shape->shape().begin(), shape->shape().end(), [](int s) { return s > 0; });
|
||||
}
|
||||
|
||||
bool IsNodeOutputDynamicShape(const CNodePtr &anf_node_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node_ptr);
|
||||
auto base_shape = anf_node_ptr->Shape();
|
||||
if (base_shape == nullptr) {
|
||||
MS_LOG(INFO) << "Invalid bash shape ptr, node:" << anf_node_ptr->fullname_with_scope();
|
||||
return false;
|
||||
}
|
||||
if (base_shape->isa<abstract::Shape>()) {
|
||||
if (IsDynamicShape(NOT_NULL(base_shape->cast<abstract::ShapePtr>()))) {
|
||||
return true;
|
||||
}
|
||||
} else if (base_shape->isa<abstract::TupleShape>()) {
|
||||
auto tuple_shape = base_shape->cast<abstract::TupleShapePtr>();
|
||||
MS_EXCEPTION_IF_NULL(tuple_shape);
|
||||
|
||||
for (size_t i = 0; i < tuple_shape->size(); ++i) {
|
||||
auto b_shp = (*tuple_shape)[i];
|
||||
if (!b_shp->isa<abstract::Shape>()) {
|
||||
continue;
|
||||
}
|
||||
if (IsDynamicShape(NOT_NULL(b_shp->cast<abstract::ShapePtr>()))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool IsNodeInputDynamicShape(const CNodePtr &anf_node_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node_ptr);
|
||||
auto input_num = AnfAlgo::GetInputTensorNum(anf_node_ptr);
|
||||
for (size_t i = 0; i < input_num; ++i) {
|
||||
auto input_with_index = AnfAlgo::GetPrevNodeOutput(anf_node_ptr, i);
|
||||
auto input = input_with_index.first;
|
||||
auto index = input_with_index.second;
|
||||
MS_EXCEPTION_IF_NULL(input);
|
||||
|
||||
auto base_shape = input->Shape();
|
||||
if (base_shape == nullptr) {
|
||||
MS_LOG(INFO) << "Invalid shape ptr, node:" << input->fullname_with_scope();
|
||||
continue;
|
||||
}
|
||||
if (base_shape->isa<abstract::Shape>()) {
|
||||
if (IsDynamicShape(NOT_NULL(base_shape->cast<abstract::ShapePtr>()))) {
|
||||
return true;
|
||||
}
|
||||
} else if (base_shape->isa<abstract::TupleShape>()) {
|
||||
auto tuple_shape = base_shape->cast<abstract::TupleShapePtr>();
|
||||
MS_EXCEPTION_IF_NULL(tuple_shape);
|
||||
|
||||
if (index >= tuple_shape->size()) {
|
||||
MS_LOG(INFO) << "Node:" << anf_node_ptr->fullname_with_scope() << "Invalid index:" << index
|
||||
<< " and tuple_shape size:" << tuple_shape->size();
|
||||
continue;
|
||||
}
|
||||
|
||||
auto b_shp = (*tuple_shape)[index];
|
||||
if (!b_shp->isa<abstract::Shape>()) {
|
||||
continue;
|
||||
}
|
||||
if (IsDynamicShape(NOT_NULL(b_shp->cast<abstract::ShapePtr>()))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void SessionBasic::UpdateGraphDynamicShapeAttr(const NotNull<KernelGraphPtr> &root_graph) {
|
||||
for (const auto &cnode : root_graph->execution_order()) {
|
||||
auto output_dynamic = IsNodeOutputDynamicShape(NOT_NULL(cnode));
|
||||
auto input_dynamic = IsNodeInputDynamicShape(NOT_NULL(cnode));
|
||||
if (output_dynamic || input_dynamic) {
|
||||
AnfAlgo::SetNodeAttr(kAttrIsDynamicShape, MakeValue(true), cnode);
|
||||
MS_LOG(INFO) << "Set Dynamic Shape Attr to Node:" << cnode->fullname_with_scope();
|
||||
}
|
||||
if (output_dynamic) {
|
||||
AnfAlgo::SetNodeAttr(kAttrOutputIsDynamicShape, MakeValue(true), cnode);
|
||||
MS_LOG(INFO) << "Set Output Dynamic Shape Attr to Node:" << cnode->fullname_with_scope();
|
||||
}
|
||||
if (input_dynamic) {
|
||||
AnfAlgo::SetNodeAttr(kAttrInputIsDynamicShape, MakeValue(true), cnode);
|
||||
MS_LOG(INFO) << "Set Input Dynamic Shape Attr to Node:" << cnode->fullname_with_scope();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
|
||||
void SessionBasic::AssignParamKey(const KernelGraphPtr &kernel_graph) {
|
||||
if (!ps::Util::IsRoleOfWorker()) {
|
||||
|
|
|
@ -172,6 +172,7 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
|
|||
void AddParameterToGraphInputs(const std::vector<AnfNodePtr> ¶meters, KernelGraph *graph);
|
||||
void InitInternalOutputParameter(const AnfNodePtr &out_node, const AnfNodePtr ¶meter);
|
||||
AnfNodePtr FindPullNode(const AnfNodePtr &push_node, const std::vector<AnfNodePtr> &node_list);
|
||||
void UpdateGraphDynamicShapeAttr(const NotNull<KernelGraphPtr> &root_graph);
|
||||
|
||||
std::unordered_map<GraphId, std::shared_ptr<KernelGraph>> graphs_;
|
||||
std::unordered_map<GraphInfo, std::shared_ptr<KernelGraph>> run_op_graphs_;
|
||||
|
|
|
@ -713,5 +713,16 @@ EvalResultPtr EvalOnePrim(const PrimitivePtr &primitive, const AbstractBasePtrLi
|
|||
auto eval_result = trivial_evaluator->EvalPrim(nullptr, arg_specs);
|
||||
return eval_result;
|
||||
}
|
||||
|
||||
AbstractBasePtr CppInferShape(const PrimitivePtr &prim, const AbstractBasePtrList &args_spec_list) {
|
||||
MS_EXCEPTION_IF_NULL(prim);
|
||||
auto &prim_eval_implement_map = GetPrimitiveToEvalImplMap();
|
||||
auto ret = prim_eval_implement_map.find(prim);
|
||||
if (ret == prim_eval_implement_map.end()) {
|
||||
MS_LOG(EXCEPTION) << "Get infer shape function failed, primitive name:" << prim->name()
|
||||
<< " primitive type:" << prim->type_name();
|
||||
}
|
||||
return ret->second.impl_(nullptr, prim, args_spec_list);
|
||||
}
|
||||
} // namespace abstract
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -302,6 +302,8 @@ AbstractBasePtr FromValue(const T &value, bool broaden = false) {
|
|||
}
|
||||
|
||||
EvalResultPtr EvalOnePrim(const PrimitivePtr &p, const AbstractBasePtrList &arg_specs);
|
||||
|
||||
AbstractBasePtr CppInferShape(const PrimitivePtr &prim, const AbstractBasePtrList &args_spec_list);
|
||||
} // namespace abstract
|
||||
} // namespace mindspore
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "common/*.cc"
|
||||
"kernel_info.cc" "kernel_runtime.cc" "memory_manager.cc" "kernel_runtime_manager.cc" "convert_tensor_utils.cc"
|
||||
"kernel_info.cc" "executor/dynamic_kernel.cc" "kernel_runtime.cc" "memory_manager.cc" "kernel_runtime_manager.cc" "convert_tensor_utils.cc"
|
||||
)
|
||||
|
||||
if (ENABLE_GPU)
|
||||
|
|
|
@ -372,7 +372,7 @@ kernel::KernelModPtr AscendDeviceAddress::CompileTransDataAndObtainKernelMod(con
|
|||
// get size
|
||||
std::vector<size_t> input_size_list;
|
||||
std::vector<size_t> output_size_list;
|
||||
(void)kernel::TbeKernelBuild::GetIOSize(kernel_json, &input_size_list, &output_size_list);
|
||||
(void)kernel::TbeKernelBuild::GetIOSize(kernel_json, &input_size_list, &output_size_list, nullptr);
|
||||
std::string json_name = kernel_json[op_info_str][kernel_name_str];
|
||||
// op build
|
||||
if (constructed_kernel.find(json_name) == constructed_kernel.end()) {
|
||||
|
@ -382,15 +382,15 @@ kernel::KernelModPtr AscendDeviceAddress::CompileTransDataAndObtainKernelMod(con
|
|||
while (!build_manager->IsAllTaskFinish()) {
|
||||
int task_id = -1;
|
||||
std::string task_result;
|
||||
std::string pre_build_result;
|
||||
auto ret = build_manager->WaitOne(&task_id, &task_result, &pre_build_result);
|
||||
std::string build_result;
|
||||
auto ret = build_manager->WaitOne(&task_id, &task_result, &build_result);
|
||||
if (!ret) {
|
||||
MS_EXCEPTION(ArgumentError) << "Build Failed. wait one ret:" << ret << ", task id:" << task_id;
|
||||
}
|
||||
if (task_result != "Success") {
|
||||
MS_EXCEPTION(ArgumentError) << "task compile Failed, task id:" << task_id << ", cause:" << task_result;
|
||||
}
|
||||
(void)build_manager->TaskFinishProcess(task_id, false);
|
||||
(void)build_manager->TaskFinishProcess(task_id, build_result, false);
|
||||
}
|
||||
constructed_kernel.insert(json_name);
|
||||
// search cache
|
||||
|
|
|
@ -46,12 +46,22 @@
|
|||
#ifdef MEM_REUSE_DEBUG
|
||||
#include "backend/optimizer/mem_reuse/mem_reuse_checker.h"
|
||||
#endif
|
||||
#include "runtime/device/ascend/executor/tiling/op_tiling_calculater.h"
|
||||
#include "runtime/device/ascend/executor/executor_callback.h"
|
||||
#include "runtime/device/ascend/executor/hccl_dynamic_kernel.h"
|
||||
#include "profiler/device/ascend/ascend_profiling.h"
|
||||
#include "profiler/device/ascend/profiling_context.h"
|
||||
#include "profiler/device/ascend/rt_callback_manager.h"
|
||||
|
||||
using ge::model_runner::ModelRunner;
|
||||
using mindspore::device::ascend::ProfilingManager;
|
||||
using mindspore::device::ascend::ProfilingUtils;
|
||||
using mindspore::device::ascend::tasksink::TaskGenerator;
|
||||
using mindspore::kernel::tbe::TbeUtils;
|
||||
using mindspore::profiler::ascend::AscendProfiler;
|
||||
using mindspore::profiler::ascend::CallbackManager;
|
||||
using mindspore::profiler::ascend::GetTid;
|
||||
using mindspore::profiler::ascend::kCallback;
|
||||
using std::vector;
|
||||
|
||||
constexpr uint32_t kTupleTaskId = 0;
|
||||
|
@ -135,6 +145,8 @@ void AscendKernelRuntime::ClearGraphModelMap() {
|
|||
// tell users which dump kernel name not used
|
||||
DumpJsonParser::GetInstance().PrintUnusedKernel();
|
||||
|
||||
graph_dynamic_kernel_map_.clear();
|
||||
|
||||
for (auto &iter : graph_model_map_) {
|
||||
MS_LOG(INFO) << "Ge UnloadModel " << iter.first;
|
||||
auto ret = ModelRunner::Instance().UnloadModel(iter.first);
|
||||
|
@ -160,6 +172,13 @@ void AscendKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std
|
|||
MS_LOG(DEBUG) << "GraphId:" << graph_id << " not found";
|
||||
}
|
||||
|
||||
MS_LOG(DEBUG) << "Clear graph:" << graph_id << " dynamic kernels";
|
||||
if (auto dynamic_kernel_iter = graph_dynamic_kernel_map_.find(graph_id);
|
||||
dynamic_kernel_iter != graph_dynamic_kernel_map_.end()) {
|
||||
MS_LOG(DEBUG) << "Start Clear graph:" << graph_id << " dynamic kernel";
|
||||
graph_dynamic_kernel_map_.erase(dynamic_kernel_iter);
|
||||
}
|
||||
|
||||
MS_LOG(DEBUG) << "Clear graph:" << graph_id << " runtime resource";
|
||||
if (auto model_iter = graph_model_map_.find(graph_id); model_iter != graph_model_map_.end()) {
|
||||
MS_LOG(DEBUG) << "Ge UnloadModel " << graph_id;
|
||||
|
@ -233,6 +252,7 @@ bool AscendKernelRuntime::Init() {
|
|||
InnerSetContext();
|
||||
return true;
|
||||
}
|
||||
OpTilingCalculater::GetInstance().Init();
|
||||
// Start up profiling before rtSetDevice
|
||||
bool ret = ProfilingManager::GetInstance().StartupProfiling(device_id_);
|
||||
if (!ret) {
|
||||
|
@ -342,6 +362,11 @@ bool AscendKernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) {
|
|||
if (!is_task_sink) {
|
||||
return true;
|
||||
}
|
||||
// Do HcomExecutorInitialize
|
||||
if (graph->is_dynamic_shape() && !HcclExecutorManager::GetInstance().Initialize()) {
|
||||
MS_LOG(ERROR) << "Init Hccl Executor Failed";
|
||||
return false;
|
||||
}
|
||||
if (!GenTask(graph)) {
|
||||
return false;
|
||||
}
|
||||
|
@ -351,8 +376,35 @@ bool AscendKernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool AscendKernelRuntime::GenDynamicKernel(const session::KernelGraph *graph) {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
MS_LOG(INFO) << "GenDynamicKernel start";
|
||||
auto cnode_list = graph->execution_order();
|
||||
std::vector<DynamicKernelPtr> dynamic_kernels;
|
||||
for (const auto &cnode : cnode_list) {
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
MS_LOG(INFO) << "Generate node:" << cnode->fullname_with_scope() << " dynamic kernel";
|
||||
auto kernel_mod = AnfAlgo::GetKernelMod(cnode);
|
||||
auto dynamic_kernel = kernel_mod->GenDynamicKernel(cnode, stream_);
|
||||
MS_EXCEPTION_IF_NULL(dynamic_kernel);
|
||||
dynamic_kernel->Initialize();
|
||||
dynamic_kernels.emplace_back(dynamic_kernel);
|
||||
}
|
||||
auto ret = graph_dynamic_kernel_map_.try_emplace(graph->graph_id(), dynamic_kernels);
|
||||
if (!ret.second) {
|
||||
MS_LOG(ERROR) << "Graph:" << graph->graph_id() << " already generator executor";
|
||||
return false;
|
||||
}
|
||||
MS_LOG(INFO) << "GenDynamicKernel end";
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
|
||||
InnerSetContext();
|
||||
if (graph->is_dynamic_shape()) {
|
||||
MS_LOG(INFO) << "Dynamic Shape Graph Generate Dynamic kernel";
|
||||
return GenDynamicKernel(graph);
|
||||
}
|
||||
if (graph == nullptr) {
|
||||
MS_EXCEPTION(NotExistsError) << "session::KernelGraph is NULL!";
|
||||
}
|
||||
|
@ -407,6 +459,11 @@ bool AscendKernelRuntime::GenTask(const session::KernelGraph *graph) {
|
|||
|
||||
bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
|
||||
InnerSetContext();
|
||||
if (graph->is_dynamic_shape()) {
|
||||
MS_LOG(INFO) << "Dynamic Shape Graph Skip Load Task Step";
|
||||
return true;
|
||||
}
|
||||
|
||||
if (graph == nullptr) {
|
||||
MS_EXCEPTION(NotExistsError) << "Null pointer graph, LoadTask failed. ";
|
||||
}
|
||||
|
@ -520,9 +577,70 @@ bool AscendKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink, De
|
|||
return ret;
|
||||
}
|
||||
|
||||
bool AscendKernelRuntime::RunDynamicKernelAsync(const session::KernelGraph *graph) {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
MS_LOG(INFO) << "RunExecutorAsync start. GraphId:" << graph->graph_id();
|
||||
|
||||
auto iter = graph_dynamic_kernel_map_.find(graph->graph_id());
|
||||
if (iter == graph_dynamic_kernel_map_.end()) {
|
||||
MS_LOG(ERROR) << "GraphId:" << graph->graph_id() << " Not Found! Please generator executor first";
|
||||
return false;
|
||||
}
|
||||
|
||||
// Profiling Init
|
||||
auto &async_profiler = AscendProfiler::GetInstance();
|
||||
auto &rt_callback = CallbackManager::GetInstance(stream_);
|
||||
rt_callback.Init();
|
||||
|
||||
auto dynamic_kernels = iter->second;
|
||||
for (const auto &dynamic_kernel : dynamic_kernels) {
|
||||
if (dynamic_kernel->have_depends()) {
|
||||
MS_LOG(INFO) << "Match Dynamic Kernel, Start SyncStream";
|
||||
if (!SyncStream()) {
|
||||
MS_LOG(ERROR) << "SyncStream failed";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (dynamic_kernel->is_dynamic_shape()) {
|
||||
ExecutorCallback::GetInstance().Consume();
|
||||
dynamic_kernel->InferShape();
|
||||
dynamic_kernel->UpdateArgs();
|
||||
}
|
||||
|
||||
// Enable profiling trace point start
|
||||
rt_callback.RegisterCallback(
|
||||
[&]() { RECORD_CALLBACK_EVENT(&async_profiler, dynamic_kernel->GetKernelName().c_str(), "[Callback] start"); });
|
||||
|
||||
dynamic_kernel->Execute();
|
||||
|
||||
// Enable profiling trace point end
|
||||
rt_callback.RegisterCallback(
|
||||
[&]() { RECORD_CALLBACK_EVENT(&async_profiler, dynamic_kernel->GetKernelName().c_str(), "[Callback] end"); });
|
||||
|
||||
ExecutorCallback::GetInstance().RegistCallback([&dynamic_kernel] { dynamic_kernel->PostExecute(); });
|
||||
}
|
||||
|
||||
if (!SyncStream()) {
|
||||
MS_LOG(ERROR) << "SyncStream failed";
|
||||
return false;
|
||||
}
|
||||
ExecutorCallback::GetInstance().Consume();
|
||||
|
||||
rt_callback.Destroy();
|
||||
async_profiler.Dump(std::cout);
|
||||
async_profiler.Reset();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) {
|
||||
InnerSetContext();
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
if (graph->is_dynamic_shape()) {
|
||||
MS_LOG(INFO) << "Dynamic Shape Graph Run Task Async";
|
||||
return RunDynamicKernelAsync(graph);
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "RunTask start. GraphId:" << graph->graph_id();
|
||||
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
|
@ -657,7 +775,12 @@ bool AscendKernelRuntime::DestroyHccl() {
|
|||
MS_LOG(INFO) << "Hccl is not enable, no need to close.";
|
||||
return true;
|
||||
}
|
||||
// Dynamic Shape Hccl Finalize
|
||||
if (!HcclExecutorManager::GetInstance().Finalize()) {
|
||||
MS_LOG(ERROR) << "Dynamic Shape Hccl Finalize Failed";
|
||||
}
|
||||
HcclResult res = hcom_destroy();
|
||||
|
||||
if (res != HCCL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Hccl destroy failed";
|
||||
return false;
|
||||
|
|
|
@ -40,6 +40,8 @@ class AscendKernelRuntime : public KernelRuntime {
|
|||
bool Init() override;
|
||||
bool LoadData(session::KernelGraph *graph, Debugger *debugger) override;
|
||||
bool GenTask(const session::KernelGraph *graph);
|
||||
bool GenDynamicKernel(const session::KernelGraph *graph) override;
|
||||
bool RunDynamicKernelAsync(const session::KernelGraph *graph) override;
|
||||
bool LoadTask(const session::KernelGraph *graph);
|
||||
bool RunTask(const session::KernelGraph *graph);
|
||||
bool Load(session::KernelGraph *graph, bool is_task_sink) override;
|
||||
|
|
|
@ -34,7 +34,7 @@ const uint32_t kHcomMaxTask = 5;
|
|||
const uint32_t kCommonMaxTask = 350;
|
||||
|
||||
void AscendStreamAssign::AssignStream(const NotNull<KernelGraphPtr> &graph_ptr) {
|
||||
if (IsTaskSink()) {
|
||||
if (IsTaskSink() && !graph_ptr->is_dynamic_shape()) {
|
||||
Reset();
|
||||
SetLoopSink();
|
||||
ReorderIndependentOrders(graph_ptr);
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
#include "runtime/mem.h"
|
||||
#include "runtime/kernel.h"
|
||||
#include "runtime/rt_model.h"
|
||||
#include "runtime/device/ascend/dump/ge_dump.h"
|
||||
#include "runtime/device/ascend/ge_types_convert.h"
|
||||
#include "proto/op_mapping_info.pb.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "debug/data_dump/dump_json_parser.h"
|
||||
|
@ -369,13 +369,13 @@ void DataDumper::DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<ai
|
|||
auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel, i);
|
||||
|
||||
aicpu::dump::Output output;
|
||||
output.set_data_type(GetGeDataType(data_type));
|
||||
output.set_format(GetGeFormat(output_format, output_shape.size()));
|
||||
output.set_data_type(GeTypesConvert::GetGeDataType(data_type));
|
||||
output.set_format(GeTypesConvert::GetGeFormat(output_format, output_shape.size()));
|
||||
MS_EXCEPTION_IF_NULL(output.mutable_shape());
|
||||
for (auto dim : output_shape) {
|
||||
output.mutable_shape()->add_dim(dim);
|
||||
}
|
||||
output.set_original_output_format(GetGeFormat(output_format, output_shape.size()));
|
||||
output.set_original_output_format(GeTypesConvert::GetGeFormat(output_format, output_shape.size()));
|
||||
output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset);
|
||||
// device address data size
|
||||
auto address = AnfAlgo::GetOutputAddr(kernel, i);
|
||||
|
@ -409,8 +409,8 @@ void DataDumper::DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aic
|
|||
}
|
||||
auto output_shape = AnfAlgo::GetOutputDeviceShape(input_node, input_index);
|
||||
|
||||
input.set_data_type(GetGeDataType(output_type));
|
||||
input.set_format(GetGeFormat(output_format, output_shape.size()));
|
||||
input.set_data_type(GeTypesConvert::GetGeDataType(output_type));
|
||||
input.set_format(GeTypesConvert::GetGeFormat(output_format, output_shape.size()));
|
||||
MS_EXCEPTION_IF_NULL(input.mutable_shape());
|
||||
for (auto dim : output_shape) {
|
||||
input.mutable_shape()->add_dim(dim);
|
||||
|
|
|
@ -0,0 +1,182 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "runtime/device/ascend/executor/ai_core_dynamic_kernel.h"
|
||||
|
||||
#include <regex>
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include "framework/common/debug/log.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "runtime/device/ascend/executor/tiling/op_tiling_calculater.h"
|
||||
#include "register/op_tiling.h"
|
||||
#include "utils/convert_utils_base.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "runtime/device/kernel_runtime_manager.h"
|
||||
#include "pipeline/jit/static_analysis/static_analysis.h"
|
||||
#include "common/trans.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
AiCoreDynamicKernel::~AiCoreDynamicKernel() {
|
||||
if (tiling_data_ptr_ != nullptr) {
|
||||
auto ret = rtFree(tiling_data_ptr_);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "rtFree tiling_data_ptr_ failed";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AiCoreDynamicKernel::Execute() {
|
||||
if (stream_ == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "stream_ptr should not be nullptr.";
|
||||
}
|
||||
MS_LOG(INFO) << "Start Execute node:" << cnode_ptr_->fullname_with_scope();
|
||||
rtL2Ctrl_t *l2ctrl = nullptr;
|
||||
auto args_size = static_cast<uint32_t>(UlongToUint(sizeof(void *)) * runtime_args_.size());
|
||||
if (RT_ERROR_NONE != rtKernelLaunch(stub_func_, block_dim_, runtime_args_.data(), args_size, l2ctrl, stream_)) {
|
||||
MS_LOG(EXCEPTION) << "Call runtime rtKernelLaunch error.";
|
||||
}
|
||||
MS_LOG(INFO) << "End Execute node:" << cnode_ptr_->fullname_with_scope();
|
||||
}
|
||||
|
||||
std::string ReplaceInvalidJsonStr(const std::string &str) {
|
||||
auto ret = std::regex_replace(str, std::regex("100000000"), R"("100000000")");
|
||||
ret = std::regex_replace(ret, std::regex("100000001"), R"("100000001")");
|
||||
ret = std::regex_replace(ret, std::regex("100000002"), R"("100000002")");
|
||||
ret = std::regex_replace(ret, std::regex("True"), R"(true)");
|
||||
ret = std::regex_replace(ret, std::regex("False"), R"(false)");
|
||||
return ret;
|
||||
}
|
||||
|
||||
void AiCoreDynamicKernel::ParseCompileJson() {
|
||||
if (!AnfAlgo::IsDynamicShape(cnode_ptr_)) {
|
||||
return;
|
||||
}
|
||||
if (!AnfAlgo::HasNodeAttr(kAttrCompileInfo, cnode_ptr_)) {
|
||||
MS_LOG(EXCEPTION) << "Get compile_info failed";
|
||||
}
|
||||
auto compile_info_attr = AnfAlgo::GetNodeAttr<std::string>(cnode_ptr_, kAttrCompileInfo);
|
||||
std::replace(compile_info_attr.begin(), compile_info_attr.end(), '\'', '\"');
|
||||
compile_info_attr = ReplaceInvalidJsonStr(compile_info_attr);
|
||||
MS_LOG(INFO) << "Get compile_info:" << compile_info_attr;
|
||||
|
||||
try {
|
||||
compile_info_json_ = std::make_shared<nlohmann::json>(nlohmann::json::parse(compile_info_attr));
|
||||
} catch (nlohmann::json::parse_error &e) {
|
||||
MS_LOG(EXCEPTION) << "parse json failed, error:" << e.what();
|
||||
}
|
||||
|
||||
if (AnfAlgo::HasNodeAttr(kAttrFusionType, cnode_ptr_)) {
|
||||
auto fusion_type = AnfAlgo::GetNodeAttr<std::string>(cnode_ptr_, kAttrFusionType);
|
||||
MS_LOG(INFO) << "Get fusion_type:" << fusion_type;
|
||||
(*compile_info_json_)["_pattern"] = fusion_type;
|
||||
}
|
||||
}
|
||||
|
||||
void AiCoreDynamicKernel::Initialize() {
|
||||
DynamicKernel::Initialize();
|
||||
ParseCompileJson();
|
||||
}
|
||||
|
||||
void AiCoreDynamicKernel::UpdateArgs() {
|
||||
ComputeTiling();
|
||||
|
||||
if (!CopyTilingToDevice()) {
|
||||
MS_LOG(EXCEPTION) << "Copy tiling to device failed";
|
||||
}
|
||||
|
||||
AllocateWorkspace();
|
||||
|
||||
auto kernel_mod = AnfAlgo::GetKernelMod(cnode_ptr_);
|
||||
MS_EXCEPTION_IF_NULL(kernel_mod);
|
||||
|
||||
AddressPtrList kernel_inputs;
|
||||
AddressPtrList kernel_workspaces;
|
||||
AddressPtrList kernel_outputs;
|
||||
KernelRuntime::GenLaunchArgs(*kernel_mod, cnode_ptr_, &kernel_inputs, &kernel_workspaces, &kernel_outputs);
|
||||
|
||||
runtime_args_.clear();
|
||||
|
||||
(void)std::transform(std::begin(kernel_inputs), std::end(kernel_inputs), std::back_inserter(runtime_args_),
|
||||
[](const AddressPtr &input) -> void * { return input->addr; });
|
||||
(void)std::transform(std::begin(kernel_outputs), std::end(kernel_outputs), std::back_inserter(runtime_args_),
|
||||
[](const AddressPtr &output) -> void * { return output->addr; });
|
||||
// Update workspace
|
||||
if (!workspace_addr_.empty()) {
|
||||
(void)std::transform(std::begin(workspace_addr_), std::end(workspace_addr_), std::back_inserter(runtime_args_),
|
||||
[](const DeviceAddressPtr &address_ptr) -> void * { return address_ptr->GetMutablePtr(); });
|
||||
}
|
||||
|
||||
if (is_dynamic_shape_ && !tiling_data_.empty() && tiling_data_ptr_ != nullptr) {
|
||||
runtime_args_.push_back(tiling_data_ptr_);
|
||||
}
|
||||
}
|
||||
|
||||
void AiCoreDynamicKernel::ComputeTiling() {
|
||||
MS_EXCEPTION_IF_NULL(cnode_ptr_);
|
||||
MS_LOG(INFO) << "Start compute tiling of:" << cnode_ptr_->fullname_with_scope();
|
||||
optiling::OpRunInfo op_run_info;
|
||||
|
||||
OpTilingCalculater::GetInstance().CalculateTiling(NOT_NULL(cnode_ptr_), NOT_NULL(compile_info_json_),
|
||||
depend_tensor_map_, NOT_NULL(&op_run_info));
|
||||
block_dim_ = op_run_info.block_dim;
|
||||
workspaces_size_ = op_run_info.workspaces;
|
||||
tiling_data_ = op_run_info.tiling_data.str();
|
||||
}
|
||||
|
||||
void AiCoreDynamicKernel::AllocateWorkspace() {
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
auto device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
|
||||
auto runtime_instance = KernelRuntimeManager::Instance().GetSingleKernelRuntime(kAscendDevice, device_id);
|
||||
MS_EXCEPTION_IF_NULL(runtime_instance);
|
||||
|
||||
workspace_addr_.clear();
|
||||
for (auto size : workspaces_size_) {
|
||||
auto device_address_ptr = std::make_shared<AscendDeviceAddress>(nullptr, size);
|
||||
auto device_ptr = runtime_instance->MallocMem(MemType::kDynamicMem, size, device_address_ptr);
|
||||
if (device_ptr == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "MallocMem from memory pool failed";
|
||||
}
|
||||
workspace_addr_.emplace_back(device_address_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
bool AiCoreDynamicKernel::CopyTilingToDevice() {
|
||||
if (tiling_data_.size() > op_para_size_) {
|
||||
MS_LOG(EXCEPTION) << "compute tiling size:" << tiling_data_.size()
|
||||
<< " larger than tbe build op_para_size:" << op_para_size_;
|
||||
}
|
||||
|
||||
if (tiling_data_.empty() || tiling_data_ptr_ == nullptr) {
|
||||
MS_LOG(INFO) << "tiling size is 0, skip rtMemcpyAsync";
|
||||
return true;
|
||||
}
|
||||
|
||||
auto ret = rtMemcpyAsync(tiling_data_ptr_, tiling_data_.size(), tiling_data_.c_str(), tiling_data_.size(),
|
||||
RT_MEMCPY_HOST_TO_DEVICE_EX, stream_);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(EXCEPTION) << "tiling rtMemcpyAsync failed, ret:" << ret;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void AiCoreDynamicKernel::PostExecute() {}
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,70 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_AI_CORE_DYNAMIC_KERNEL_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_AI_CORE_DYNAMIC_KERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include "nlohmann/json.hpp"
|
||||
#include "ir/tensor.h"
|
||||
#include "runtime/device/device_address.h"
|
||||
#include "mindspore/ccsrc/runtime/device/executor/dynamic_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
class AiCoreDynamicKernel : public DynamicKernel {
|
||||
public:
|
||||
AiCoreDynamicKernel(const void *stub_fubc, uint32_t block_dim, void *tiling_data_ptr, uint32_t op_para_size,
|
||||
void *stream, const CNodePtr &cnode_ptr, const std::vector<void *> &runtime_args)
|
||||
: DynamicKernel(stream, cnode_ptr),
|
||||
stub_func_(stub_fubc),
|
||||
block_dim_(block_dim),
|
||||
tiling_data_ptr_(tiling_data_ptr),
|
||||
op_para_size_(op_para_size),
|
||||
runtime_args_(runtime_args) {}
|
||||
~AiCoreDynamicKernel() override;
|
||||
|
||||
void Execute() override;
|
||||
void UpdateArgs() override;
|
||||
void Initialize() override;
|
||||
void PostExecute() override;
|
||||
|
||||
protected:
|
||||
void AllocateWorkspace();
|
||||
void ParseCompileJson();
|
||||
|
||||
private:
|
||||
const void *stub_func_;
|
||||
uint32_t block_dim_;
|
||||
void *tiling_data_ptr_; // device ptr
|
||||
uint32_t op_para_size_; // size of tiling_data_ptr_
|
||||
std::vector<void *> runtime_args_;
|
||||
std::string tiling_data_;
|
||||
std::vector<int64_t> workspaces_size_;
|
||||
std::vector<DeviceAddressPtr> workspace_addr_;
|
||||
std::shared_ptr<nlohmann::json> compile_info_json_;
|
||||
|
||||
void ComputeTiling();
|
||||
bool CopyTilingToDevice();
|
||||
};
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_AI_CORE_DYNAMIC_KERNEL_H_
|
|
@ -0,0 +1,204 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "runtime/device/ascend/executor/ai_cpu_dynamic_kernel.h"
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <algorithm>
|
||||
#include "runtime/mem.h"
|
||||
#include "runtime/kernel.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
|
||||
#include "runtime/device/ascend/executor/executor_callback.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
AiCpuDynamicKernel::~AiCpuDynamicKernel() {
|
||||
// free dev ptr
|
||||
if (ext_info_addr_dev_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
auto ret = rtFree(ext_info_addr_dev_);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "rtFree failed";
|
||||
}
|
||||
}
|
||||
|
||||
void AiCpuDynamicKernel::UpdateArgs() {
|
||||
if (!UpdateInputOutputAddr()) {
|
||||
MS_LOG(EXCEPTION) << "Update input output failed";
|
||||
}
|
||||
|
||||
if (is_dynamic_shape_ && !UpdateExtInfo()) {
|
||||
MS_LOG(EXCEPTION) << "Update ExtInfo failed";
|
||||
}
|
||||
}
|
||||
|
||||
void AiCpuDynamicKernel::Execute() {
|
||||
MS_LOG(INFO) << "Execute AiCpuDynamicKerenl Start";
|
||||
auto ret = rtCpuKernelLaunchWithFlag(
|
||||
reinterpret_cast<const void *>(so_name_.c_str()), reinterpret_cast<const void *>(kernel_name_.c_str()), 1,
|
||||
reinterpret_cast<const void *>(args_.data()), args_.size(), nullptr, stream_, RT_KERNEL_DEFAULT);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(EXCEPTION) << "Call rtCpuKernelLaunchWithFlag Failed";
|
||||
}
|
||||
}
|
||||
|
||||
void AiCpuDynamicKernel::Initialize() {
|
||||
// is dynamic
|
||||
MS_LOG(INFO) << "Initialize node:" << cnode_ptr_->fullname_with_scope();
|
||||
DynamicKernel::Initialize();
|
||||
|
||||
input_num_ = AnfAlgo::GetInputTensorNum(cnode_ptr_);
|
||||
output_num_ = AnfAlgo::GetOutputTensorNum(cnode_ptr_);
|
||||
|
||||
// Parse aicpu ext info
|
||||
if (is_dynamic_shape_) {
|
||||
MS_EXCEPTION_IF_NULL(cnode_ptr_);
|
||||
ext_info_handler_ =
|
||||
std::make_shared<AicpuExtInfoHandler>(cnode_ptr_->fullname_with_scope(), input_num_, output_num_, DEPEND_COMPUTE);
|
||||
ext_info_handler_->Parse(ext_info_data_);
|
||||
}
|
||||
|
||||
if (ext_info_data_.empty()) {
|
||||
MS_LOG(INFO) << "No need to copy to device, ext_info_data_ is empty. ";
|
||||
return;
|
||||
}
|
||||
|
||||
// Allocate ext info addr in device
|
||||
auto ret = rtMalloc(&ext_info_addr_dev_, ext_info_data_.size(), RT_MEMORY_HBM);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(EXCEPTION) << "Call rtMalloc ext_info_addr_dev_ failed";
|
||||
}
|
||||
ext_info_size_ = ext_info_data_.size();
|
||||
|
||||
ret = rtMemcpy(ext_info_addr_dev_, ext_info_size_, ext_info_data_.data(), ext_info_data_.size(),
|
||||
RT_MEMCPY_HOST_TO_DEVICE);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(EXCEPTION) << "Call rtMemcpy ext_info_addr_dev_ failed";
|
||||
}
|
||||
|
||||
auto aicpu_param_head = reinterpret_cast<kernel::AicpuParamHead *>(args_.data());
|
||||
aicpu_param_head->extInfoLength = ext_info_size_;
|
||||
aicpu_param_head->extInfoAddr = reinterpret_cast<uint64_t>(ext_info_addr_dev_);
|
||||
}
|
||||
|
||||
bool AiCpuDynamicKernel::UpdateInputOutputAddr() {
|
||||
std::vector<uint64_t> io_addrs;
|
||||
io_addrs.reserve(input_num_ + output_num_);
|
||||
|
||||
for (size_t i = 0; i < input_num_; ++i) {
|
||||
auto input_addr = AnfAlgo::GetPrevNodeOutputAddr(cnode_ptr_, i);
|
||||
io_addrs.emplace_back(reinterpret_cast<uintptr_t>(input_addr->GetMutablePtr()));
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < output_num_; ++i) {
|
||||
auto output_addr = AnfAlgo::GetOutputAddr(cnode_ptr_, i);
|
||||
io_addrs.emplace_back(reinterpret_cast<uintptr_t>(output_addr->GetMutablePtr()));
|
||||
}
|
||||
|
||||
if (args_.empty()) {
|
||||
MS_LOG(ERROR) << "args_ is empty";
|
||||
return false;
|
||||
}
|
||||
|
||||
auto io_ptr = args_.data() + sizeof(kernel::AicpuParamHead);
|
||||
auto ret =
|
||||
memcpy_s(io_ptr, args_.size() - sizeof(kernel::AicpuParamHead), &io_addrs[0], sizeof(uint64_t) * io_addrs.size());
|
||||
if (ret != 0) {
|
||||
MS_LOG(EXCEPTION) << "Memcpy input output addr failed";
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AiCpuDynamicKernel::UpdateExtInfo() {
|
||||
MS_LOG(INFO) << "UpdateExtInfo of " << cnode_ptr_->fullname_with_scope() << " start";
|
||||
if (input_num_ == 0 && output_num_ == 0) {
|
||||
MS_LOG(INFO) << "Node:" << cnode_ptr_->fullname_with_scope() << " no need to update output shape";
|
||||
return true;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < input_num_; ++i) {
|
||||
ext_info_handler_->UpdateInputShapeAndType(i, NOT_NULL(cnode_ptr_));
|
||||
}
|
||||
|
||||
if (unknow_type_ != DEPEND_COMPUTE) {
|
||||
for (size_t i = 0; i < output_num_; ++i) {
|
||||
ext_info_handler_->UpdateOutputShapeAndType(i, NOT_NULL(cnode_ptr_));
|
||||
}
|
||||
}
|
||||
|
||||
auto ret = rtMemcpy(ext_info_addr_dev_, ext_info_size_, ext_info_handler_->GetExtInfo(),
|
||||
ext_info_handler_->GetExtInfoLen(), RT_MEMCPY_HOST_TO_DEVICE);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "UpdateExtInfo rtMemcpy failed";
|
||||
return false;
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "UpdateExtInfo of " << cnode_ptr_->fullname_with_scope() << " end";
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AiCpuDynamicKernel::UpdateOutputShapeFromExtInfo() {
|
||||
if (input_num_ == 0) {
|
||||
MS_LOG(WARNING) << "input num is 0";
|
||||
return true;
|
||||
}
|
||||
MS_LOG(INFO) << "UpdateOutputShapeFromExtInfo start";
|
||||
auto ret = rtMemcpy(ext_info_handler_->GetExtInfo(), ext_info_handler_->GetExtInfoLen(), ext_info_addr_dev_,
|
||||
ext_info_size_, RT_MEMCPY_DEVICE_TO_HOST);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "rtMemcpy output shape failed";
|
||||
return false;
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "rtMemcpy from device to host success";
|
||||
|
||||
std::vector<TypeId> type_ids;
|
||||
std::vector<std::vector<size_t>> shapes;
|
||||
|
||||
for (size_t i = 0; i < output_num_; ++i) {
|
||||
MS_LOG(INFO) << "Get output:" << output_num_ << " Shape";
|
||||
std::vector<int64_t> shape;
|
||||
TypeId type_id;
|
||||
ext_info_handler_->GetOutputShapeAndType(i, NOT_NULL(&shape), NOT_NULL(&type_id));
|
||||
|
||||
for (auto x : shape) {
|
||||
MS_LOG(INFO) << "Update output:" << i << " shape:" << x;
|
||||
}
|
||||
|
||||
type_ids.emplace_back(type_id);
|
||||
std::vector<size_t> size_t_shape;
|
||||
std::transform(shape.begin(), shape.end(), std::back_inserter(size_t_shape), LongToSize);
|
||||
shapes.emplace_back(size_t_shape);
|
||||
}
|
||||
|
||||
AnfAlgo::SetOutputInferTypeAndShape(type_ids, shapes, cnode_ptr_.get());
|
||||
return true;
|
||||
}
|
||||
|
||||
void AiCpuDynamicKernel::PostExecute() {
|
||||
MS_LOG(INFO) << "Aicpu " << cnode_ptr_->fullname_with_scope() << " PostExecute";
|
||||
if (AnfAlgo::IsDynamicShape(cnode_ptr_) && unknow_type_ == DEPEND_COMPUTE) {
|
||||
MS_LOG(INFO) << "Update aicpu kernel output shape from ext_info";
|
||||
UpdateOutputShapeFromExtInfo();
|
||||
}
|
||||
}
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,76 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_AI_CPU_DYNAMIC_KERNEL_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_AI_CPU_DYNAMIC_KERNEL_H_
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include "runtime/device/executor/dynamic_kernel.h"
|
||||
#include "ir/anf.h"
|
||||
#include "runtime/device/ascend/executor/aicpu_ext_info_handle.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
class AiCpuDynamicKernel : public DynamicKernel {
|
||||
public:
|
||||
AiCpuDynamicKernel(void *stream, const CNodePtr &cnode_ptr, const std::string &args, const std::string &ext_info_data,
|
||||
const std::string &so_name, const std::string &kernel_name)
|
||||
: DynamicKernel(stream, cnode_ptr),
|
||||
args_(args),
|
||||
ext_info_data_(ext_info_data),
|
||||
so_name_(so_name),
|
||||
kernel_name_(kernel_name),
|
||||
ext_info_handler_(nullptr),
|
||||
ext_info_addr_dev_(nullptr),
|
||||
ext_info_size_(0),
|
||||
input_num_(0),
|
||||
output_num_(0),
|
||||
unknow_type_(DEPEND_COMPUTE) {}
|
||||
|
||||
~AiCpuDynamicKernel() override;
|
||||
|
||||
void UpdateArgs() override;
|
||||
void Execute() override;
|
||||
void Initialize() override;
|
||||
void PostExecute() override;
|
||||
|
||||
// Get Compute Shape from ExtInfo
|
||||
bool UpdateOutputShapeFromExtInfo();
|
||||
|
||||
private:
|
||||
std::string args_;
|
||||
std::string ext_info_data_;
|
||||
std::string so_name_;
|
||||
std::string kernel_name_;
|
||||
|
||||
std::shared_ptr<AicpuExtInfoHandler> ext_info_handler_;
|
||||
void *ext_info_addr_dev_;
|
||||
size_t ext_info_size_;
|
||||
|
||||
size_t input_num_;
|
||||
size_t output_num_;
|
||||
|
||||
UnknowShapeOpType unknow_type_;
|
||||
|
||||
bool UpdateInputOutputAddr();
|
||||
bool UpdateExtInfo();
|
||||
};
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_AI_CPU_DYNAMIC_KERNEL_H_
|
|
@ -0,0 +1,218 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "runtime/device/ascend/executor/aicpu_ext_info_handle.h"
|
||||
#include <algorithm>
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
namespace {
|
||||
// if dim count is not reach kMaxShapeDims(8), use INT64_MIN to mark dim end.
|
||||
constexpr int64_t kDimEndFlag = INT64_MIN;
|
||||
} // namespace
|
||||
// Parses the serialized aicpu ext-info blob into this handler's state.
// The blob is a sequence of {AicpuExtInfo header, infoLen payload bytes}
// records; recognized record types populate input_shape_and_type_ /
// output_shape_and_type_ (raw pointers into the private copy ext_info_).
// Returns false only for an empty blob; malformed content raises via
// MS_LOG(EXCEPTION).
bool AicpuExtInfoHandler::Parse(const std::string &ext_info) {
  MS_LOG(INFO) << "Parse Node:" << node_name_ << " start";
  if (ext_info.empty()) {
    MS_LOG(ERROR) << "Node:" << node_name_ << " ext_info is empty";
    return false;
  }

  // Keep a private mutable copy of the blob; the ShapeAndType tables below
  // point into this buffer so later Update* calls can patch it in place.
  ext_info_len_ = ext_info.size();
  ext_info_.reset(new (std::nothrow) uint8_t[ext_info_len_]);
  MS_EXCEPTION_IF_NULL(ext_info_);

  // NOTE(review): memcpy_s return value is deliberately discarded here;
  // dest size equals source size so it cannot truncate.
  (void)memcpy_s(ext_info_.get(), ext_info_len_, ext_info.c_str(), ext_info.size());

  // Re-parsing invalidates any previously collected table entries.
  input_shape_and_type_.clear();
  output_shape_and_type_.clear();

  auto ext_info_data = ext_info_.get();
  size_t offset = 0;
  // Walk record by record; each iteration consumes one header + payload.
  while (offset + sizeof(AicpuExtInfo) <= ext_info_len_) {
    auto aicpu_ext_info = reinterpret_cast<AicpuExtInfo *>(ext_info_data + offset);
    MS_EXCEPTION_IF_NULL(aicpu_ext_info);
    switch (aicpu_ext_info->infoType) {
      case kernel::FWK_ADPT_EXT_SHAPE_TYPE:
        if (!ParseExtShapeType(aicpu_ext_info)) {
          MS_LOG(EXCEPTION) << "Parse ext shape type failed.";
        }
        break;
      case kernel::FWK_ADPT_EXT_INPUT_SHAPE:
        if (!ParseExtInputShape(aicpu_ext_info)) {
          MS_LOG(EXCEPTION) << "Parse ext input shape failed.";
        }
        break;
      case kernel::FWK_ADPT_EXT_OUTPUT_SHAPE:
        if (!ParseExtOutputShape(aicpu_ext_info)) {
          MS_LOG(EXCEPTION) << "Parse ext output shape failed.";
        }
        break;
      default:
        // Unknown record types are skipped, not treated as errors.
        MS_LOG(INFO) << "Ignore Node:" << node_name_ << " infoType:" << aicpu_ext_info->infoType
                     << " infoLen:" << aicpu_ext_info->infoLen;
        break;
    }
    // Advance past this record's header and its payload.
    // NOTE(review): infoLen comes from the blob and is not validated against
    // the remaining length before the add — a corrupt value could wrap offset;
    // confirm the producer is trusted.
    offset += sizeof(AicpuExtInfo);
    offset += aicpu_ext_info->infoLen;
  }

  // A well-formed blob is consumed exactly; anything else is malformed.
  if (offset != ext_info_len_) {
    MS_LOG(EXCEPTION) << "Node:" << node_name_ << " ext_info format error, parse not reach end, offset=" << offset
                      << ", ext_info_len" << ext_info_len_;
  }
  MS_LOG(INFO) << "Node:" << node_name_ << " parse ext info end.";
  return true;
}
|
||||
|
||||
// Validates the shape-type record: its payload must be a single int32_t whose
// value matches the unknown_type_ this handler was constructed with.
// Returns false on any mismatch.
bool AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) {
  if (aicpu_ext_info->infoLen != sizeof(int32_t)) {
    MS_LOG(ERROR) << "Node:" << node_name_ << " parse ext shape type failed as infoLen must be " << sizeof(int32_t)
                  << " but got:" << aicpu_ext_info->infoLen;
    return false;
  }

  auto type = reinterpret_cast<const int32_t *>(aicpu_ext_info->infoMsg);

  if (*type != unknown_type_) {
    MS_LOG(ERROR) << "Node:" << node_name_ << " parse ext shape type failed as need:" << unknown_type_
                  << " but got:" << *type;
    // Fix: previously fell through and returned true despite the mismatch,
    // so a wrong shape type was silently accepted.
    return false;
  }
  MS_LOG(INFO) << "Node:" << node_name_ << "parse ext shape type success infoLen=" << aicpu_ext_info->infoLen;
  return true;
}
|
||||
|
||||
// Collects per-input ShapeAndType pointers from an input-shape record.
// The payload must hold exactly input_num_ AicpuShapeAndType entries; the
// stored pointers alias the record's payload inside ext_info_.
// Returns false when the payload length is wrong.
bool AicpuExtInfoHandler::ParseExtInputShape(AicpuExtInfo *aicpu_ext_info) {
  auto need_len = input_num_ * sizeof(AicpuShapeAndType);

  if (aicpu_ext_info->infoLen != need_len) {
    MS_LOG(ERROR) << "Node:" << node_name_
                  << " parse ext input shape failed as aicpu_ext_info->infoLen:" << aicpu_ext_info->infoLen
                  << " and need_len:" << need_len;
    // Fix: previously this error path fell through and reinterpreted the
    // undersized/oversized payload anyway, reading past valid data.
    return false;
  }
  auto input = reinterpret_cast<AicpuShapeAndType *>(aicpu_ext_info->infoMsg);

  for (uint32_t index = 0; index < input_num_; ++index) {
    input_shape_and_type_.emplace_back(&input[index]);
  }
  // Consistency: stream node_name_ directly (the .c_str() call was redundant
  // and differed from every sibling log in this file).
  MS_LOG(INFO) << "Node:" << node_name_ << " parse ext input shape success infoLen=" << aicpu_ext_info->infoLen;
  return true;
}
|
||||
|
||||
// Collects per-output ShapeAndType pointers from an output-shape record.
// The payload must hold exactly output_num_ AicpuShapeAndType entries; the
// stored pointers alias the record's payload inside ext_info_.
// Returns false when the payload length is wrong.
bool AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) {
  auto need_len = output_num_ * sizeof(AicpuShapeAndType);
  if (aicpu_ext_info->infoLen != need_len) {
    // Fix: this is a failure path (the caller raises on false) — log at
    // ERROR level to match ParseExtInputShape, not INFO.
    MS_LOG(ERROR) << "Node:" << node_name_
                  << " parse ext output shape failed, aicpu_ext_info->infoLen:" << aicpu_ext_info->infoLen
                  << " need_len:" << need_len;
    return false;
  }

  auto output = reinterpret_cast<AicpuShapeAndType *>(aicpu_ext_info->infoMsg);
  for (uint32_t index = 0; index < output_num_; ++index) {
    output_shape_and_type_.emplace_back(&output[index]);
  }
  MS_LOG(INFO) << "Node:" << node_name_ << " parse ext output shape success infoLen=" << aicpu_ext_info->infoLen;
  return true;
}
|
||||
|
||||
bool AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const NotNull<AnfNodePtr> &anf_node) {
|
||||
if (input_index >= input_num_) {
|
||||
MS_LOG(ERROR) << "input_index=" << input_index << " >= input_num_:" << input_num_;
|
||||
return false;
|
||||
}
|
||||
|
||||
auto input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
|
||||
auto data_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
|
||||
std::vector<int64_t> tmp_shape;
|
||||
std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(tmp_shape), SizeToLong);
|
||||
return UpdateShapeAndType(tmp_shape, data_type, NOT_NULL(input_shape_and_type_[input_index]));
|
||||
}
|
||||
|
||||
bool AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const NotNull<AnfNodePtr> &anf_node) {
|
||||
if (output_index >= output_num_) {
|
||||
MS_LOG(ERROR) << "output_index:" << output_index << " >= output_num_:" << output_num_;
|
||||
return false;
|
||||
}
|
||||
|
||||
auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
|
||||
auto max_shape = AnfAlgo::GetOutputMaxShape(anf_node, output_index);
|
||||
if (shape.size() != max_shape.size()) {
|
||||
MS_LOG(ERROR) << "shape size != max_shape size";
|
||||
return true;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < shape.size(); ++i) {
|
||||
if (i < max_shape.size() && shape[i] == SIZE_MAX) {
|
||||
MS_LOG(INFO) << "Node:" << node_name_ << " update shape from SIZE_MAX to " << max_shape[i];
|
||||
shape[i] = max_shape[i];
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<int64_t> tmp_shape;
|
||||
std::transform(shape.begin(), shape.end(), std::back_inserter(tmp_shape), SizeToLong);
|
||||
return UpdateShapeAndType(tmp_shape, AnfAlgo::GetOutputDeviceDataType(anf_node, output_index),
|
||||
NOT_NULL(output_shape_and_type_[output_index]));
|
||||
}
|
||||
|
||||
bool AicpuExtInfoHandler::GetOutputShapeAndType(uint32_t output_index, NotNull<std::vector<int64_t> *> shape,
|
||||
NotNull<TypeId *> data_type) {
|
||||
MS_LOG(INFO) << "Get " << node_name_ << " Output:" << output_index << " Shape And Type";
|
||||
GetShapeAndType(NOT_NULL(output_shape_and_type_[output_index]), shape, data_type);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AicpuExtInfoHandler::UpdateShapeAndType(const std::vector<int64_t> &shape, TypeId data_type,
|
||||
NotNull<AicpuShapeAndType *> shape_and_type) {
|
||||
if (shape.empty() || shape.size() > kernel::kMaxShapeDims) {
|
||||
MS_LOG(ERROR) << "Invalid shape:" << shape.size();
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t index = 0;
|
||||
for (; index < shape.size(); ++index) {
|
||||
shape_and_type->dims[index] = shape[index];
|
||||
}
|
||||
if (index < kernel::kMaxShapeDims) {
|
||||
shape_and_type->dims[index] = kDimEndFlag;
|
||||
}
|
||||
|
||||
// now only support update shape, type is not support
|
||||
return true;
|
||||
}
|
||||
|
||||
// Decodes one ShapeAndType slot: copies dims up to the kDimEndFlag
// terminator into `shape` and converts the proto dtype into a TypeId.
// Raises when the proto type has no MindSpore mapping.
void AicpuExtInfoHandler::GetShapeAndType(NotNull<const AicpuShapeAndType *> shape_and_type,
                                          NotNull<std::vector<int64_t> *> shape, NotNull<TypeId *> data_type) {
  for (int64_t tmpDim : shape_and_type->dims) {
    if (tmpDim == kDimEndFlag) {
      break;
    }
    shape->emplace_back(tmpDim);
  }

  auto ms_type = kernel::AicpuOpUtil::ProtoTypeToMsType(shape_and_type->type);
  if (ms_type == -1) {
    // Fix: message typo "Unspport" corrected.
    MS_LOG(EXCEPTION) << "Unsupported Proto Type:" << shape_and_type->type;
  }
  // Leftover per-dim/per-type "Debug" INFO logs removed — they spammed the
  // log on every query and carried no information beyond the result itself.
  *data_type = static_cast<TypeId>(ms_type);
}
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,88 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_AICPU_EXT_INFO_HANDLE_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_AICPU_EXT_INFO_HANDLE_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
|
||||
#include "utils/contract.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
// for unknown shape op type
|
||||
enum UnknowShapeOpType {
|
||||
DEPEND_IN_SHAPE = 1, // op out shape get by input shape
|
||||
DEPEND_CONST_VALUE = 2, // op out shape get by const op value
|
||||
DEPEND_SHAPE_RANGE = 3, // op out shape get by range
|
||||
DEPEND_COMPUTE = 4 // op out shape get by totally computing
|
||||
};
|
||||
|
||||
using AicpuShapeAndType = kernel::ShapeAndType;
|
||||
using AicpuExtInfo = kernel::ExtInfo;
|
||||
|
||||
// Owns and interprets the "ext info" blob exchanged with an AICPU kernel for
// dynamic-shape execution: Parse() copies and indexes the blob, the Update*
// methods patch current shapes into it before launch, and the Get* methods
// read shapes back after execution.
class AicpuExtInfoHandler {
 public:
  // node_name: label used only for logging.
  // input_num/output_num: expected entry counts for the shape tables.
  // unknown_type: expected shape-type record value (see UnknowShapeOpType).
  AicpuExtInfoHandler(std::string node_name, uint32_t input_num, uint32_t output_num, UnknowShapeOpType unknown_type)
      : node_name_(std::move(node_name)),
        input_num_(input_num),
        output_num_(output_num),
        unknown_type_(unknown_type),
        ext_info_len_(0) {}

  ~AicpuExtInfoHandler() = default;

  // Raw buffer handed to the kernel; valid only after a successful Parse().
  uint8_t *GetExtInfo() const { return ext_info_.get(); }
  size_t GetExtInfoLen() const { return ext_info_len_; }

  // Copies ext_info and builds the input/output shape tables from it.
  bool Parse(const std::string &ext_info);

  // Writes the node's current input/output device shape into the buffer.
  bool UpdateInputShapeAndType(uint32_t input_index, const NotNull<AnfNodePtr> &anf_node);

  bool UpdateOutputShapeAndType(uint32_t output_index, const NotNull<AnfNodePtr> &anf_node);

  // Reads an output's shape/dtype back out of the buffer.
  bool GetOutputShapeAndType(uint32_t output_index, NotNull<std::vector<int64_t> *> shape, NotNull<TypeId *> data_type);

 private:
  // Record parsers used by Parse(); each consumes one AicpuExtInfo record.
  bool ParseExtShapeType(AicpuExtInfo *aicpu_ext_info);
  bool ParseExtInputShape(AicpuExtInfo *aicpu_ext_info);
  bool ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info);

  static bool UpdateShapeAndType(const std::vector<int64_t> &shape, TypeId data_type,
                                 NotNull<AicpuShapeAndType *> shape_and_type);

  static void GetShapeAndType(NotNull<const AicpuShapeAndType *> shape_and_type, NotNull<std::vector<int64_t> *> shape,
                              NotNull<TypeId *> data_type);

 private:
  const std::string node_name_;
  const uint32_t input_num_;
  const uint32_t output_num_;
  UnknowShapeOpType unknown_type_;
  size_t ext_info_len_;

  // Private copy of the blob; the vectors below hold non-owning pointers
  // into this buffer, so their lifetime is tied to ext_info_.
  std::unique_ptr<uint8_t[]> ext_info_;
  std::vector<AicpuShapeAndType *> input_shape_and_type_;
  std::vector<AicpuShapeAndType *> output_shape_and_type_;
};
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_AICPU_EXT_INFO_HANDLE_H_
|
|
@ -0,0 +1,41 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "runtime/device/ascend/executor/executor_callback.h"
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
void ExecutorCallback::RegistCallback(const std::function<void()> &callback) {
|
||||
std::lock_guard<std::mutex> guard(lock_);
|
||||
callback_queue_.push(callback);
|
||||
}
|
||||
|
||||
void ExecutorCallback::Consume() {
|
||||
std::lock_guard<std::mutex> guard(lock_);
|
||||
while (!callback_queue_.empty()) {
|
||||
auto callback_func = callback_queue_.front();
|
||||
callback_queue_.pop();
|
||||
if (!callback_func) {
|
||||
MS_LOG(EXCEPTION) << "callback_func is empty";
|
||||
}
|
||||
callback_func();
|
||||
}
|
||||
}
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,49 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_EXECUTOR_CALLBACK_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_EXECUTOR_CALLBACK_H_
|
||||
|
||||
#include <queue>
|
||||
#include <mutex>
|
||||
#include <functional>
|
||||
#include "utils/ms_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
// Process-wide FIFO of deferred callbacks: producers register work via
// RegistCallback() and a consumer drains it via Consume(). Access to the
// queue is serialized by lock_.
class ExecutorCallback {
 public:
  // Meyers singleton; construction is thread-safe in C++11 and later.
  static ExecutorCallback &GetInstance() {
    static ExecutorCallback instance;
    return instance;
  }

  // Appends a callback to the queue (thread-safe).
  void RegistCallback(const std::function<void()> &callback);
  // Pops and invokes queued callbacks in FIFO order.
  void Consume();

 private:
  ExecutorCallback() = default;
  ~ExecutorCallback() = default;
  DISABLE_COPY_AND_ASSIGN(ExecutorCallback);

  std::queue<std::function<void()>> callback_queue_;
  std::mutex lock_;  // guards callback_queue_
};
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_EXECUTOR_CALLBACK_H_
|
|
@ -0,0 +1,187 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "runtime/device/ascend/executor/hccl_dynamic_kernel.h"
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <vector>
|
||||
#include "hccl/hcom.h"
|
||||
#include "common/opskernel/ge_task_info.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "runtime/device/kernel_runtime.h"
|
||||
#include "backend/kernel_compiler/hccl/hcom_util.h"
|
||||
|
||||
namespace {
|
||||
// Find so in RPATH or LD_LIBRARY_PATH (/usr/local/Ascend/fwkacllib/lib64/)
|
||||
constexpr auto kHcomGraphAdaptorPath = "libhcom_graph_adaptor.so";
|
||||
} // namespace
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
// Refreshes the HCCL launch parameters (input/output buffer addresses and
// element count) from the node's current device state. A no-op for static
// shapes, where the values captured at construction remain valid.
void HcclDynamicKernel::UpdateArgs() {
  if (!is_dynamic_shape_) {
    MS_LOG(INFO) << "Not Dynamic Shape";
    return;
  }
  MS_LOG(INFO) << "Start to UpdateArgs";
  auto kernel_mod = AnfAlgo::GetKernelMod(cnode_ptr_);
  MS_EXCEPTION_IF_NULL(kernel_mod);
  // Update input, output, count
  AddressPtrList kernel_inputs;
  AddressPtrList kernel_workspaces;
  AddressPtrList kernel_outputs;
  KernelRuntime::GenLaunchArgs(*kernel_mod, cnode_ptr_, &kernel_inputs, &kernel_workspaces, &kernel_outputs);
  if (kernel_inputs.empty() || kernel_outputs.empty()) {
    MS_LOG(EXCEPTION) << "Inputs or outputs is empty";
  }
  // Only the first input/output buffer is used by the HCCL op.
  auto input0 = kernel_inputs.at(0);
  auto output0 = kernel_outputs.at(0);
  MS_EXCEPTION_IF_NULL(input0);
  MS_EXCEPTION_IF_NULL(output0);

  // Update Hccl input and output
  input_ptr_ = input0->addr;
  output_ptr_ = output0->addr;

  std::vector<std::vector<size_t>> hccl_kernel_input_shape_list;
  if (!HcomUtil::GetKernelInputShape(cnode_ptr_, &hccl_kernel_input_shape_list)) {
    MS_LOG(EXCEPTION) << "GetKernelInputShape fail!";
  }

  std::vector<HcclDataType> hccl_data_type_list;
  if (!HcomUtil::GetHcomDataType(cnode_ptr_, &hccl_data_type_list)) {
    MS_LOG(EXCEPTION) << "GetHcomDataType fail!";
  }

  // Update Hccl count
  // count_ is the element count derived from the (possibly just-inferred)
  // input shapes and data types; consumed by Execute().
  if (!HcomUtil::GetHcomCount(cnode_ptr_, hccl_data_type_list, hccl_kernel_input_shape_list, &count_)) {
    MS_LOG(EXCEPTION) << "GetHcomCount fail!";
  }
  MS_LOG(INFO) << "Update Hccl count:" << count_;
}
|
||||
|
||||
void HcclDynamicKernel::StaticShapeExecute() {
|
||||
MS_EXCEPTION_IF_NULL(cnode_ptr_);
|
||||
auto kernel_mod = AnfAlgo::GetKernelMod(cnode_ptr_);
|
||||
MS_EXCEPTION_IF_NULL(kernel_mod);
|
||||
AddressPtrList kernel_inputs;
|
||||
AddressPtrList kernel_workspaces;
|
||||
AddressPtrList kernel_outputs;
|
||||
KernelRuntime::GenLaunchArgs(*kernel_mod, cnode_ptr_, &kernel_inputs, &kernel_workspaces, &kernel_outputs);
|
||||
kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_);
|
||||
}
|
||||
|
||||
void HcclDynamicKernel::Execute() {
|
||||
MS_LOG(INFO) << "Start Execute";
|
||||
if (!is_dynamic_shape_) {
|
||||
MS_LOG(INFO) << "Not Dynamic, call hcom api";
|
||||
StaticShapeExecute();
|
||||
return;
|
||||
}
|
||||
auto handle = HcclExecutorManager::GetInstance().handle();
|
||||
auto EnqueueHcomOperation =
|
||||
(HcclResult(*)(ge::HcomOpertion, std::function<void(HcclResult status)>))dlsym(handle, "EnqueueHcomOpertion");
|
||||
if (EnqueueHcomOperation == nullptr) {
|
||||
MS_LOG(ERROR) << "Failed to get EnqueueHcomOperation function";
|
||||
if (dlclose(handle) != 0) {
|
||||
MS_LOG(WARNING) << "Failed to close hcom handle";
|
||||
}
|
||||
MS_LOG(EXCEPTION) << "Hccl dynamic kernel execute failed";
|
||||
return;
|
||||
}
|
||||
|
||||
ge::HcomOpertion op_info;
|
||||
op_info.hcclType = hccl_type_;
|
||||
op_info.inputPtr = input_ptr_;
|
||||
op_info.outputPtr = output_ptr_;
|
||||
op_info.dataType = data_type_;
|
||||
op_info.opType = op_type_;
|
||||
op_info.root = root_;
|
||||
op_info.count = count_;
|
||||
|
||||
auto callback = [this](HcclResult status) {
|
||||
if (status != HCCL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "HcomExcutorInitialize failed, ret:" << status;
|
||||
}
|
||||
std::lock_guard<std::mutex> lock(this->hccl_mutex_);
|
||||
this->cond_.notify_all();
|
||||
MS_LOG(INFO) << "hccl callback success.";
|
||||
};
|
||||
|
||||
auto hccl_ret = EnqueueHcomOperation(op_info, callback);
|
||||
if (hccl_ret != HCCL_SUCCESS) {
|
||||
MS_LOG(EXCEPTION) << "Call EnqueueHcomOperation failed";
|
||||
}
|
||||
|
||||
std::unique_lock<std::mutex> ulock(hccl_mutex_);
|
||||
cond_.wait(ulock);
|
||||
MS_LOG(INFO) << "Execute success";
|
||||
}
|
||||
|
||||
// Intentionally empty: HCCL dynamic kernels have no post-execution work.
void HcclDynamicKernel::PostExecute() {}
|
||||
|
||||
bool HcclExecutorManager::Initialize() {
|
||||
if (initialized_) {
|
||||
return true;
|
||||
}
|
||||
initialized_ = true;
|
||||
MS_LOG(INFO) << "Start Initialize Hccl DynamicKernel";
|
||||
handle_ = dlopen(kHcomGraphAdaptorPath, RTLD_NOW | RTLD_GLOBAL);
|
||||
if (handle_ == nullptr) {
|
||||
MS_LOG(ERROR) << "dlopen failed, path:" << kHcomGraphAdaptorPath;
|
||||
return false;
|
||||
}
|
||||
|
||||
auto HcomExecutorInitialize = (HcclResult(*)())dlsym(handle_, "HcomExcutorInitialize");
|
||||
if (HcomExecutorInitialize == nullptr) {
|
||||
MS_LOG(ERROR) << "dlsym HcomExecutorInitialize failed";
|
||||
return false;
|
||||
}
|
||||
|
||||
HcclResult hccl_ret = HcomExecutorInitialize();
|
||||
if (hccl_ret == HCCL_E_PTR) {
|
||||
MS_LOG(WARNING) << "Hccl comm is null, hcom executor initialize is not required";
|
||||
} else if (hccl_ret == HCCL_SUCCESS) {
|
||||
MS_LOG(INFO) << "Hcom DynamicKernel Initialize success";
|
||||
} else {
|
||||
MS_LOG(ERROR) << "Hcom DynamicKernel Initialize failed";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HcclExecutorManager::Finalize() {
|
||||
auto HcomExecutorFinalize = (HcclResult(*)())dlsym(handle_, "HcomExcutorFinalize");
|
||||
if (HcomExecutorFinalize == nullptr) {
|
||||
MS_LOG(ERROR) << "Faile to dlsym HcomExecutorFinalize";
|
||||
return false;
|
||||
}
|
||||
HcclResult hccl_ret = HcomExecutorFinalize();
|
||||
if (hccl_ret != HCCL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Hcom DynamicKernel Finalize failed";
|
||||
return false;
|
||||
}
|
||||
if (dlclose(handle_) != 0) {
|
||||
MS_LOG(ERROR) << "Failed to close hcom handle";
|
||||
return false;
|
||||
}
|
||||
MS_LOG(INFO) << "Hccl DynamicKernel Finalize failed";
|
||||
return true;
|
||||
}
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,82 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_HCCL_DYNAMIC_KERNEL_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_HCCL_DYNAMIC_KERNEL_H_
|
||||
|
||||
#include <condition_variable>
|
||||
#include <string>
|
||||
#include "runtime/device/executor/dynamic_kernel.h"
|
||||
|
||||
#include "utils/ms_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
// DynamicKernel for HCCL collective ops. For dynamic shapes, UpdateArgs()
// refreshes buffer addresses and element count from the node, Execute()
// enqueues the op through the hcom graph adaptor and blocks on a condition
// variable until its async completion callback fires; static shapes fall
// back to the regular KernelMod launch.
class HcclDynamicKernel : public DynamicKernel {
 public:
  // hccl_type: HCCL op name string handed to the adaptor.
  // input_ptr/output_ptr: device buffer addresses (refreshed by UpdateArgs).
  // count: element count; data_type/op_type/root: HCCL op parameters.
  HcclDynamicKernel(const std::string &hccl_type, void *input_ptr, void *output_ptr, uint64_t count, int32_t data_type,
                    int32_t op_type, int32_t root, void *stream, const CNodePtr &cnode_ptr)
      : DynamicKernel(stream, cnode_ptr),
        hccl_type_(hccl_type),
        input_ptr_(input_ptr),
        output_ptr_(output_ptr),
        count_(count),
        data_type_(data_type),
        op_type_(op_type),
        root_(root) {}
  ~HcclDynamicKernel() override = default;
  void UpdateArgs() override;
  void Execute() override;
  void PostExecute() override;

 private:
  std::string hccl_type_;
  void *input_ptr_;   // non-owning device buffer address
  void *output_ptr_;  // non-owning device buffer address
  uint64_t count_{0};
  int32_t data_type_{0};
  int32_t op_type_{0};
  int32_t root_{0};
  // Synchronize Execute() with the async HCCL completion callback.
  std::mutex hccl_mutex_;
  std::condition_variable cond_;

  // Launch path used when the node is not dynamic-shape.
  void StaticShapeExecute();
};
|
||||
|
||||
// Singleton owner of the hcom graph adaptor shared library: Initialize()
// dlopens it and starts the HCCL executor, handle() exposes the raw dlopen
// handle for symbol lookup, Finalize() shuts down and dlcloses.
class HcclExecutorManager {
 public:
  static HcclExecutorManager &GetInstance() {
    static HcclExecutorManager instance;
    return instance;
  }

  bool Initialize();
  bool Finalize();
  // Raw dlopen handle; valid only after a successful Initialize().
  void *handle() { return handle_; }

 private:
  HcclExecutorManager() = default;
  ~HcclExecutorManager() = default;
  DISABLE_COPY_AND_ASSIGN(HcclExecutorManager);

  void *handle_{nullptr};
  bool initialized_{false};
};
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_HCCL_DYNAMIC_KERNEL_H_
|
|
@ -0,0 +1,36 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_HOST_DYNAMIC_KERNEL_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_HOST_DYNAMIC_KERNEL_H_
|
||||
|
||||
#include "runtime/device/executor/dynamic_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
// Base for dynamic kernels that run on the host: no args to update and no
// post-processing; subclasses provide only Execute().
class HostDynamicKernel : public DynamicKernel {
 public:
  HostDynamicKernel(void *stream, const CNodePtr &cnode_ptr) : DynamicKernel(stream, cnode_ptr) {}
  ~HostDynamicKernel() override = default;
  // Host kernels take their inputs directly; nothing to refresh.
  void UpdateArgs() override {}
  void Execute() override = 0;
  void PostExecute() override {}
};
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_HOST_DYNAMIC_KERNEL_H_
|
|
@ -0,0 +1,32 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "runtime/device/ascend/executor/rts/memcpy_rts_dynamic_kernel.h"
|
||||
|
||||
#include "runtime/mem.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
void MemcpyRtsDynamicKernel::Execute() {
|
||||
auto status = rtMemcpyAsync(dst_, dest_max_, src_, count_, RT_MEMCPY_DEVICE_TO_DEVICE, stream_);
|
||||
if (status != RT_ERROR_NONE) {
|
||||
MS_LOG(EXCEPTION) << "MemCpyAsync op rtMemcpyAsync failed!";
|
||||
}
|
||||
}
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,45 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_RTS_MEMCPY_RTS_DYNAMIC_KERNEL_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_RTS_MEMCPY_RTS_DYNAMIC_KERNEL_H_
|
||||
|
||||
#include "runtime/device/executor/dynamic_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
// DynamicKernel wrapping a single rtMemcpyAsync device-to-device copy.
class MemcpyRtsDynamicKernel : public DynamicKernel {
 public:
  // dst/src: non-owning device addresses; dest_max: destination capacity in
  // bytes; count: number of bytes to copy.
  MemcpyRtsDynamicKernel(void *stream, const CNodePtr &cnode_ptr, void *dst, uint32_t dest_max, void *src,
                         uint32_t count)
      : DynamicKernel(stream, cnode_ptr), dst_(dst), dest_max_(dest_max), src_(src), count_(count) {}
  ~MemcpyRtsDynamicKernel() override = default;

  // Addresses are fixed at construction; nothing to refresh.
  void UpdateArgs() override {}
  void Execute() override;
  void PostExecute() override {}

 private:
  void *dst_;
  uint32_t dest_max_;
  void *src_;
  uint32_t count_;
};
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_RTS_MEMCPY_RTS_DYNAMIC_KERNEL_H_
|
|
@ -0,0 +1,32 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "runtime/device/ascend/executor/rts/profiling_rts_dynamic_kernel.h"
|
||||
|
||||
#include "runtime/base.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
void ProfilingRtsDynamicKernel::Execute() {
|
||||
auto rt_ret = rtProfilerTrace(log_id_, notify_, flags_, stream_);
|
||||
if (rt_ret != RT_ERROR_NONE) {
|
||||
MS_LOG(EXCEPTION) << "Call rtProfilerTrace failed";
|
||||
}
|
||||
}
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,43 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_RTS_PROFILING_RTS_DYNAMIC_KERNEL_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_RTS_PROFILING_RTS_DYNAMIC_KERNEL_H_
|
||||
|
||||
#include "runtime/device/executor/dynamic_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
// DynamicKernel wrapping a single rtProfilerTrace call.
class ProfilingRtsDynamicKernel : public DynamicKernel {
 public:
  // log_id/notify/flags: passed straight through to rtProfilerTrace.
  ProfilingRtsDynamicKernel(void *stream, const CNodePtr &cnode_ptr, uint64_t log_id, bool notify, uint32_t flags)
      : DynamicKernel(stream, cnode_ptr), log_id_(log_id), notify_(notify), flags_(flags) {}
  ~ProfilingRtsDynamicKernel() override = default;

  // Trace parameters are fixed at construction; nothing to refresh.
  void UpdateArgs() override {}
  void Execute() override;
  void PostExecute() override {}

 private:
  uint64_t log_id_;
  bool notify_;
  uint32_t flags_;
};
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_RTS_PROFILING_RTS_DYNAMIC_KERNEL_H_
|
|
@ -0,0 +1,188 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "runtime/device/ascend/executor/tiling/op_tiling_calculater.h"
|
||||
#include <dlfcn.h>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "runtime/device/ascend/ge_types_convert.h"
|
||||
#include "utils/utils.h"
|
||||
#include "external/graph/tensor.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
// Build a placeholder ge::Tensor whose descriptor carries only the GE data
// type converted from the MindSpore TypeId; shape and data are left unset.
ge::Tensor MakeTempGeTensor(TypeId type_id) {
  ge::TensorDesc desc;
  desc.SetDataType(GeTypesConvert::TransTypeIdToGeDataType(type_id));
  ge::Tensor tensor;
  tensor.SetTensorDesc(desc);
  return tensor;
}
|
||||
|
||||
void FeedTeOpTensorInputArg(const NotNull<CNodePtr> &cnode,
|
||||
NotNull<std::vector<optiling::TeOpTensorArg> *> tensor_arg_list) {
|
||||
MS_LOG(INFO) << "FeedTeOpTensorInputArg start, node:" << cnode->fullname_with_scope();
|
||||
auto input_size = AnfAlgo::GetInputTensorNum(cnode.get());
|
||||
|
||||
// Skip Dynamic Shape Depend Input
|
||||
|
||||
for (size_t i = 0; i < input_size; ++i) {
|
||||
auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(cnode.get(), i);
|
||||
auto input_node = input_node_with_index.first;
|
||||
auto input_index = input_node_with_index.second;
|
||||
auto output_shape = AnfAlgo::GetOutputDeviceShape(input_node, input_index);
|
||||
auto output_format = AnfAlgo::GetOutputFormat(input_node, input_index);
|
||||
auto output_dtype = AnfAlgo::GetOutputDeviceDataType(input_node, input_index);
|
||||
auto iter = type_name_map.find(output_dtype);
|
||||
if (iter == type_name_map.end()) {
|
||||
MS_LOG(EXCEPTION) << "Cannot found typeId:" << output_dtype;
|
||||
}
|
||||
auto ge_output_dtype = iter->second;
|
||||
|
||||
optiling::TeOpTensorArg tensor_arg;
|
||||
optiling::TeOpTensor tensor;
|
||||
tensor_arg.arg_type = optiling::TA_SINGLE;
|
||||
tensor.dtype = ge_output_dtype;
|
||||
tensor.shape.insert(tensor.shape.end(), output_shape.begin(), output_shape.end());
|
||||
|
||||
tensor.format = GeTypesConvert::GetGeTilingFormat(GeTypesConvert::GetGeFormat(output_format, output_shape.size()));
|
||||
MS_LOG(INFO) << "Tiling Format:" << tensor.format;
|
||||
tensor_arg.tensor.emplace_back(tensor);
|
||||
tensor_arg_list->emplace_back(tensor_arg);
|
||||
}
|
||||
}
|
||||
|
||||
void FeedTeOpTensorOutputArg(const NotNull<CNodePtr> &cnode,
|
||||
NotNull<std::vector<optiling::TeOpTensorArg> *> tensor_arg_list) {
|
||||
MS_LOG(INFO) << "FeedTeOpTensorOutputArg start, node:" << cnode->fullname_with_scope();
|
||||
auto output_size = AnfAlgo::GetOutputTensorNum(cnode.get());
|
||||
for (size_t i = 0; i < output_size; ++i) {
|
||||
auto output_shape = AnfAlgo::GetOutputDeviceShape(cnode.get(), i);
|
||||
auto output_format = AnfAlgo::GetOutputFormat(cnode.get(), i);
|
||||
auto data_type = AnfAlgo::GetOutputDeviceDataType(cnode.get(), i);
|
||||
auto iter = type_name_map.find(data_type);
|
||||
if (iter == type_name_map.end()) {
|
||||
MS_LOG(EXCEPTION) << "Cannot found typeId:" << data_type;
|
||||
}
|
||||
|
||||
optiling::TeOpTensorArg tensor_arg;
|
||||
optiling::TeOpTensor tensor;
|
||||
tensor_arg.arg_type = optiling::TA_SINGLE;
|
||||
tensor.dtype = iter->second;
|
||||
tensor.shape.insert(tensor.shape.end(), output_shape.begin(), output_shape.end());
|
||||
tensor.format = GeTypesConvert::GetGeTilingFormat(GeTypesConvert::GetGeFormat(output_format, output_shape.size()));
|
||||
MS_LOG(INFO) << "Tiling Format:" << tensor.format;
|
||||
tensor_arg.tensor.emplace_back(tensor);
|
||||
tensor_arg_list->emplace_back(tensor_arg);
|
||||
}
|
||||
}
|
||||
|
||||
void FeedTeOpConstTensor(const NotNull<CNodePtr> &cnode, const std::map<uint32_t, tensor::TensorPtr> &depend_tensor_map,
|
||||
NotNull<std::map<std::string, optiling::TeConstTensorData> *> const_inputs) {
|
||||
MS_LOG(INFO) << "FeedTeOpConstTensor start, node:" << cnode->fullname_with_scope();
|
||||
if (!AnfAlgo::HasNodeAttr(kDynamicShapeDepends, cnode.get())) {
|
||||
MS_LOG(INFO) << "No input depend found, " << cnode->fullname_with_scope();
|
||||
return;
|
||||
}
|
||||
|
||||
auto depends_list = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode.get(), kDynamicShapeDepends);
|
||||
for (auto index : depends_list) {
|
||||
auto iter = depend_tensor_map.find(IntToSize(index));
|
||||
if (iter == depend_tensor_map.end()) {
|
||||
MS_LOG(EXCEPTION) << "Index not found in depend_tensor_map";
|
||||
}
|
||||
|
||||
auto const_tensor = iter->second;
|
||||
|
||||
auto have_input_names_attr = AnfAlgo::HasNodeAttr("input_names", cnode);
|
||||
if (!have_input_names_attr) {
|
||||
MS_LOG(EXCEPTION) << "cnode:" << cnode->fullname_with_scope() << " no input_names attr";
|
||||
}
|
||||
auto input_names_attr = AnfAlgo::GetNodeAttr<std::vector<std::string>>(cnode.get(), "input_names");
|
||||
if (IntToSize(index) >= input_names_attr.size()) {
|
||||
MS_LOG(EXCEPTION) << "input index" << index << " >= input_name_attr.size:" << input_names_attr.size();
|
||||
}
|
||||
auto input_name = input_names_attr[index];
|
||||
MS_LOG(INFO) << "input_name is " << input_name;
|
||||
auto type_id = AnfAlgo::GetPrevNodeOutputDeviceDataType(cnode.get(), index);
|
||||
const_inputs->try_emplace(
|
||||
input_name, optiling::TeConstTensorData{static_cast<const uint8_t *>(const_tensor->data_c()),
|
||||
IntToSize(const_tensor->DataSize()), MakeTempGeTensor(type_id)});
|
||||
}
|
||||
MS_LOG(INFO) << "FeedTeOpConstTensor end";
|
||||
}
|
||||
|
||||
// Populate tiling_func_map_ with every tiling function registered by the
// optiling library, then log each registered op type for diagnostics.
void OpTilingCalculater::Init() {
  MS_LOG(INFO) << "Start init OpTilingCalculater";
  tiling_func_map_ = optiling::OpTilingInterf::RegisteredOpInterf();
  MS_LOG(INFO) << "tiling_func_map_ size:" << tiling_func_map_.size();
  for (const auto &entry : tiling_func_map_) {
    MS_LOG(INFO) << "Regist tiling func:" << entry.first;
  }
}
|
||||
|
||||
// Map a front-end op type to the name its tiling function is registered
// under. Only SparseApplyFtrl currently diverges (the "D"-suffixed kernel);
// any op type without a mapping entry is returned unchanged.
std::string GetRealOpType(const std::string &op_type) {
  static const std::map<std::string, std::string> kOpTypeMap = {
    {"SparseApplyFtrl", "SparseApplyFtrlD"},
  };
  auto iter = kOpTypeMap.find(op_type);
  return iter == kOpTypeMap.end() ? op_type : iter->second;
}
|
||||
|
||||
void OpTilingCalculater::CalculateTiling(const NotNull<CNodePtr> &cnode,
|
||||
const NotNull<std::shared_ptr<nlohmann::json>> &compile_info_json,
|
||||
const std::map<uint32_t, tensor::TensorPtr> &depend_tensor_map,
|
||||
NotNull<optiling::OpRunInfo *> op_run_info) {
|
||||
optiling::TeOpParas op_param;
|
||||
std::string op_type = AnfAlgo::GetCNodeName(cnode.get());
|
||||
MS_LOG(INFO) << "[DynamicShape] calculate tiling, op_type:" << op_type;
|
||||
|
||||
FeedTeOpTensorInputArg(cnode, NOT_NULL(&op_param.inputs));
|
||||
FeedTeOpTensorOutputArg(cnode, NOT_NULL(&op_param.outputs));
|
||||
FeedTeOpConstTensor(cnode, depend_tensor_map, NOT_NULL(&op_param.const_inputs));
|
||||
|
||||
op_type = GetRealOpType(op_type);
|
||||
auto iter = tiling_func_map_.find(op_type);
|
||||
if (iter == tiling_func_map_.end()) {
|
||||
iter = tiling_func_map_.find("AutoTiling");
|
||||
if (iter == tiling_func_map_.end()) {
|
||||
MS_LOG(EXCEPTION) << "AutoTiling Func Not Found";
|
||||
}
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "Get tiling func:" << iter->first;
|
||||
|
||||
if (iter != tiling_func_map_.end()) {
|
||||
bool ret = (iter->second)(op_type, op_param, *compile_info_json.get(), *op_run_info);
|
||||
if (!ret) {
|
||||
MS_LOG(EXCEPTION) << "Calculate tiling failed";
|
||||
}
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Tiling func not found";
|
||||
}
|
||||
MS_LOG(INFO) << "CalculateTiling success";
|
||||
}
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,55 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_TILING_OP_TILING_CALCULATE_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_TILING_OP_TILING_CALCULATE_H_
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "utils/ms_utils.h"
|
||||
#include "utils/contract.h"
|
||||
#include "ir/anf.h"
|
||||
#include "ir/tensor.h"
|
||||
#include "register/op_tiling.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
// Process-wide singleton that owns the registry of op tiling functions and
// computes run-time tiling (optiling::OpRunInfo) for dynamic-shape kernels.
class OpTilingCalculater {
 public:
  // Meyers singleton; construction is thread-safe since C++11.
  static OpTilingCalculater &GetInstance() {
    static OpTilingCalculater instance;
    return instance;
  }

  // Load all registered tiling functions into tiling_func_map_.
  void Init();
  // Run the tiling function matching cnode's op type (or the "AutoTiling"
  // fallback) against the compiled op info and depend-input tensors, filling
  // op_run_info with the result.
  void CalculateTiling(const NotNull<CNodePtr> &cnode,
                       const NotNull<std::shared_ptr<nlohmann::json>> &compile_info_json,
                       const std::map<uint32_t, tensor::TensorPtr> &depend_tensor_map,
                       NotNull<optiling::OpRunInfo *> op_run_info);

 private:
  OpTilingCalculater() = default;
  ~OpTilingCalculater() = default;
  DISABLE_COPY_AND_ASSIGN(OpTilingCalculater);

  // op type name -> tiling function; populated once by Init().
  std::map<std::string, optiling::OpTilingFunc> tiling_func_map_;
};
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_TILING_OP_TILING_CALCULATE_H_
|
|
@ -0,0 +1,137 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "runtime/device/ascend/ge_types_convert.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
// Translate a MindSpore TypeId into the GE protobuf data type.
// Throws for any type id without a mapping entry.
ge::proto::DataType GeTypesConvert::GetGeDataType(TypeId type_id) {
  static const std::map<TypeId, ge::proto::DataType> kDataTypeMap = {
    {TypeId::kTypeUnknown, ge::proto::DT_UNDEFINED},
    {TypeId::kNumberTypeFloat32, ge::proto::DT_FLOAT},
    {TypeId::kNumberTypeFloat16, ge::proto::DT_FLOAT16},
    {TypeId::kNumberTypeInt8, ge::proto::DT_INT8},
    {TypeId::kNumberTypeUInt8, ge::proto::DT_UINT8},
    {TypeId::kNumberTypeInt16, ge::proto::DT_INT16},
    {TypeId::kNumberTypeUInt16, ge::proto::DT_UINT16},
    {TypeId::kNumberTypeInt32, ge::proto::DT_INT32},
    {TypeId::kNumberTypeInt64, ge::proto::DT_INT64},
    {TypeId::kNumberTypeUInt32, ge::proto::DT_UINT32},
    {TypeId::kNumberTypeUInt64, ge::proto::DT_UINT64},
    {TypeId::kNumberTypeBool, ge::proto::DT_BOOL},
    {TypeId::kNumberTypeFloat64, ge::proto::DT_DOUBLE},
  };
  MS_LOG(INFO) << "Vm origin type_id:" << type_id;
  auto it = kDataTypeMap.find(type_id);
  if (it == kDataTypeMap.end()) {
    MS_LOG(EXCEPTION) << "Invalid data type:" << type_id;
  }
  return it->second;
}
|
||||
|
||||
// Translate a MindSpore TypeId into the GE (external graph) data type.
// Throws for any type id without a mapping entry.
//
// Bug fix: the original initializer contained a duplicate key
// {kNumberTypeInt64, DT_DOUBLE} — std::map silently drops duplicate keys, so
// that entry was dead AND kNumberTypeFloat64 had no mapping at all (float64
// inputs would throw "Invalid data type"). The entry is corrected to map
// kNumberTypeFloat64 -> DT_DOUBLE, consistent with GetGeDataType above.
ge::DataType GeTypesConvert::TransTypeIdToGeDataType(TypeId type_id) {
  static const std::map<TypeId, ge::DataType> data_type_map = {
    {TypeId::kNumberTypeFloat, ge::DataType::DT_FLOAT},     {TypeId::kNumberTypeFloat32, ge::DataType::DT_FLOAT},
    {TypeId::kNumberTypeFloat16, ge::DataType::DT_FLOAT16}, {TypeId::kNumberTypeInt8, ge::DataType::DT_INT8},
    {TypeId::kNumberTypeInt16, ge::DataType::DT_INT16},     {TypeId::kNumberTypeUInt16, ge::DataType::DT_UINT16},
    {TypeId::kNumberTypeUInt8, ge::DataType::DT_UINT8},     {TypeId::kNumberTypeInt32, ge::DataType::DT_INT32},
    {TypeId::kNumberTypeInt, ge::DataType::DT_INT32},       {TypeId::kNumberTypeInt64, ge::DataType::DT_INT64},
    {TypeId::kNumberTypeUInt32, ge::DataType::DT_UINT32},   {TypeId::kNumberTypeUInt, ge::DataType::DT_UINT32},
    {TypeId::kNumberTypeUInt64, ge::DataType::DT_UINT64},   {TypeId::kNumberTypeBool, ge::DataType::DT_BOOL},
    {TypeId::kNumberTypeFloat64, ge::DataType::DT_DOUBLE},  {TypeId::kTypeUnknown, ge::DataType::DT_UNDEFINED}};
  auto iter = data_type_map.find(type_id);
  if (iter == data_type_map.end()) {
    MS_LOG(EXCEPTION) << "Invalid data type:" << type_id;
  }
  return iter->second;
}
|
||||
|
||||
// Translate a MindSpore format string into the GeFormat enum. The default
// format is resolved by rank before the map lookup (4-D -> NCHW, else ND),
// so the kOpFormat_DEFAULT map entry is never reached via that path.
// Throws for an unrecognized format string.
GeFormat GeTypesConvert::GetGeFormat(const std::string &format, size_t shape_size) {
  static const std::map<std::string, GeFormat> kFormatMap = {
    // default format: nchw, fractal_nz?
    {kOpFormat_DEFAULT, kFormat_NCHW},
    {kOpFormat_NC1KHKWHWC0, kFormat_NC1KHKWHWC0},
    {kOpFormat_ND, kFormat_ND},
    {kOpFormat_NCHW, kFormat_NCHW},
    {kOpFormat_NHWC, kFormat_NHWC},
    {kOpFormat_HWCN, kFormat_HWCN},
    {kOpFormat_NC1HWC0, kFormat_NC1HWC0},
    {kOpFormat_FRAC_Z, kFormat_FRACTAL_Z},
    {kOpFormat_FRAC_NZ, kFormat_FRACTAL_NZ},
    {kOpFormat_C1HWNCoC0, kFormat_C1HWNCoC0},
    {kOpFormat_NC1HWC0_C04, kFormat_NC1HWC0_C04},
    {kOpFormat_FRACTAL_Z_C04, kFormat_FRACTAL_Z_C04},
    {kOpFormat_NDHWC, kFormat_NDHWC},
  };
  MS_LOG(INFO) << "GetGeFormat format:" << format << " shape_size:" << shape_size;
  if (format == kOpFormat_DEFAULT) {
    return shape_size == 4 ? kFormat_NCHW : kFormat_ND;
  }
  auto it = kFormatMap.find(format);
  if (it == kFormatMap.end()) {
    MS_LOG(EXCEPTION) << "Invalid format:" << format;
  }
  return it->second;
}
|
||||
|
||||
// Translate a GeFormat enum value into the format-name string expected by
// the tiling library. Throws for an enum value with no mapping entry.
std::string GeTypesConvert::GetGeTilingFormat(GeFormat ge_format) {
  static const std::map<GeFormat, std::string> kGeFormatNameMap = {
    {kFormat_NCHW, "NCHW"},
    {kFormat_NHWC, "NHWC"},
    {kFormat_ND, "ND"},
    {kFormat_NC1HWC0, "NC1HWC0"},
    {kFormat_FRACTAL_Z, "FRACTAL_Z"},
    {kFormat_NC1C0HWPAD, "NC1C0HWPAD"},
    {kFormat_NHWC1C0, "NHWC1C0"},
    {kFormat_FSR_NCHW, "FSR_NCHW"},
    {kFormat_FRACTAL_DECONV, "FRACTAL_DECONV"},
    {kFormat_C1HWNC0, "C1HWNC0"},
    {kFormat_FRACTAL_DECONV_TRANSPOSE, "FRACTAL_DECONV_TRANSPOSE"},
    {kFormat_FRACTAL_DECONV_SP_STRIDE_TRANS, "FRACTAL_DECONV_SP_STRIDE_TRANS"},
    {kFormat_NC1HWC0_C04, "NC1HWC0_C04"},
    {kFormat_FRACTAL_Z_C04, "FRACTAL_Z_C04"},
    {kFormat_CHWN, "CHWN"},
    {kFormat_FRACTAL_DECONV_SP_STRIDE8_TRANS, "DECONV_SP_STRIDE8_TRANS"},
    {kFormat_NC1KHKWHWC0, "NC1KHKWHWC0"},
    {kFormat_BN_WEIGHT, "BN_WEIGHT"},
    {kFormat_FILTER_HWCK, "FILTER_HWCK"},
    {kFormat_HWCN, "HWCN"},
    {kFormat_HASHTABLE_LOOKUP_LOOKUPS, "LOOKUP_LOOKUPS"},
    {kFormat_HASHTABLE_LOOKUP_KEYS, "LOOKUP_KEYS"},
    {kFormat_HASHTABLE_LOOKUP_VALUE, "LOOKUP_VALUE"},
    {kFormat_HASHTABLE_LOOKUP_OUTPUT, "LOOKUP_OUTPUT"},
    {kFormat_HASHTABLE_LOOKUP_HITS, "LOOKUP_HITS"},
    {kFormat_MD, "MD"},
    {kFormat_NDHWC, "NDHWC"},
    {kFormat_NCDHW, "NCDHW"},
    {kFormat_DHWCN, "DHWCN"},
    {kFormat_DHWNC, "DHWNC"},
    {kFormat_NDC1HWC0, "NDC1HWC0"},
    {kFormat_FRACTAL_Z_3D, "FRACTAL_Z_3D"},
    {kFormat_FRACTAL_Z_3D_TRANSPOSE, "FRACTAL_Z_3D_TRANSPOSE"},
    {kFormat_C1HWNCoC0, "C1HWNCoC0"},
    {kFormat_FRACTAL_NZ, "FRACTAL_NZ"},
    {kFormat_CN, "CN"},
    {kFormat_NC, "NC"},
    {kFormat_FRACTAL_ZN_LSTM, "FRACTAL_ZN_LSTM"},
    {kFormat_FRACTAL_Z_G, "FRACTAL_Z_G"},
    {kFormat_RESERVED, "FORMAT_RESERVED"},
    {kFormat_ALL, "ALL"},
  };

  auto it = kGeFormatNameMap.find(ge_format);
  if (it == kGeFormatNameMap.end()) {
    MS_LOG(EXCEPTION) << "Invalid ge_format:" << ge_format;
  }
  return it->second;
}
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
|
@ -22,28 +22,11 @@
|
|||
#include "proto/ge_dtype.pb.h"
|
||||
#include "ir/dtype/type_id.h"
|
||||
#include "utils/utils.h"
|
||||
#include "external/graph/types.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
static ge::proto::DataType GetGeDataType(TypeId type_id) {
|
||||
static const std::map<TypeId, ge::proto::DataType> data_type_map = {
|
||||
{TypeId::kTypeUnknown, ge::proto::DT_UNDEFINED}, {TypeId::kNumberTypeFloat32, ge::proto::DT_FLOAT},
|
||||
{TypeId::kNumberTypeFloat16, ge::proto::DT_FLOAT16}, {TypeId::kNumberTypeInt8, ge::proto::DT_INT8},
|
||||
{TypeId::kNumberTypeUInt8, ge::proto::DT_UINT8}, {TypeId::kNumberTypeInt16, ge::proto::DT_INT16},
|
||||
{TypeId::kNumberTypeUInt16, ge::proto::DT_UINT16}, {TypeId::kNumberTypeInt32, ge::proto::DT_INT32},
|
||||
{TypeId::kNumberTypeInt64, ge::proto::DT_INT64}, {TypeId::kNumberTypeUInt32, ge::proto::DT_UINT32},
|
||||
{TypeId::kNumberTypeUInt64, ge::proto::DT_UINT64}, {TypeId::kNumberTypeBool, ge::proto::DT_BOOL},
|
||||
{TypeId::kNumberTypeFloat64, ge::proto::DT_DOUBLE},
|
||||
};
|
||||
MS_LOG(INFO) << "Vm origin type_id:" << type_id;
|
||||
auto iter = data_type_map.find(type_id);
|
||||
if (iter == data_type_map.end()) {
|
||||
MS_LOG(EXCEPTION) << "Invalid data type:" << type_id;
|
||||
}
|
||||
return iter->second;
|
||||
}
|
||||
|
||||
enum GeFormat {
|
||||
kFormat_NCHW = 0, // NCHW
|
||||
kFormat_NHWC, // NHWC
|
||||
|
@ -83,37 +66,21 @@ enum GeFormat {
|
|||
kFormat_NC,
|
||||
kFormat_DHWNC,
|
||||
kFormat_FRACTAL_Z_3D_TRANSPOSE, // 3D filter(transpose) input tensor format
|
||||
kFormat_FRACTAL_ZN_LSTM,
|
||||
kFormat_FRACTAL_Z_G,
|
||||
kFormat_RESERVED,
|
||||
kFormat_ALL
|
||||
};
|
||||
|
||||
static GeFormat GetGeFormat(const std::string &format, size_t shape_size) {
|
||||
static const std::map<std::string, GeFormat> format_map = {
|
||||
// default format: nchw, fractal_nz?
|
||||
{kOpFormat_DEFAULT, kFormat_NCHW},
|
||||
{kOpFormat_NC1KHKWHWC0, kFormat_NC1KHKWHWC0},
|
||||
{kOpFormat_ND, kFormat_ND},
|
||||
{kOpFormat_NCHW, kFormat_NCHW},
|
||||
{kOpFormat_NHWC, kFormat_NHWC},
|
||||
{kOpFormat_HWCN, kFormat_HWCN},
|
||||
{kOpFormat_NC1HWC0, kFormat_NC1HWC0},
|
||||
{kOpFormat_FRAC_Z, kFormat_FRACTAL_Z},
|
||||
{kOpFormat_FRAC_NZ, kFormat_FRACTAL_NZ},
|
||||
{kOpFormat_C1HWNCoC0, kFormat_C1HWNCoC0},
|
||||
{kOpFormat_NC1HWC0_C04, kFormat_NC1HWC0_C04},
|
||||
{kOpFormat_FRACTAL_Z_C04, kFormat_FRACTAL_Z_C04},
|
||||
{kOpFormat_NDHWC, kFormat_NDHWC},
|
||||
};
|
||||
MS_LOG(INFO) << "GetGeFormat format:" << format << " shape_size:" << shape_size;
|
||||
if (format == kOpFormat_DEFAULT) {
|
||||
return shape_size == 4 ? kFormat_NCHW : kFormat_ND;
|
||||
}
|
||||
auto iter = format_map.find(format);
|
||||
if (iter == format_map.end()) {
|
||||
MS_LOG(EXCEPTION) << "Invalid format:" << format;
|
||||
}
|
||||
return iter->second;
|
||||
}
|
||||
class GeTypesConvert {
|
||||
public:
|
||||
GeTypesConvert() = default;
|
||||
~GeTypesConvert() = default;
|
||||
static ge::proto::DataType GetGeDataType(TypeId type_id);
|
||||
static GeFormat GetGeFormat(const std::string &format, size_t shape_size);
|
||||
static std::string GetGeTilingFormat(GeFormat ge_format);
|
||||
static ge::DataType TransTypeIdToGeDataType(TypeId type_id);
|
||||
};
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
|
@ -27,6 +27,7 @@
|
|||
#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h"
|
||||
#include "backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h"
|
||||
#include "backend/kernel_compiler/host/host_kernel_build.h"
|
||||
#include "backend/kernel_compiler/hccl/hccl_kernel_build.h"
|
||||
#include "backend/kernel_compiler/rts/rt_kernel_build.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_utils.h"
|
||||
|
@ -47,6 +48,10 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) {
|
|||
kernel_mod_ptr = kernel::AicpuOpBuild(anf_node);
|
||||
break;
|
||||
}
|
||||
case KernelType::HOST_KERNEL: {
|
||||
kernel_mod_ptr = kernel::HostOpBuild(anf_node);
|
||||
break;
|
||||
}
|
||||
case KernelType::RT_KERNEL: {
|
||||
kernel_mod_ptr = kernel::RtOpBuild(anf_node);
|
||||
break;
|
||||
|
|
|
@ -22,6 +22,10 @@
|
|||
#include <utility>
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include "utils/ms_utils.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.h"
|
||||
#include "debug/anf_ir_dump.h"
|
||||
#include "frontend/operator/ops.h"
|
||||
#include "utils/ms_context.h"
|
||||
|
@ -493,7 +497,7 @@ void SetTensorDeviceInfo(const kernel::KernelBuildInfo &selected_kernel_info, co
|
|||
}
|
||||
// we set special device info of a input tensor.
|
||||
bool is_ref = false;
|
||||
auto op_info = kernel::OpLib::FindOp(AnfAlgo::GetCNodeName(kernel_node), kernel::kTBE);
|
||||
auto op_info = kernel::tbe::TbeDynamicShapeUtil::FindOp(AnfAlgo::GetCNodeName(kernel_node), kernel_node);
|
||||
if (op_info != nullptr) {
|
||||
is_ref = op_info->is_ref();
|
||||
}
|
||||
|
|
|
@ -44,6 +44,8 @@ class CPUKernelRuntime : public KernelRuntime {
|
|||
VectorRef *outputs);
|
||||
void IncreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs);
|
||||
void DecreaseSummaryRefCount(const session::NamedSummaryOutputs &summary_outputs);
|
||||
bool GenDynamicKernel(const session::KernelGraph *graph) override { return true; }
|
||||
bool RunDynamicKernelAsync(const session::KernelGraph *graph) override { return true; }
|
||||
|
||||
protected:
|
||||
bool SyncStream() override { return true; };
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "runtime/device/executor/dynamic_kernel.h"
|
||||
#include <vector>
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "common/trans.h"
|
||||
#include "pipeline/jit/static_analysis/static_analysis.h"
|
||||
#include "abstract/dshape.h"
|
||||
#include "abstract/param_validator.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
void DynamicKernel::Initialize() {
|
||||
MS_LOG(INFO) << "Init Start";
|
||||
is_dynamic_shape_ = AnfAlgo::IsDynamicShape(cnode_ptr_);
|
||||
if (!is_dynamic_shape_) {
|
||||
MS_LOG(INFO) << "cnode is not dynamic shape:" << cnode_ptr_->fullname_with_scope();
|
||||
return;
|
||||
}
|
||||
|
||||
is_input_dynamic_shape_ = AnfAlgo::GetBooleanAttr(cnode_ptr_, kAttrInputIsDynamicShape);
|
||||
is_output_dynamic_shape_ = AnfAlgo::GetBooleanAttr(cnode_ptr_, kAttrOutputIsDynamicShape);
|
||||
|
||||
auto have_depends = AnfAlgo::HasNodeAttr(kDynamicShapeDepends, cnode_ptr_);
|
||||
if (!have_depends) {
|
||||
MS_LOG(WARNING) << "No dynamic_shape_depends found";
|
||||
return;
|
||||
}
|
||||
MS_LOG(INFO) << "Have depends";
|
||||
auto depends_list = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode_ptr_, kDynamicShapeDepends);
|
||||
// Save depend input tensor. Sync data in InferShape.
|
||||
for (auto depend : depends_list) {
|
||||
auto pre_node_with_index = AnfAlgo::GetPrevNodeOutput(cnode_ptr_, depend);
|
||||
auto output_addr = AnfAlgo::GetPrevNodeMutableOutputAddr(cnode_ptr_, depend);
|
||||
std::vector<int> shapes = trans::GetRuntimePaddingShape(pre_node_with_index.first, pre_node_with_index.second);
|
||||
auto host_type = AnfAlgo::GetOutputInferDataType(pre_node_with_index.first, pre_node_with_index.second);
|
||||
auto out_tensor = std::make_shared<tensor::Tensor>(host_type, shapes);
|
||||
out_tensor->set_device_address(output_addr);
|
||||
|
||||
auto ret = depend_tensor_map_.try_emplace(depend, out_tensor);
|
||||
if (!ret.second) {
|
||||
MS_LOG(EXCEPTION) << "Insert map failed";
|
||||
}
|
||||
}
|
||||
MS_LOG(INFO) << "Init End";
|
||||
}
|
||||
|
||||
bool IsTupleGetItem(const AnfNodePtr &anf_node) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
if (!anf_node->isa<CNode>()) {
|
||||
return false;
|
||||
}
|
||||
auto cnode = anf_node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto input0 = cnode->input(0);
|
||||
return IsPrimitive(input0, prim::kPrimTupleGetItem);
|
||||
}
|
||||
|
||||
// Re-run shape inference for this node using the current run-time input
// shapes/values, then store the result on the node's abstract.
void DynamicKernel::InferShape() {
  // Skip when input shapes are static, only the output is dynamic, and no
  // depend inputs feed values into inference — nothing can have changed.
  if (!is_input_dynamic_shape_ && is_output_dynamic_shape_ && !have_depends()) {
    return;
  }
  MS_EXCEPTION_IF_NULL(cnode_ptr_);
  MS_LOG(INFO) << "InferShape start, node:" << cnode_ptr_->fullname_with_scope();

  auto inputs = cnode_ptr_->inputs();
  if (inputs.empty()) {
    MS_LOG(EXCEPTION) << "Invalid inputs";
  }
  AbstractBasePtrList args_spec_list;
  // input(0) of a CNode is the primitive being applied.
  auto primitive = GetValueNode<PrimitivePtr>(inputs[0]);

  auto input_size = AnfAlgo::GetInputTensorNum(cnode_ptr_);
  for (size_t i = 0; i < input_size; ++i) {
    auto input_with_index = AnfAlgo::GetPrevNodeOutput(cnode_ptr_, i);
    auto real_input = input_with_index.first;

    MS_EXCEPTION_IF_NULL(real_input);
    auto ret = depend_tensor_map_.find(i);
    if (ret != depend_tensor_map_.end()) {
      auto tensor_ptr = ret->second;
      MS_EXCEPTION_IF_NULL(tensor_ptr);
      // sync data from device to host
      tensor_ptr->data_sync();
      // Attach the synced host value so value-dependent shape inference
      // (e.g. shape-as-input ops) can read it from the abstract.
      real_input->abstract()->set_value(tensor_ptr);
    }

    auto cnode_input = cnode_ptr_->input(i + 1);
    MS_EXCEPTION_IF_NULL(cnode_input);
    if (IsTupleGetItem(cnode_input)) {
      // The real input produces a tuple; select the element abstract that
      // corresponds to the tuple_get_item index.
      auto base_shape = real_input->Shape();
      if (!base_shape->isa<abstract::TupleShape>()) {
        MS_LOG(EXCEPTION) << "Node:" << cnode_ptr_->fullname_with_scope()
                          << " input is a tuple_get_item but real input node shape is not a TupleShape";
      }
      auto tuple_ptr = base_shape->cast<abstract::TupleShapePtr>();
      MS_EXCEPTION_IF_NULL(tuple_ptr);
      auto tuple_get_item_index = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast<CNodePtr>());
      auto real_shape = tuple_ptr->shape().at(tuple_get_item_index);
      auto abstract_tensor = cnode_input->abstract()->cast<abstract::AbstractTensorPtr>();
      MS_EXCEPTION_IF_NULL(abstract_tensor);
      args_spec_list.emplace_back(std::make_shared<abstract::AbstractTensor>(abstract_tensor->element(), real_shape));
    } else if (cnode_input->isa<CNode>() && AnfAlgo::GetCNodeName(cnode_input) == prim::kPrimReshape->name()) {
      // For a Reshape producer, use the Reshape node's own abstract rather
      // than the traced real input's. NOTE(review): presumably because
      // Reshape's output abstract already reflects the reshaped form —
      // confirm against the Reshape dynamic-shape handling.
      args_spec_list.emplace_back(cnode_input->abstract());
    } else {
      args_spec_list.emplace_back(real_input->abstract());
    }
  }

  // Run the C++ infer routine of the primitive and update this node.
  auto eval_result = abstract::CppInferShape(primitive, args_spec_list);
  cnode_ptr_->set_abstract(eval_result);
}
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,62 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_EXECUTOR_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_EXECUTOR_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include "ir/anf.h"
|
||||
#include "ir/tensor.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
|
||||
constexpr auto kDynamicShapeDepends = "dynamic_shape_depends";
|
||||
|
||||
// Base class for kernels that must re-infer shapes and refresh their launch
// arguments at run time (Ascend dynamic-shape execution). Subclasses
// implement UpdateArgs/Execute/PostExecute; Initialize() and InferShape()
// provide the shared dynamic-shape plumbing.
class DynamicKernel {
 public:
  // `stream` is the device stream the kernel launches on; `cnode_ptr` is the
  // graph node this kernel executes. Dynamic-shape flags start false and are
  // filled in by Initialize().
  DynamicKernel(void *stream, const CNodePtr &cnode_ptr)
      : stream_(stream),
        cnode_ptr_(cnode_ptr),
        is_dynamic_shape_(false),
        is_input_dynamic_shape_(false),
        is_output_dynamic_shape_(false) {}
  virtual ~DynamicKernel() = default;
  // Re-run shape inference from current run-time inputs (see .cc).
  virtual void InferShape();
  // Refresh launch arguments after shapes changed.
  virtual void UpdateArgs() = 0;
  // Launch the kernel.
  virtual void Execute() = 0;
  // Post-launch processing (e.g. reading back output shapes).
  virtual void PostExecute() = 0;
  bool is_dynamic_shape() const { return is_dynamic_shape_; }
  bool is_input_dynamic_shape() const { return is_input_dynamic_shape_; }
  bool is_output_dynamic_shape() const { return is_output_dynamic_shape_; }
  // True when Initialize() recorded any depend-input tensors.
  bool have_depends() const { return !depend_tensor_map_.empty(); }
  // Cache dynamic-shape flags and depend-input tensors for this node.
  virtual void Initialize();
  std::string GetKernelName() { return cnode_ptr_->fullname_with_scope(); }

 protected:
  void *stream_;              // device stream, not owned
  const CNodePtr cnode_ptr_;  // graph node this kernel executes
  bool is_dynamic_shape_;
  bool is_input_dynamic_shape_;
  bool is_output_dynamic_shape_;
  // input index -> host tensor bound to that input's device address;
  // synced device->host in InferShape().
  std::map<uint32_t, tensor::TensorPtr> depend_tensor_map_;
};
|
||||
using DynamicKernelPtr = std::shared_ptr<DynamicKernel>;
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_EXECUTOR_EXECUTOR_H_
|
|
@ -43,6 +43,8 @@ class GPUKernelRuntime : public KernelRuntime {
|
|||
const std::vector<CNodePtr> &execution_order) override;
|
||||
void AssignMemory(session::KernelGraph *graph) override;
|
||||
bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) override;
|
||||
bool GenDynamicKernel(const session::KernelGraph *graph) override { return true; }
|
||||
bool RunDynamicKernelAsync(const session::KernelGraph *graph) override { return true; }
|
||||
|
||||
protected:
|
||||
DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format,
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue