diff --git a/graphengine b/graphengine index 622af6c1c50..2dbfefcdd0d 160000 --- a/graphengine +++ b/graphengine @@ -1 +1 @@ -Subproject commit 622af6c1c50034bea5a08bd409c5a410782bfe53 +Subproject commit 2dbfefcdd0d4b958801403dbaf9efe46447dccd2 diff --git a/mindspore/_extends/parallel_compile/tbe_compiler/compiler.py b/mindspore/_extends/parallel_compile/tbe_compiler/compiler.py index a241bf9e104..01c79970429 100755 --- a/mindspore/_extends/parallel_compile/tbe_compiler/compiler.py +++ b/mindspore/_extends/parallel_compile/tbe_compiler/compiler.py @@ -17,8 +17,6 @@ import json import os import sys from te.platform.cce_conf import te_set_version -from te.platform.fusion_manager import op_build_cfg_dis, op_build_cfg_en, set_current_op_name, \ - init_op_pattern, set_op_params, set_op_build_type, get_op_pattern, set_current_op_func_name from te.platform.fusion_util import fusion_op from common import check_kernel_info, get_args, get_build_in_impl_path, get_ddk_version @@ -27,7 +25,6 @@ build_in_impl_path = get_build_in_impl_path() # op function list op_build = "compile" -op_pre_build = "pre_build" fusion_pattern_start_flag = "fusion_pattern_start" fusion_pattern_end_flag = "fusion_pattern_end" @@ -83,19 +80,7 @@ def build_op(build_type, json_str): else: op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0) # get function - if build_type == op_pre_build: - # set op parameter - op_build_cfg_dis() - set_current_op_func_name(op_name) - set_current_op_name(kernel_name) - init_op_pattern() - set_op_params(*outputs_args, *attrs_args, kernel_name=kernel_name) - set_op_build_type('prebuild') - if custom_flag: - py_fn_name = kernel_info['op_info']['name'] - else: - py_fn_name = op_name - elif build_type == op_build: + if build_type == op_build: if custom_flag: py_fn_name = kernel_info['op_info']['name'] else: @@ -106,13 +91,6 @@ def build_op(build_type, json_str): if op_func is None: raise ValueError("Op:{} function {} is not supported by Tbe.".format(op_name, build_type)) - # pre build - if build_type == op_pre_build: - op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name) - # disable only pattern configuration - op_build_cfg_en() - return get_op_pattern() - # call function if kernel_name[0:19] == "bounding_box_encode": return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name_val=kernel_name) @@ -120,8 +98,6 @@ def build_op(build_type, json_str): return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name) except Exception as e: - if build_type == op_pre_build: - op_build_cfg_en() raise RuntimeError(e) @@ -136,14 +112,9 @@ def compile_fusion_op(json_str): Exception: If specific keyword is not found. """ args = json.loads(json_str) + te_set_version(ddk_version) if 'fusion_op' not in args or not args['fusion_op']: raise ValueError("Json string Errors, key:fusion_op not found.") - if 'prebuild_ops' not in args or not args['prebuild_ops']: - raise ValueError("Json string Errors, key:prebuild_ops not found.") - - pre_build_op_list = args['prebuild_ops'] - for op in pre_build_op_list: - build_op(op_pre_build, json.dumps(op)) fusion_op_arg = args['fusion_op'] return fusion_op(json.dumps(fusion_op_arg)) @@ -159,8 +130,6 @@ def compile_with_json(json_str): json_info = json.loads(json_str) if "fusion_op" in json_info: ret = compile_fusion_op(json_str) - elif "compile_type" in json_info: - ret = build_op(op_pre_build, json_str) else: ret = build_op(op_build, json_str) return ret diff --git a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc index cdb2fe10477..f7c02236c52 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_build.cc @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include "runtime/device/kernel_runtime.h" #include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h" #include "backend/kernel_compiler/akg/akg_kernel_build.h" @@ -218,7 +220,7 @@ void SetNodeInputs(const std::shared_ptr &anf_node, mindspore::NodeDef mindspore::TensorShape_Dim *dim = tensorShape->add_dim(); dim->set_size((::google::protobuf::int64)item); } - node_inputs->set_tensor_type((mindspore::DataType)input_data_type); + node_inputs->set_tensor_type(input_data_type); node_inputs->set_mem_device("HBM"); } } @@ -245,7 +247,7 @@ void SetNodeOutputs(const std::shared_ptr &anf_node, mindspore::NodeDef } TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index); int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type); - node_outputs->set_tensor_type((mindspore::DataType)output_data_type); + node_outputs->set_tensor_type(output_data_type); node_outputs->set_mem_device("HBM"); } } @@ -287,6 +289,109 @@ bool CreateNodeDefBytes(const std::shared_ptr &anf_node, return true; } +bool CreateExtInfo(const std::shared_ptr &anf_node, const std::shared_ptr &kernel_mod_ptr) { + if (!anf_node->isa()) { + return true; + } + + if (!AnfAlgo::IsDynamicShape(anf_node)) { + return true; + } + + MS_LOG(INFO) << "CreateExtInfo start, " << anf_node->fullname_with_scope(); + + int32_t unknown_shape_type = UnknowShapeOpType::DEPEND_COMPUTE; + uint64_t ext_info_head_len = kExtInfoHeadSize; + std::string ext_info; + size_t input_num = AnfAlgo::GetInputTensorNum(anf_node); + size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node); + + // 1.addr:unknown shape type + uint64_t ext_info_len = ext_info.size(); + ext_info_len += ext_info_head_len + sizeof(int32_t); + + // 2.addr:input ShapeAndType + ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType); + + // 3.addr:output ShapeAndType + ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType); + + uint64_t ext_info_offset = ext_info.size(); + ext_info.resize(ext_info_len, 0); + char *ext_info_buf = ext_info.data(); + + // deal1: unknown shape type + ExtInfo *info = reinterpret_cast(ext_info_buf + ext_info_offset); + info->infoType = FWK_ADPT_EXT_SHAPE_TYPE; + info->infoLen = sizeof(int32_t); + ext_info_offset += ext_info_head_len; + int32_t *shape_type = reinterpret_cast(ext_info_buf + ext_info_offset); + *shape_type = unknown_shape_type; + ext_info_offset += info->infoLen; + + // deal2:input ShapeAndType + info = reinterpret_cast(ext_info_buf + ext_info_offset); + info->infoType = FWK_ADPT_EXT_INPUT_SHAPE; + info->infoLen = input_num * sizeof(ShapeAndType); + ext_info_offset += ext_info_head_len; + + ShapeAndType *inputs = reinterpret_cast(ext_info_buf + ext_info_offset); + for (size_t input_index = 0; input_index < input_num; input_index++) { + TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index); + std::vector input_shape; + int32_t input_data_type; + if (input_type == kObjectTypeString) { + auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto input_node = cnode->inputs()[input_index + 1]; + auto value_ptr = GetValueNode(input_node); + auto value = GetValue(value_ptr); + input_shape.push_back(1); + input_shape.push_back(value.size()); + input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown); + } else { + input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index); + input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type); + } + inputs[input_index].type = input_data_type; + + size_t input_shape_index = 0; + for (; input_shape_index < input_shape.size(); input_shape_index++) { + inputs[input_index].dims[input_shape_index] = SizeToLong(input_shape[input_shape_index]); + } + if (input_shape.size() < kMaxShapeDims) { + inputs[input_index].dims[input_shape_index] = LLONG_MIN; + } + } + ext_info_offset += info->infoLen; + + // deal3:output ShapeAndType + info = reinterpret_cast(ext_info_buf + ext_info_offset); + info->infoType = FWK_ADPT_EXT_OUTPUT_SHAPE; + info->infoLen = output_num * sizeof(ShapeAndType); + ext_info_offset += ext_info_head_len; + + ShapeAndType *outputs = reinterpret_cast(ext_info_buf + ext_info_offset); + for (size_t output_index = 0; output_index < output_num; output_index++) { + std::vector output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index); + TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index); + int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type); + outputs[output_index].type = output_data_type; + + size_t output_shape_index = 0; + for (; output_shape_index < output_shape.size(); output_shape_index++) { + outputs[output_index].dims[output_shape_index] = SizeToLong(output_shape[output_shape_index]); + } + if (output_shape_index < kMaxShapeDims) { + outputs[output_index].dims[output_shape_index] = LLONG_MIN; + } + } + + // set ext info + kernel_mod_ptr->SetExtInfo(ext_info); + return true; +} + KernelModPtr AicpuOpBuild(const std::shared_ptr &anf_node) { MS_EXCEPTION_IF_NULL(anf_node); std::string op_name = AnfAlgo::GetCNodeName(anf_node); @@ -300,6 +405,11 @@ KernelModPtr AicpuOpBuild(const std::shared_ptr &anf_node) { if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) { MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!"; } + + if (!CreateExtInfo(anf_node, kernel_mod_ptr)) { + MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!"; + } + if (!SetIOSize(anf_node, kernel_mod_ptr)) { MS_LOG(EXCEPTION) << "Set input output size list failed."; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.cc b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.cc index d00fab381ec..c7d7a3f1a2a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.cc @@ -43,6 +43,7 @@ AicpuOpKernelMod::~AicpuOpKernelMod() { input_size_list_.clear(); output_size_list_.clear(); workspace_size_list_.clear(); + ext_info_.clear(); } void AicpuOpKernelMod::SetInputSizeList(const std::vector &size_list) { input_size_list_ = size_list; } @@ -54,6 +55,7 @@ const std::vector &AicpuOpKernelMod::GetWorkspaceSizeList() const { retu void AicpuOpKernelMod::SetInputList(const std::vector &inputList) { inputList_ = inputList; } void AicpuOpKernelMod::SetOutputList(const std::vector &outputList) { outputList_ = outputList; } void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); } +void AicpuOpKernelMod::SetExtInfo(const std::string &ext_info) { ext_info_ = ext_info; } void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; } void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) { MS_EXCEPTION_IF_NULL(anf_node); @@ -84,16 +86,30 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector &inputs auto node_def_len = node_def_str_.length(); param_len += node_def_len; + param_len += sizeof(uint32_t); + + AicpuParamHead aicpu_param_head; + aicpu_param_head.length = param_len; + aicpu_param_head.ioAddrNum = io_addrs_num; + + if (ext_info_.empty()) { + MS_LOG(INFO) << "Static Shape Kernel"; + aicpu_param_head.extInfoLength = 0; + aicpu_param_head.extInfoAddr = 0; + } else { + MS_LOG(INFO) << "Dynamic Kernel Ext Info size:" << ext_info_.size(); + } - // Create taskArgs: AicpuParamHead + ioAddrs + notifyId + customizedAttr - AicpuParamHead paramHead = {static_cast(param_len), static_cast(io_addrs_num)}; args_.clear(); - (void)args_.append(reinterpret_cast(¶mHead), sizeof(AicpuParamHead)); + (void)args_.append(reinterpret_cast(&aicpu_param_head), sizeof(AicpuParamHead)); // TaskArgs append ioAddrs if (io_addrs_size != 0) { (void)args_.append(reinterpret_cast(io_addrs.data()), io_addrs_size); } + // size for node_def + args_.append(reinterpret_cast(&node_def_len), sizeof(uint32_t)); + // When it's aicpu customized ops, taskArgs should append customized attr if (node_def_len != 0) { (void)args_.append(reinterpret_cast(node_def_str_.data()), node_def_len); @@ -145,8 +161,9 @@ std::vector AicpuOpKernelMod::GenTask(const std::vector node_name_ = kTopKV2; } - AicpuTaskInfoPtr task_info_ptr = make_shared( - kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump()); + AicpuTaskInfoPtr task_info_ptr = + make_shared(kernel_name_, stream_id, node_so_, node_name_, node_def_str_, + ext_info_, input_data_addrs, output_data_addrs, NeedDump()); MS_LOG(INFO) << "AicpuOpKernelMod GenTask end"; return {task_info_ptr}; diff --git a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.h b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.h index 9bc75d11101..7d006cc67dd 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.h +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_kernel_mod.h @@ -36,6 +36,7 @@ class AicpuOpKernelMod : public AscendKernelMod { void SetOutputList(const std::vector &outputList); void SetAnfNode(const AnfNodePtr &anf_node); void SetNodeDef(const std::string &nodeDef); + void SetExtInfo(const std::string &ext_info); void SetNodeName(const std::string &node_name); /** @@ -58,6 +59,7 @@ class AicpuOpKernelMod : public AscendKernelMod { std::string node_def_str_; std::string node_name_; std::string node_so_; + std::string ext_info_; std::vector inputList_; std::vector outputList_; AnfNodePtr anf_node_; diff --git a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.h b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.h index d68aef3f860..01a8f577189 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.h +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.h @@ -21,7 +21,6 @@ #include #include #include "backend/kernel_compiler/kernel.h" - namespace mindspore { namespace kernel { constexpr auto kInitDataSetQueue = "InitDataSetQueue"; @@ -50,6 +49,36 @@ struct AicpuParamHead { uint64_t extInfoAddr; // extInfo address } __attribute__((packed)); +const uint32_t kExtInfoHeadSize = 8; +struct ExtInfo { + int32_t infoType; // extend type + uint32_t infoLen; // length for infoMsg + char infoMsg[0]; // extend value +} __attribute__((packed)); + +// Extent info ShapeAndType +const uint32_t kMaxShapeDims = 8; +struct ShapeAndType { + int32_t type; + int64_t dims[kMaxShapeDims]; +} __attribute__((packed)); + +// Extend Info type for task +enum FWKTaskExtInfoType { + FWK_ADPT_EXT_SHAPE_TYPE = 0, + FWK_ADPT_EXT_INPUT_SHAPE, + FWK_ADPT_EXT_OUTPUT_SHAPE, + FWK_ADPT_EXT_INVALID +}; + +// for unknown shape op type +enum UnknowShapeOpType { + DEPEND_IN_SHAPE = 1, // op out shape get by input shape + DEPEND_CONST_VALUE = 2, // op out shape get by const op value + DEPEND_SHAPE_RANGE = 3, // op out shape get by range + DEPEND_COMPUTE = 4 // op out shape get by totally computing +}; + class AicpuOpUtil { public: static int MsTypeToProtoType(TypeId ms_type); diff --git a/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/attr.proto b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/attr.proto index a0ab4bd1e76..fee2172c1d1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/attr.proto +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/attr.proto @@ -26,7 +26,7 @@ message AttrValue { repeated int64 i = 3 [ packed = true ]; //"array(int)" repeated float f = 4 [ packed = true ]; //"array(float)" repeated bool b = 5 [ packed = true ]; //"array(bool)" - repeated DataType type = 6 [ packed = true ]; //"array(type)" + repeated int32 type = 6 [ packed = true ]; //"array(type)" repeated TensorShape shape = 7; //"array(shape)" repeated Tensor tensor = 8; //"array(tensor)" } diff --git a/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/node_def.proto b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/node_def.proto index b0c0e0f349b..b1a76957d59 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/node_def.proto +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/node_def.proto @@ -18,9 +18,16 @@ package mindspore; import "attr.proto"; import "tensor.proto"; +message DynamicIdx { + int32 idx = 1; + int32 num = 2; +} + message NodeDef { string op = 2; map attrs = 3; repeated Tensor inputs = 4; repeated Tensor outputs = 5; + map dym_inputs = 6; + map dym_outputs = 7; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor.proto b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor.proto index b4fd66595a1..1240a97ab73 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor.proto +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor.proto @@ -26,9 +26,12 @@ message Tensor { TensorShape tensor_shape = 1; // tensor content data type - DataType tensor_type = 2; + int32 tensor_type = 2; // tensor memory device // data located memory device , "DDR" "HBM" OR "NONE" string mem_device = 3; + string name = 4; + uint64 data_ptr = 5; + uint64 data_size = 6; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor_shape.proto b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor_shape.proto index 70534e8ebab..12b07e09673 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor_shape.proto +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/tensor_shape.proto @@ -31,5 +31,5 @@ message TensorShape { bool unknown_rank = 3; // data format "NHWC" "NCHW" "NC1HWC0" OR "NONE" - string data_format = 4; + int32 data_format = 4; }; diff --git a/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/types.proto b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/types.proto index 574259d97df..4cbff252bf5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/types.proto +++ b/mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/types.proto @@ -19,17 +19,30 @@ option cc_enable_arenas = true; package mindspore; enum DataType { - MS_UNKNOWN = 0; - MS_BOOL = 1; + MS_FLOAT32 = 0; + MS_FLOAT16 = 1; MS_INT8 = 2; - MS_UINT8 = 3; - MS_INT16 = 4; - MS_UINT16 = 5; - MS_INT32 = 6; - MS_UINT32 = 7; - MS_INT64 = 8; - MS_UINT64 = 9; - MS_FLOAT16 = 10; - MS_FLOAT32 = 11; - MS_FLOAT64 = 12; + MS_INT32 = 3; + MS_UINT8 = 4; + MS_INT16 = 6; + MS_UINT16 = 7; + MS_UINT32 = 8; + MS_INT64 = 9; + MS_UINT64 = 10; + MS_FLOAT64 = 11; + MS_BOOL = 12; + MS_STRING = 13; + MS_DUAL_SUB_INT8 = 14; + MS_DUAL_SUB_UINT8 = 15; + MS_COMPLEX64 = 16; + MS_COMPLEX128 = 17; + MS_QINT8 = 18; + MS_QINT16 = 19; + MS_QINT32 = 20; + MS_QUINT8 = 21; + MS_QUINT16 = 22; + MS_RESOURCE = 23; + MS_STRING_REF = 24; + MS_DUAL = 25; + MS_UNKNOWN = 26; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/kernel.h b/mindspore/ccsrc/backend/kernel_compiler/kernel.h index c41223220c4..add268374c4 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel.h @@ -37,7 +37,6 @@ enum FusionType { COMMREDUCE, SEGMENT, OPAQUE, - DYNAMIC, UNKNOWN_FUSION_TYPE = -1, }; enum OpPattern { @@ -80,8 +79,8 @@ class KernelPack { bool LoadKernelMeta(const std::string &json_f, const std::string &processor); bool ReadFromJsonFile(const std::string &json_f, const std::string &processor); const std::string Serialize() const; - const FlexArray *const GetJson() const { return json_; } - const FlexArray *const GetKernel() const { return kernel_; } + const FlexArray *GetJson() const { return json_; } + const FlexArray *GetKernel() const { return kernel_; } ~KernelPack() { if (json_) { delete[] json_; diff --git a/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc index 1bae3fa2257..55d74686171 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.cc @@ -19,53 +19,36 @@ #include #include #include -#include #include "backend/kernel_compiler/tbe/tbe_kernel_build.h" #include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h" #include "backend/kernel_compiler/tbe/tbe_utils.h" #include "backend/kernel_compiler/tbe/tbe_convert_utils.h" +#include "utils/ms_context.h" namespace mindspore { namespace kernel { using mindspore::kernel::tbe::TbeUtils; -static bool GenPreBuildKernelJson(const std::vector &compute_nodes, - std::vector *prebuild_op_list) { - MS_EXCEPTION_IF_NULL(prebuild_op_list); - TbeKernelJsonCreator creator(PREBUILD); - for (const auto &anf_node : compute_nodes) { - nlohmann::json prebuild; - if (!creator.GenTbeSingleKernelJson(anf_node, &prebuild)) { - MS_LOG(ERROR) << "GenTbeSingleKernelJson failed"; - return false; - } - (*prebuild_op_list).push_back(prebuild); - } - return true; -} - std::map KernelFusion(const std::vector &fusion_scopes) { MS_LOG(INFO) << "kernel fusion build start, scope size:" << fusion_scopes.size(); std::map kernel_mod_ret; auto build_manger = std::make_shared(); MS_EXCEPTION_IF_NULL(build_manger); for (const auto &fusion_scope_iter : fusion_scopes) { - auto scope_id = fusion_scope_iter.scope_id; + string fusion_kernel_name; nlohmann::json fusion_op; - string fusion_kernel = "te_fusion"; if (!TbeKernelBuild::GenFusionScopeJson(fusion_scope_iter.input_nodes, fusion_scope_iter.compute_nodes, &fusion_op, - &fusion_kernel)) { + &fusion_kernel_name)) { continue; } // gen kernel_name & check cache std::string json_str = fusion_op.dump(); size_t hash_id = std::hash()(json_str); - auto json_name = fusion_kernel.append("_").append(std::to_string(hash_id)); + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + auto device_id = context_ptr->get_param(MS_CTX_DEVICE_ID); + auto json_name = + fusion_kernel_name.append("_").append(std::to_string(hash_id)).append("_").append(std::to_string(device_id)); fusion_op["fusion_op_name"] = json_name; - // gen json for prebuild - std::vector prebuild_op_list; - if (!GenPreBuildKernelJson(fusion_scope_iter.compute_nodes, &prebuild_op_list)) { - continue; - } // get io size std::vector input_size_list; std::vector output_size_list; @@ -80,20 +63,20 @@ std::map KernelFusion(const std::vector auto kernel_mod = build_manger->GenKernelMod(json_name, tbe::kProcessorAiCore, input_size_list, output_size_list, kernel_pack); if (kernel_mod != nullptr) { - kernel_mod_ret[scope_id] = kernel_mod; + kernel_mod_ret[fusion_scope_iter.scope_id] = kernel_mod; continue; } } // fusion build nlohmann::json fusion_json; fusion_json["fusion_op"] = fusion_op; - fusion_json["prebuild_ops"] = prebuild_op_list; auto task_id = build_manger->StartCompileOp(fusion_json); TbeUtils::SaveJsonInfo(json_name, fusion_json.dump()); if (task_id < 0) { MS_EXCEPTION(ArgumentError) << "start compile failed."; } - build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list, scope_id); + build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list, + fusion_scope_iter.scope_id); } int build_failed_num = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h index 089f41f2b8b..1579953e36e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_fusion.h @@ -16,6 +16,7 @@ #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_ #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_ +#include #include #include #include "backend/kernel_compiler/kernel.h" @@ -25,11 +26,9 @@ namespace kernel { * @brief fuse op and return a callable mod */ struct FusionScopeInfo { - FusionScopeInfo() {} - FusionScopeInfo(int32_t id, const std::vector &in, const std::vector &comp, - const std::vector &out) - : scope_id(id), input_nodes(in), compute_nodes(comp), output_nodes(out) {} - int32_t scope_id; + FusionScopeInfo(int32_t id, std::vector in, std::vector comp, std::vector out) + : scope_id(id), input_nodes(std::move(in)), compute_nodes(std::move(comp)), output_nodes(std::move(out)) {} + int32_t scope_id{}; std::vector input_nodes; std::vector compute_nodes; std::vector output_nodes; diff --git a/mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h b/mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h index 808fa14413c..2dfa0ea7728 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h +++ b/mindspore/ccsrc/backend/kernel_compiler/oplib/oplib.h @@ -40,14 +40,13 @@ class OpLib { private: static bool RegOpFromLocalInfo(); - static bool DecodeOpInfo(const nlohmann::json &obj, const OpImplyType imply_type, const std::string &impl_path); - static bool DecodeAttr(const nlohmann::json &obj, const OpImplyType imply_type, - const std::shared_ptr &op_info); + static bool DecodeOpInfo(const nlohmann::json &obj, OpImplyType imply_type, const std::string &impl_path); + static bool DecodeAttr(const nlohmann::json &obj, OpImplyType imply_type, const std::shared_ptr &op_info); static bool DecodeDtypeFormat(const nlohmann::json &dtype_format, const std::shared_ptr &op_io, size_t index); static void DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_ptr &op_info); static void DecodeAKGSpecificInfo(const nlohmann::json &obj, const std::shared_ptr &op_info); - static bool DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply_type, const OpIOType io_type, + static bool DecodeInputOutput(const nlohmann::json &obj, OpImplyType imply_type, OpIOType io_type, const std::shared_ptr &op_info, const nlohmann::json &dtype_format); static bool GetRefInfo(const std::shared_ptr &op_info); static bool CheckRepetition(const std::shared_ptr &op_info); diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc index 07e2893294f..ca972899ba5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.cc @@ -173,7 +173,7 @@ void TbeAdapter::NormalizeFuncName(std::string *func_name) { *func_name = name_tmp; auto iter = tbe_func_adapter_map.find(*func_name); if (iter != tbe_func_adapter_map.end()) { - MS_LOG(INFO) << "map actual op from me " << *func_name << " to tbe op" << iter->second; + MS_LOG(INFO) << "Map actual op from me: " << *func_name << " to tbe op: " << iter->second; *func_name = iter->second; } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h index b37cf68da64..027b8e4b884 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_adapter.h @@ -27,7 +27,7 @@ // the TBE back-end operator implementation difference namespace mindspore { namespace kernel { -enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE }; +enum kCreaterType : int { SINGLE_BUILD = 0, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE }; namespace tbe { using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector> &op_info_attrs, nlohmann::json *attrs_json); diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc index 34165c47995..806b06d6f21 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_convert_utils.cc @@ -63,7 +63,7 @@ const std::unordered_map type_nbyte_maps = { const std::unordered_map fusion_type_maps = { {"CONVLUTION", FusionType::CONVLUTION}, {"ELEMWISE", FusionType::ELEMWISE}, {"COMMREDUCE", FusionType::COMMREDUCE}, - {"SEGMENT", FusionType::SEGMENT}, {"DYNAMIC", FusionType::DYNAMIC}, {"OPAQUE", FusionType::OPAQUE}, + {"SEGMENT", FusionType::SEGMENT}, {"OPAQUE", FusionType::OPAQUE}, }; TypeId DtypeToTypeId(const std::string &dtypes) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc index 39e788f89ca..b8f0562e085 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.cc @@ -24,6 +24,7 @@ #include "backend/kernel_compiler/tbe/tbe_adapter.h" #include "backend/kernel_compiler/tbe/tbe_convert_utils.h" #include "backend/kernel_compiler/tbe/tbe_utils.h" +#include "utils/ms_context.h" namespace mindspore { namespace kernel { @@ -71,14 +72,20 @@ constexpr auto kVTypeListListInt = "listListInt"; constexpr auto kJValue = "value"; constexpr auto kJDynIndex = "dyn_index"; constexpr auto kJFuncName = "func_name"; - -std::string NormalizeFullScopeName(const string &full_scope_name) { - // exp:Default/ReLU-op0 -->Default_ReLU_op0 - string normal_ret = full_scope_name; - std::replace(normal_ret.begin(), normal_ret.end(), '/', '_'); - std::replace(normal_ret.begin(), normal_ret.end(), '-', '_'); - return normal_ret; -} +constexpr auto kJL1AddrOffset = "L1_addr_offset"; +constexpr auto kJL1FusionType = "L1_fusion_type"; +constexpr auto kJL1WorkspaceSize = "L1_workspace_size"; +constexpr auto kJAddrType = "addr_type"; +constexpr auto kJSliceOffset = "slice_offset"; +constexpr auto kJSplitIndex = "split_index"; +constexpr auto kJTotalShape = "total_shape"; +constexpr auto kJValidShape = "valid_shape"; +constexpr auto kJModuleName = "module_name"; +constexpr auto kJPattern = "pattern"; +constexpr auto kJPyModulePath = "py_module_path"; +constexpr auto kJPreBuildOutsAttrs = "prebuild_outs_attrs"; +constexpr auto kJKwdArgs = "kwds_args"; +constexpr auto kJListArgs = "list_args"; bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr &anf_node, nlohmann::json *kernel_json) { @@ -117,13 +124,12 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr()(json_str); - json_name_ = op_name + "_" + std::to_string(hash_id); + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + auto device_id = context_ptr->get_param(MS_CTX_DEVICE_ID); + json_name_ = op_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id); json_info_ = json_str; - if (creater_type_ == PREBUILD) { - op_info_json[kJKernelName] = NormalizeFullScopeName(anf_node->fullname_with_scope()); - } else { - op_info_json[kJKernelName] = json_name_; - } + op_info_json[kJKernelName] = json_name_; (*kernel_json)[kJOpInfo] = op_info_json; (*kernel_json)[kJFullName] = anf_node->fullname_with_scope(); if (creater_type_ == SINGLE_BUILD) { @@ -581,25 +587,25 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector &input_nodes, const std::vector &compute_nodes, - nlohmann::json *fusion_str, std::string *fusion_kernel) { - MS_EXCEPTION_IF_NULL(fusion_str); - MS_EXCEPTION_IF_NULL(fusion_kernel); + nlohmann::json *fusion_json, std::string *fusion_kernel_name) { + MS_EXCEPTION_IF_NULL(fusion_json); + MS_EXCEPTION_IF_NULL(fusion_kernel_name); // get input layer info std::vector> input_layers; std::map spec_data_input; if (!GetInputLayers(input_nodes, compute_nodes, &input_layers, &spec_data_input)) { return false; } - // gen fusion scopre_op jsom + // gen fusion scopre_op json std::vector compute_list; - (*fusion_kernel) = kFusionKernelNamePrfix; + (*fusion_kernel_name) = kFusionKernelNamePrfix; // index: fusion build option input record, next one from 0 static size_t index = 0; auto layer_iter = input_layers.begin(); auto compute_op_iter = compute_nodes.begin(); for (; compute_op_iter != compute_nodes.end(); ++compute_op_iter, ++layer_iter) { nlohmann::json compute_op_str; - (void)GenFusionComputeJson(*compute_op_iter, &layer_iter, &compute_op_str, fusion_kernel, &index); + (void)GenFusionComputeJson(*compute_op_iter, &layer_iter, &compute_op_str, fusion_kernel_name, &index); compute_list.push_back(compute_op_str); } index = 0; @@ -617,36 +623,122 @@ bool TbeKernelBuild::GenFusionScopeJson(const std::vector } index = 0; data_list.insert(data_list.end(), compute_list.begin(), compute_list.end()); - (*fusion_str)[kFusionOpList] = data_list; + (*fusion_json)[kFusionOpList] = data_list; return true; } +void TbeKernelBuild::GenPreDescJson(nlohmann::json *output_desc) { + MS_EXCEPTION_IF_NULL(output_desc); + (*output_desc)[kJL1AddrOffset] = 0; + (*output_desc)[kJL1FusionType] = -1; + (*output_desc)[kJL1WorkspaceSize] = -1; + (*output_desc)[kJAddrType] = 0; +} + +void TbeKernelBuild::GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str, + std::string *fusion_kernel_name) { + MS_EXCEPTION_IF_NULL(compute_op_str); + MS_EXCEPTION_IF_NULL(fusion_kernel_name); + // gen others + auto origin_type = AnfAlgo::GetCNodeName(cnode); + // replace special op type for buffer fusion op + auto type = GetRealOpType(origin_type); + (*compute_op_str)[kJtype] = type; + tbe::TbeAdapter::NormalizeFuncName(&type); + (*compute_op_str)[kJFuncName] = type; + (*compute_op_str)[kJModuleName] = std::string("impl.") + type; + (*compute_op_str)[kJName] = cnode->fullname_with_scope(); + (*compute_op_str)[kJPattern] = GetNodeFusionType(cnode); + (*compute_op_str)[kJPyModulePath] = "/usr/local/Ascend/opp/op_impl/build_in/ai_core/tbe"; + (void)(*fusion_kernel_name).append("_"); + (void)(*fusion_kernel_name).append(type); +} + +void TbeKernelBuild::GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str) { + MS_EXCEPTION_IF_NULL(cnode); + MS_EXCEPTION_IF_NULL(compute_op_str); + // kwds args + nlohmann::json json_prebuild_args; + json_prebuild_args[kJKwdArgs] = nlohmann::json::object(); + // list_args + nlohmann::json json_list_args; + // list_args: output args + auto output_size = AnfAlgo::GetOutputTensorNum(cnode); + for (size_t i = 0; i < output_size; ++i) { + nlohmann::json output_desc; + GenDescJson(cnode, i, i, &output_desc); + output_desc[kJDtype] = output_desc[kJDataType]; + json_list_args.push_back(output_desc); + } + // list_args: attr args + auto op_name = AnfAlgo::GetCNodeName(cnode); + auto opinfo = OpLib::FindOp(op_name, OpImplyType::kTBE); + MS_EXCEPTION_IF_NULL(opinfo); + TbeKernelJsonCreator json_creater(SINGLE_BUILD); + nlohmann::json json_attr_args; + if (!json_creater.GenTbeAttrJson(cnode, opinfo, &json_attr_args)) { + MS_LOG(INFO) << "Fusion warning: get prebuild args of attr failed."; + } + for (const auto &attr : json_attr_args) { + // if(attr[kJName] != "isRef" && attr["valid"] == true) { + if (attr[kJName] != "isRef" && attr[kJValid] == true) { + json_list_args.push_back(attr[kJValue]); + } + } + json_prebuild_args[kJListArgs] = json_list_args; + (*compute_op_str)[kJPreBuildOutsAttrs] = json_prebuild_args; +} + +void TbeKernelBuild::GenSuffixDescJson(nlohmann::json *output_desc) { + MS_EXCEPTION_IF_NULL(output_desc); + (*output_desc)[kJSliceOffset] = nlohmann::json::array(); + (*output_desc)[kJSplitIndex] = 0; + (*output_desc)[kJTotalShape] = nlohmann::json::array(); + (*output_desc)[kJValidShape] = nlohmann::json::array(); +} + +// anf_node: this node is used to get output desc(type\foramt\shape ...) +// node_out_idx: node output index +// desc_output_idx: this index use to add json +// nlohmann::json *output_desc: for return +// FusionDataType fusion_data_type: speceial process json desc output shape [kFusionAddN, kFusionReLUGradV2] void TbeKernelBuild::GenDescJson(const std::shared_ptr &anf_node, size_t node_out_idx, size_t desc_output_idx, nlohmann::json *output_desc, FusionDataType fusion_data_type) { + GenPreDescJson(output_desc); + // data_type + auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx); + (*output_desc)[kJDataType] = tbe::TypeIdToString(type_id); + // name std::string output_desc_name = anf_node->fullname_with_scope(); if (node_out_idx > 0) { output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx); } - (*output_desc)[kJName] = NormalizeFullScopeName(output_desc_name); - auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx); - (*output_desc)[kJDataType] = tbe::TypeIdToString(type_id); + (*output_desc)[kJName] = output_desc_name; + // ori_format + (*output_desc)[kJOriFormat] = kOpFormat_NCHW; + // ori_shape auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, node_out_idx); if (ori_shape.empty()) { ori_shape.emplace_back(1); } (*output_desc)[kJOriShape] = ori_shape; + // !! Note: output_index, only node's output use it + (*output_desc)[kJOutputIndex] = desc_output_idx; + // shape auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, node_out_idx); if (shape.empty()) { shape.emplace_back(1); } (*output_desc)[kJShape] = shape; + // !! Note: format: only data node's output use it auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx); if (format == kOpFormat_DEFAULT) { format = ori_shape.size() == 4 ? kOpFormat_NCHW : kOpFormat_ND; + } else if (format == kOpFormat_FRAC_Z) { + format = kOpFormat_FRACTAL_Z; } (*output_desc)[kJFormat] = format; - (*output_desc)[kJOriFormat] = kOpFormat_NCHW; - (*output_desc)[kJOutputIndex] = desc_output_idx; + // special node if (fusion_data_type == kFusionAddN && format == kOpFormat_NC1HWC0) { std::vector spec_shape = {}; spec_shape.emplace_back(shape[0]); @@ -663,12 +755,13 @@ void TbeKernelBuild::GenDescJson(const std::shared_ptr &anf_ (*output_desc)[kJShape] = spec_shape; (*output_desc)[kJDataType] = kVTypeBool; } + GenSuffixDescJson(output_desc); } void TbeKernelBuild::GenReusedOutputDesc(const std::shared_ptr &anf_node, size_t index, size_t output_index, nlohmann::json *output_desc) { std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index); - (*output_desc)[kJName] = NormalizeFullScopeName(output_desc_name); + (*output_desc)[kJName] = output_desc_name; (*output_desc)[kJOutputIndex] = output_index; std::vector shape; (*output_desc)[kJShape] = shape; @@ -692,6 +785,9 @@ bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name, return true; } +// : contains parameter/data node, input order may doesn't match tbe input order; +// : contains cnode, inputs order may doesn't match tbe input order; +// Special process node list: reference tbe_adapter.cc [except: Conv2DBackpropInput] bool TbeKernelBuild::GetInputLayers(const std::vector &input_nodes, const std::vector &compute_nodes, std::vector> *input_layers, @@ -722,7 +818,7 @@ bool TbeKernelBuild::GetInputLayers(const std::vector &in MS_LOG(INFO) << "Fusion info: add compute node's [" << i << "] input: " << input->fullname_with_scope(); layer.emplace_back((*find_iter)); } else { - MS_LOG(INFO) << "Fusion warnig: this input [" << i << "] may be pre compute(" << input->fullname_with_scope() + MS_LOG(INFO) << "Fusion warning: this input [" << i << "] may be pre compute(" << input->fullname_with_scope() << ") node's output."; } } @@ -750,8 +846,9 @@ bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr output_desc_list; + // if data_input is null, this is optional input. if (!data_input) { - MS_LOG(INFO) << "Data input is optional node"; + MS_LOG(INFO) << "Fusion info: data input is optional node"; auto name = std::string(kOptional) + std::to_string(*index); (*data_str)[kJName] = name; nlohmann::json output_desc; @@ -767,12 +864,16 @@ bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptrfullname_with_scope() << " index:" << real_idx; + MS_LOG(INFO) << "Fusion info: Real name: " << real_node->fullname_with_scope() << ". index:" << real_idx; // kJOutputDesc nlohmann::json output_desc; GenDescJson(real_node, real_idx, real_idx, &output_desc, fusion_data_type); output_desc_list.push_back(output_desc); - (*data_str)[kJName] = NormalizeFullScopeName(real_node->fullname_with_scope()); + auto full_name = real_node->fullname_with_scope(); + if (real_idx > 0) { + full_name = full_name.append("_").append(std::to_string(real_idx)); + } + (*data_str)[kJName] = full_name; } (*data_str)[kJOutputDesc] = output_desc_list; (*data_str)[kJtype] = "Data"; @@ -808,6 +909,7 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) { size_t TbeKernelBuild::GetOptionalInput(const mindspore::CNodePtr &cnode, bool is_dynamic_input) { MS_EXCEPTION_IF_NULL(cnode); if (is_dynamic_input) { + // Node can not have optional & dynamic input. return 0; } MS_EXCEPTION_IF_NULL(cnode); @@ -831,22 +933,46 @@ std::string TbeKernelBuild::GetRealOpType(const std::string &origin_type) { return result; } +std::string TbeKernelBuild::GetNodeFusionType(const mindspore::CNodePtr &cnode) { + MS_EXCEPTION_IF_NULL(cnode); + auto node_type = AnfAlgo::GetCNodeName(cnode); + static std::map fusion_type_map = {{kConv2DOpName, "Convolution"}, + {kBNTrainingReduceOpName, "bn_reduce"}, + {kBNTrainingUpdateOpName, "bn_update"}, + {kReluV2OpName, "ElemWise"}, + {kTensorAddOpName, "ElemWise"}, + {kConv2DBackpropInputOpName, "Conv2d_backprop_input"}, + {kAddNOpName, "ElemWise"}, + {kReluGradV2OpName, "ElemWise"}, + {kRealDivOpName, "ElemWise"}}; + auto find = fusion_type_map.find(node_type); + if (find == fusion_type_map.end()) { + MS_LOG(INFO) << "Fusion warning: get node fusion type failed, origin node type: " << node_type + << " return null string."; + return ""; + } else { + return find->second; + } +} + bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, std::vector>::iterator *layer_iter, std::vector *input_desc_list, size_t *index) { MS_EXCEPTION_IF_NULL(cnode); MS_EXCEPTION_IF_NULL(input_desc_list); std::vector input_desc_list_tmp = {}; + // 1. input json bool is_dynamic_input = IsDynamicInput(cnode); for (size_t i = 1; i < cnode->inputs().size(); ++i) { auto input = cnode->input(i); auto kernel_idx = AnfAlgo::VisitKernel(input, 0); auto real_node = kernel_idx.first; size_t real_idx = kernel_idx.second; - MS_LOG(INFO) << "Real name" << real_node->fullname_with_scope() << "index:" << real_idx; + MS_LOG(INFO) << "Fusion info: real name: " << real_node->fullname_with_scope() << ". index:" << real_idx; nlohmann::json input_desc; GenDescJson(real_node, real_idx, real_idx, &input_desc); if (is_dynamic_input) { + // 2. dynamic input json MS_LOG(INFO) << "Node has dynamic input."; input_desc[kJDynIndex] = (i - 1); } @@ -854,7 +980,8 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode, } size_t optional_num = GetOptionalInput(cnode, is_dynamic_input); if (optional_num > 0) { - MS_LOG(INFO) << "Node has optional input."; + // 3. optional input + MS_LOG(INFO) << "Fusion info: node has optional input."; for (size_t i = 0; i < optional_num; ++i) { nlohmann::json optional_input_desc; optional_input_desc[kJName] = std::string(kOptional) + std::to_string(*index); @@ -872,7 +999,7 @@ std::vector TbeKernelBuild::GetDescOutputIndex(const std::vector &o std::vector desc_output_index = {}; for (size_t idx = 0; idx < output_used_nums.size(); ++idx) { auto output_use_num_item = output_used_nums[idx]; - MS_LOG(INFO) << "Output used num[" << idx << "] = " << output_use_num_item; + MS_LOG(INFO) << "Fusion info: output used num[" << idx << "] = " << output_use_num_item; desc_output_index.emplace_back(idx); if (output_use_num_item > 1) { desc_output_index.emplace_back(idx); @@ -887,7 +1014,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode auto output_size = AnfAlgo::GetOutputTensorNum(cnode); if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) { auto output_used_nums = AnfAlgo::GetNodeAttr>(cnode, kAttrOutputUsedNum); - MS_LOG(INFO) << "This node's output has been reused, node name: " << cnode->fullname_with_scope(); + MS_LOG(INFO) << "Fusion info: this node's output has been reused, node name: " << cnode->fullname_with_scope(); if (output_used_nums.size() != output_size) { MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")" << " is not match output used num(" << output_used_nums.size() << ")"; @@ -930,20 +1057,14 @@ bool TbeKernelBuild::GenFusionComputeJson(const mindspore::AnfNodePtr &compute_n // gen output desc std::vector output_desc_list; if (!GenFusionComputeOutputJson(cnode, &output_desc_list)) { - MS_LOG(INFO) << "Fusion Error: gen fusion output desc faild, node full name: " << cnode->fullname_with_scope(); + MS_LOG(INFO) << "Fusion Error: gen fusion output desc failed, node full name: " << cnode->fullname_with_scope(); return false; } (*compute_op_str)[kJOutputDesc] = output_desc_list; - // gen others - auto origin_type = AnfAlgo::GetCNodeName(cnode); - // replace special op type for buffer fusion op - auto type = GetRealOpType(origin_type); - (*compute_op_str)[kJtype] = type; - tbe::TbeAdapter::NormalizeFuncName(&type); - (*compute_op_str)[kJFuncName] = type; - (*compute_op_str)[kJName] = NormalizeFullScopeName(cnode->fullname_with_scope()); - (void)(*fusion_kernel_name).append("_"); - (void)(*fusion_kernel_name).append(type); + // gen common desc + GenFusionComputeCommonJson(cnode, compute_op_str, fusion_kernel_name); + // gen prebuild args + GenFusionComputePreBuildJson(cnode, compute_op_str); return true; } @@ -965,7 +1086,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, MS_EXCEPTION_IF_NULL(output_size_list); input_size_list->clear(); output_size_list->clear(); - + // cal input size for malloc for (const auto &op : fusion_op_list) { if (op[kJtype] == "Data") { const auto &data_output_desc = op[kJOutputDesc]; @@ -975,23 +1096,23 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, } auto ret = GetIOSizeImpl(data_output); input_size_list->push_back(ret); - MS_LOG(INFO) << "Fusion info: scope input name: " << op[kJName] << ", size: " << ret; + MS_LOG(INFO) << "Fusion info: input node name: " << op[kJName] << ", size: " << ret; } } } - + // cal output size for malloc for (const auto &output_node : output_nodes) { auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0); auto real_node = kernel_idx.first; size_t real_idx = kernel_idx.second; - auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope()); - MS_LOG(INFO) << "Fusion info: real node name: " << normal_name << ", real output index: " << real_idx; + auto full_name = real_node->fullname_with_scope(); + MS_LOG(INFO) << "Fusion info: real output node name: " << full_name << ", real output index: " << real_idx; for (const auto &op : fusion_op_list) { - if (op[kJName] == normal_name) { + if (op[kJName] == full_name) { auto op_output_desces = op[kJOutputDesc]; if (output_node != real_node) { // tuple_get item - MS_LOG(INFO) << "Output is a tuple getitem node"; + MS_LOG(INFO) << "Fusion info: output is a tuple get_item node"; auto output_desc = op_output_desces[real_idx]; if (output_desc[kJShape].empty()) { MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. real_index " << real_idx; @@ -1001,6 +1122,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, output_size_list->push_back(ret); MS_LOG(INFO) << "Fusion info: scope output index: " << real_idx << ", size: " << ret; } else { + MS_LOG(INFO) << "Fusion info: output is self."; for (const auto &output_desc : op_output_desces) { if (output_desc[kJShape].empty()) { MS_LOG(INFO) << "Fusion info: output_desc's shape is empty, may be this node output"; diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h index 3a00169632c..d4cfe7866d1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_build.h @@ -41,8 +41,8 @@ class TbeKernelBuild { std::vector *output_size_list); // Ub Fuison static bool GenFusionScopeJson(const std::vector &input_nodes, - const std::vector &compute_nodes, nlohmann::json *fusion_str, - std::string *fusion_kernel); + const std::vector &compute_nodes, nlohmann::json *fusion_json, + std::string *fusion_kernel_name); static bool GetIOSize(const nlohmann::json &fusion_op_list, const std::vector &output_nodes, std::vector *input_size_list, std::vector *output_size_list); @@ -61,9 +61,14 @@ class TbeKernelBuild { static std::vector GetDescOutputIndex(const std::vector &output_used_nums); static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode, std::vector *output_desc_list); + static void GenPreDescJson(nlohmann::json *output_desc); + static void GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str, + std::string *fusion_kernel_name); + static void GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str); static void GenDescJson(const std::shared_ptr &anf_node, size_t node_out_idx, size_t desc_output_idx, nlohmann::json *output_desc, FusionDataType fusion_data_type = kFusionNormal); + static void GenSuffixDescJson(nlohmann::json *output_desc); static void GenReusedOutputDesc(const std::shared_ptr &anf_node, size_t index, size_t output_index, nlohmann::json *output_desc); static size_t GetIOSizeImpl(const nlohmann::json &desc); @@ -76,6 +81,7 @@ class TbeKernelBuild { static bool IsDynamicInput(const CNodePtr &cnode); static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input); static std::string GetRealOpType(const std::string &origin_type); + static std::string GetNodeFusionType(const CNodePtr &cnode); }; class TbeKernelJsonCreator { @@ -84,14 +90,14 @@ class TbeKernelJsonCreator { ~TbeKernelJsonCreator() = default; bool GenTbeSingleKernelJson(const std::shared_ptr &anf_node, nlohmann::json *kernel_json); std::string json_name() { return json_name_; } + bool GenTbeAttrJson(const std::shared_ptr &anf_node, const std::shared_ptr &op_info, + nlohmann::json *attrs_json); private: bool GenTbeInputsJson(const std::shared_ptr &anf_node, const std::shared_ptr &op_info, nlohmann::json *inputs_json); bool GenTbeOutputsJson(const std::shared_ptr &anf_node, const std::shared_ptr &op_info, nlohmann::json *outputs_json); - bool GenTbeAttrJson(const std::shared_ptr &anf_node, const std::shared_ptr &op_info, - nlohmann::json *attrs_json); static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj); bool GenInputDescJson(const std::shared_ptr &anf_node, size_t real_input_index, bool value, const std::shared_ptr &input_ptr, const string &op_input_name, size_t input_i, diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc index 79a538acd3b..41f03b7f5da 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.cc @@ -33,42 +33,6 @@ namespace mindspore { namespace kernel { using mindspore::kernel::tbe::TbeUtils; - -bool TbeOpParallelPreBuild(const std::vector &anf_nodes) { - auto build_manger = std::make_shared(); - MS_EXCEPTION_IF_NULL(build_manger); - for (const auto &anf_node : anf_nodes) { - // gen kernel json - MS_EXCEPTION_IF_NULL(anf_node); - nlohmann::json kernel_json; - TbeKernelJsonCreator creator(OP_PRE_COMPILE); - if (!creator.GenTbeSingleKernelJson(anf_node, &kernel_json)) { - MS_LOG(ERROR) << "GenTbeSingleKernelJson failed"; - return false; - } - kernel_json["compile_type"] = "pre_build"; - // op build - auto task_id = build_manger->StartCompileOp(kernel_json); - build_manger->SavePreTaskInfo(task_id, anf_node); - } - while (!build_manger->IsAllPreTaskFinish()) { - int task_id = -1; - std::string task_result; - std::string pre_build_result; - auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result); - if (!ret) { - MS_EXCEPTION(ArgumentError) << "Pre Build Failed. wait one ret:" << ret << ", task id:" << task_id; - } - - if (task_result != "Success") { - MS_EXCEPTION(ArgumentError) << "task pre compile Failed, task id:" << task_id << ", cause:" << task_result; - } - - build_manger->PreTaskFinishProcess(task_id, pre_build_result); - } - return true; -} - bool TbeOpParallelBuild(const std::vector &anf_nodes) { auto build_manger = std::make_shared(); MS_EXCEPTION_IF_NULL(build_manger); @@ -122,15 +86,8 @@ bool TbeOpParallelBuild(const std::vector &anf_nodes) { return build_manger->GenSameOpKernelMod(); } -ParallelBuildManager::ParallelBuildManager() {} - ParallelBuildManager::~ParallelBuildManager() { ResetTaskInfo(); } -void ParallelBuildManager::SavePreTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node) { - MS_LOG(INFO) << "SavePreTaskInfo, task id: " << task_id; - pre_task_map_[task_id] = anf_node; -} - void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node, const std::string &json_name, const std::vector &input_size_list, const std::vector &output_size_list, int32_t scope_id) { @@ -149,42 +106,11 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod task_map_[task_id] = task_info; } -bool ParallelBuildManager::IsAllPreTaskFinish() const { - MS_LOG(INFO) << "wait pre build process task_num: " << pre_task_map_.size(); - return pre_task_map_.empty(); -} - bool ParallelBuildManager::IsAllTaskFinish() const { MS_LOG(INFO) << "wait process task_num: " << task_map_.size(); return task_map_.empty(); } -void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) { - auto task_iter = pre_task_map_.find(task_id); - if (task_iter == pre_task_map_.end()) { - MS_EXCEPTION(ArgumentError) << "can find pre task_id:" << task_id; - } - auto node = task_iter->second; - auto builder = - std::make_shared(AnfAlgo::GetSelectKernelBuildInfo(node)); - std::string start_flag = "fusion_pattern_start"; - std::string end_flag = "fusion_pattern_end"; - int start = pre_build_result.find(start_flag); - int end = pre_build_result.find(end_flag); - if (start != -1 && end != -1 && end >= start) { - std::string result = pre_build_result.substr(start + start_flag.size(), end - start - start_flag.size()); - if (result == "") { - (void)pre_task_map_.erase(task_iter); - return; - } - transform(result.begin(), result.end(), result.begin(), ::toupper); - FusionType fusion_type = tbe::GetFusionType(result); - builder->SetFusionType(fusion_type); - AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get()); - } - (void)pre_task_map_.erase(task_iter); -} - std::pair ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) { auto task_iter = task_map_.find(task_id); if (task_iter == task_map_.end()) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h index a026f186c05..a7a28d45025 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h @@ -28,7 +28,6 @@ namespace mindspore { namespace kernel { -bool TbeOpParallelPreBuild(const std::vector &anf_nodes); bool TbeOpParallelBuild(const std::vector &anf_nodes); struct KernelBuildTaskInfo { @@ -42,9 +41,8 @@ struct KernelBuildTaskInfo { class ParallelBuildManager { public: - ParallelBuildManager(); + ParallelBuildManager() = default; ~ParallelBuildManager(); - void SavePreTaskInfo(int32_t task_id, const AnfNodePtr &anf_node); void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name, const std::vector &input_size_list, const std::vector &output_size_list, int32_t scope_id = 0); @@ -54,10 +52,7 @@ class ParallelBuildManager { bool SearchInCache(const std::string &json_name, const std::string &processor, const std::vector &input_size_list, const std::vector &output_size_list, AnfNode *node) const; - - bool IsAllPreTaskFinish() const; bool IsAllTaskFinish() const; - void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result); std::pair TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true); KernelModPtr GenKernelMod(const string &json_name, const string &processor, const std::vector &input_size_list, const std::vector &output_size_list, diff --git a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc index 5662fdb4467..47b82d1435a 100644 --- a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc +++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc @@ -1187,6 +1187,19 @@ TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputPrecision(const AnfNodePtr &node, s return GetCNodeOutputPrecision(kernel_with_index.first); } +bool AnfRuntimeAlgorithm::IsDynamicShape(const AnfNodePtr &node) { + if (!node->isa()) { + return false; + } + auto cnode = node->cast(); + MS_EXCEPTION_IF_NULL(cnode); + auto has_attr = AnfAlgo::HasNodeAttr(kAttrIsDynamicShape, cnode); + if (!has_attr) { + return false; + } + return AnfAlgo::GetNodeAttr(node, kAttrIsDynamicShape); +} + bool AnfRuntimeAlgorithm::IsCondControlKernel(const CNodePtr &node) { MS_EXCEPTION_IF_NULL(node); if (node->inputs().empty()) { diff --git a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h index 2fff066166a..d4a5f00a259 100644 --- a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h +++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.h @@ -217,6 +217,7 @@ class AnfRuntimeAlgorithm { static TypeId GetCNodeOutputPrecision(const AnfNodePtr &node); // get fix output precision from prev node, input_idx is the input index of current node related to prev node. static TypeId GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx); + static bool IsDynamicShape(const AnfNodePtr &node); static bool IsCondControlKernel(const CNodePtr &node); static bool IsIndependentNode(const CNodePtr &node); }; diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc index 019fabecbdc..fd240d41cbb 100644 --- a/mindspore/ccsrc/backend/session/ascend_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_session.cc @@ -445,7 +445,6 @@ void AscendSession::InitRuntimeResource() { } void AscendSession::HardwareOptimize(const std::shared_ptr &kernel_graph) const { - device::ascend::KernelPreBuild(kernel_graph.get()); MS_LOG(INFO) << "HardwareOptimize start!"; opt::AscendBackendOptimization(kernel_graph); opt::AscendGraphKernelCommonProcess(kernel_graph); diff --git a/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc b/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc index d5b76edcf05..833104a1c50 100644 --- a/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc +++ b/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.cc @@ -19,7 +19,8 @@ #include #include #include - +#include +#include #include "runtime/device/ascend/kernel_select_ascend.h" #include "runtime/device/kernel_info.h" #include "backend/kernel_compiler/kernel.h" @@ -61,32 +62,6 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) { return kernel_mod_ptr; } -static bool KernelPreBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) { - MS_EXCEPTION_IF_NULL(kernel_graph_ptr); - std::vector tbe_nodes; - for (const auto &anf_node : kernel_graph_ptr->execution_order()) { - MS_EXCEPTION_IF_NULL(anf_node); - if (!AnfAlgo::IsRealKernel(anf_node)) { - continue; - } - KernelType kernel_type = AnfAlgo::GetKernelType(anf_node); - switch (kernel_type) { - case KernelType::TBE_KERNEL: { - if (AnfAlgo::GetKernelMod(anf_node) == nullptr && - AnfAlgo::GetFusionType(anf_node) == kernel::FusionType::DYNAMIC) { - tbe_nodes.push_back(anf_node); - } - break; - } - default: { - break; - } - } - } - bool ret = kernel::TbeOpParallelPreBuild(tbe_nodes); - return ret; -} - static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) { MS_EXCEPTION_IF_NULL(kernel_graph_ptr); std::vector tbe_nodes; @@ -237,12 +212,6 @@ static bool IsAtomicNode(const CNodePtr &kernel_node) { return !(workspace_indexs.empty() && output_indexs.empty()); } -bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) { - MS_EXCEPTION_IF_NULL(kernel_graph_ptr); - bool ret = device::ascend::KernelPreBuildParallelCompile(kernel_graph_ptr); - return ret; -} - bool KernelBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) { MS_EXCEPTION_IF_NULL(kernel_graph_ptr); TbeUtils::LoadCache(); diff --git a/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h b/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h index b478f59c14d..6c41eed460b 100644 --- a/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h +++ b/mindspore/ccsrc/runtime/device/ascend/kernel_build_ascend.h @@ -22,10 +22,6 @@ namespace mindspore { namespace device { namespace ascend { -/** - * @brief kernel pre build for ascend. - */ -bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr); /** * @brief kernel build for ascend. */ diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h index 5a395eb018b..f7d905f649f 100644 --- a/mindspore/ccsrc/utils/utils.h +++ b/mindspore/ccsrc/utils/utils.h @@ -32,6 +32,7 @@ namespace mindspore { // op name. Op which not exists in operator/ops.h, so define it's name here constexpr auto kFour2FiveOpName = "Four2Five"; constexpr auto kFive2FourOpName = "Five2Four"; +constexpr auto kConv2DOpName = "Conv2D"; constexpr auto kConvBN1OpName = "ConvBN1"; constexpr auto kBN2AddReluOpName = "BN2AddRelu"; constexpr auto kBN2ReLUOpName = "BN2Relu"; @@ -273,6 +274,7 @@ constexpr auto kAttrPadDimSize = "pad_dim_size"; constexpr auto kAttrNumSegments = "num_segments"; constexpr auto kAttrBegin = "begin"; constexpr auto kAttrSize = "size"; +constexpr auto kAttrIsDynamicShape = "is_dynamic_shape"; // attr value constexpr auto kValueTargetSwitch = "target_switch"; diff --git a/mindspore/ops/_op_impl/aicpu/__init__.py b/mindspore/ops/_op_impl/aicpu/__init__.py index b321db47e08..bb63d4bf32e 100644 --- a/mindspore/ops/_op_impl/aicpu/__init__.py +++ b/mindspore/ops/_op_impl/aicpu/__init__.py @@ -13,6 +13,7 @@ # limitations under the License. """aicpu ops""" +from .unique import _unique_aicpu from .init_data_set_queue import _init_data_set_queue_aicpu from .embedding_lookup import _embedding_lookup_aicpu from .padding import _padding_aicpu diff --git a/mindspore/ops/_op_impl/aicpu/unique.py b/mindspore/ops/_op_impl/aicpu/unique.py new file mode 100644 index 00000000000..849e9696093 --- /dev/null +++ b/mindspore/ops/_op_impl/aicpu/unique.py @@ -0,0 +1,31 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Unique op""" +from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType + +unique_op_info = AiCPURegOp("Unique") \ + .fusion_type("OPAQUE") \ + .input(0, "x", "required") \ + .output(0, "y", "required") \ + .output(1, "idx", "required") \ + .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \ + .dtype_format(DataType.I64_Default, DataType.I64_Default, DataType.I64_Default) \ + .get_op_info() + +@op_info_register(unique_op_info) +def _unique_aicpu(): + """Unique AiCPU register""" + return diff --git a/mindspore/ops/_op_impl/tbe/matmul.py b/mindspore/ops/_op_impl/tbe/matmul.py index 0f68fa4c9da..e773191ae88 100644 --- a/mindspore/ops/_op_impl/tbe/matmul.py +++ b/mindspore/ops/_op_impl/tbe/matmul.py @@ -17,7 +17,7 @@ from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType matmul_op_info = TBERegOp("MatMul") \ - .fusion_type("ELEMWISE") \ + .fusion_type("OPAQUE") \ .async_flag(False) \ .binfile_name("matmul.so") \ .compute_cost(10) \ diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index e4a47fc90c2..05d0b77eaad 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -91,6 +91,7 @@ from ._thor_ops import (CusBatchMatMul, CusCholeskyTrsm, CusFusedAbsMax1, CusImg from .sparse_ops import SparseToDense __all__ = [ + 'Unique', 'ReverseSequence', 'EditDistance', 'CropAndResize', diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index bfc2f316272..c42c2505520 100644 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -597,9 +597,9 @@ class Unique(Primitive): containing indices of elements in the input coressponding to the output tensor. Examples: - >>> x = Tensor(np.array([1, 2, 5, 2]), mindspore.float32) + >>> x = Tensor(np.array([1, 2, 5, 2]), mindspore.int32) >>> out = P.Unique()(x) - (Tensor([1, 2, 5], mindspore.int32), Tensor([0, 1, 2, 1], mindspore.float32)) + (Tensor([1, 2, 5], mindspore.int32), Tensor([0, 1, 2, 1], mindspore.int32)) """ @prim_attr_register def __init__(self): diff --git a/tests/ut/cpp/stub/tdt/tdt_mock.cc b/tests/ut/cpp/stub/tdt/tdt_mock.cc index 45725de173a..6b9c6f95a4b 100644 --- a/tests/ut/cpp/stub/tdt/tdt_mock.cc +++ b/tests/ut/cpp/stub/tdt/tdt_mock.cc @@ -35,39 +35,5 @@ StatusFactory::StatusFactory() {} std::mutex& StatusFactory::GetMutex() { return GetInstance()->rwMutex_; } -TsdClient* TsdClient::GetInstance() { - static TsdClient instance; - return &instance; -} - -/** - * @ingroup TsdClient - * @brief 构造函数 - */ -TsdClient::TsdClient() { rankSize_ = 1; } - -/** - * @ingroup TsdClient - * @brief 析构函数 - */ -TsdClient::~TsdClient() = default; - -/** - * @ingroup TsdClient - * @brief framework发送拉起hccp和computer process的命令 - * @param [in] phyDeviceId : FMK传入物理ID - * @param [in] phyDeviceId : FMK传入rankSize - * @return TDT_OK:成功 或者其他错误码 - */ -TDT_StatusT TsdClient::Open(const uint32_t deviceId, const uint32_t rankSize) { return TDT_OK; } - -/** - * @ingroup TsdClient - * @brief 通知TsdClient关闭相关资源 - * @param 无 - * @return TDT_OK:成功 或者其他错误码 - */ -TDT_StatusT TsdClient::Close() { return TDT_OK; } - } // namespace tdt #endif // TDT_MOCK_H