forked from mindspore-Ecosystem/mindspore
update aicpu proto and update module: graphengine
Support Dynamic Shape Aicpu Run Package, adapt to tensorengine modifications, fix ub fusion
This commit is contained in:
parent
b7425d3e0c
commit
bd527a331d
|
@ -1 +1 @@
|
|||
Subproject commit 622af6c1c50034bea5a08bd409c5a410782bfe53
|
||||
Subproject commit 2dbfefcdd0d4b958801403dbaf9efe46447dccd2
|
|
@ -17,8 +17,6 @@ import json
|
|||
import os
|
||||
import sys
|
||||
from te.platform.cce_conf import te_set_version
|
||||
from te.platform.fusion_manager import op_build_cfg_dis, op_build_cfg_en, set_current_op_name, \
|
||||
init_op_pattern, set_op_params, set_op_build_type, get_op_pattern, set_current_op_func_name
|
||||
from te.platform.fusion_util import fusion_op
|
||||
from common import check_kernel_info, get_args, get_build_in_impl_path, get_ddk_version
|
||||
|
||||
|
@ -27,7 +25,6 @@ build_in_impl_path = get_build_in_impl_path()
|
|||
|
||||
# op function list
|
||||
op_build = "compile"
|
||||
op_pre_build = "pre_build"
|
||||
fusion_pattern_start_flag = "fusion_pattern_start"
|
||||
fusion_pattern_end_flag = "fusion_pattern_end"
|
||||
|
||||
|
@ -83,19 +80,7 @@ def build_op(build_type, json_str):
|
|||
else:
|
||||
op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0)
|
||||
# get function
|
||||
if build_type == op_pre_build:
|
||||
# set op parameter
|
||||
op_build_cfg_dis()
|
||||
set_current_op_func_name(op_name)
|
||||
set_current_op_name(kernel_name)
|
||||
init_op_pattern()
|
||||
set_op_params(*outputs_args, *attrs_args, kernel_name=kernel_name)
|
||||
set_op_build_type('prebuild')
|
||||
if custom_flag:
|
||||
py_fn_name = kernel_info['op_info']['name']
|
||||
else:
|
||||
py_fn_name = op_name
|
||||
elif build_type == op_build:
|
||||
if build_type == op_build:
|
||||
if custom_flag:
|
||||
py_fn_name = kernel_info['op_info']['name']
|
||||
else:
|
||||
|
@ -106,13 +91,6 @@ def build_op(build_type, json_str):
|
|||
if op_func is None:
|
||||
raise ValueError("Op:{} function {} is not supported by Tbe.".format(op_name, build_type))
|
||||
|
||||
# pre build
|
||||
if build_type == op_pre_build:
|
||||
op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
|
||||
# disable only pattern configuration
|
||||
op_build_cfg_en()
|
||||
return get_op_pattern()
|
||||
|
||||
# call function
|
||||
if kernel_name[0:19] == "bounding_box_encode":
|
||||
return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name_val=kernel_name)
|
||||
|
@ -120,8 +98,6 @@ def build_op(build_type, json_str):
|
|||
return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
|
||||
|
||||
except Exception as e:
|
||||
if build_type == op_pre_build:
|
||||
op_build_cfg_en()
|
||||
raise RuntimeError(e)
|
||||
|
||||
|
||||
|
@ -136,14 +112,9 @@ def compile_fusion_op(json_str):
|
|||
Exception: If specific keyword is not found.
|
||||
"""
|
||||
args = json.loads(json_str)
|
||||
te_set_version(ddk_version)
|
||||
if 'fusion_op' not in args or not args['fusion_op']:
|
||||
raise ValueError("Json string Errors, key:fusion_op not found.")
|
||||
if 'prebuild_ops' not in args or not args['prebuild_ops']:
|
||||
raise ValueError("Json string Errors, key:prebuild_ops not found.")
|
||||
|
||||
pre_build_op_list = args['prebuild_ops']
|
||||
for op in pre_build_op_list:
|
||||
build_op(op_pre_build, json.dumps(op))
|
||||
fusion_op_arg = args['fusion_op']
|
||||
return fusion_op(json.dumps(fusion_op_arg))
|
||||
|
||||
|
@ -159,8 +130,6 @@ def compile_with_json(json_str):
|
|||
json_info = json.loads(json_str)
|
||||
if "fusion_op" in json_info:
|
||||
ret = compile_fusion_op(json_str)
|
||||
elif "compile_type" in json_info:
|
||||
ret = build_op(op_pre_build, json_str)
|
||||
else:
|
||||
ret = build_op(op_build, json_str)
|
||||
return ret
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
#include <vector>
|
||||
#include <memory>
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <climits>
|
||||
#include "runtime/device/kernel_runtime.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
|
||||
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
|
||||
|
@ -218,7 +220,7 @@ void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
|
|||
mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
|
||||
dim->set_size((::google::protobuf::int64)item);
|
||||
}
|
||||
node_inputs->set_tensor_type((mindspore::DataType)input_data_type);
|
||||
node_inputs->set_tensor_type(input_data_type);
|
||||
node_inputs->set_mem_device("HBM");
|
||||
}
|
||||
}
|
||||
|
@ -245,7 +247,7 @@ void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
|
|||
}
|
||||
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
|
||||
int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
|
||||
node_outputs->set_tensor_type((mindspore::DataType)output_data_type);
|
||||
node_outputs->set_tensor_type(output_data_type);
|
||||
node_outputs->set_mem_device("HBM");
|
||||
}
|
||||
}
|
||||
|
@ -287,6 +289,109 @@ bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
|
|||
return true;
|
||||
}
|
||||
|
||||
bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
|
||||
if (!anf_node->isa<CNode>()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!AnfAlgo::IsDynamicShape(anf_node)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "CreateExtInfo start, " << anf_node->fullname_with_scope();
|
||||
|
||||
int32_t unknown_shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
|
||||
uint64_t ext_info_head_len = kExtInfoHeadSize;
|
||||
std::string ext_info;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
|
||||
|
||||
// 1.addr:unknown shape type
|
||||
uint64_t ext_info_len = ext_info.size();
|
||||
ext_info_len += ext_info_head_len + sizeof(int32_t);
|
||||
|
||||
// 2.addr:input ShapeAndType
|
||||
ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType);
|
||||
|
||||
// 3.addr:output ShapeAndType
|
||||
ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType);
|
||||
|
||||
uint64_t ext_info_offset = ext_info.size();
|
||||
ext_info.resize(ext_info_len, 0);
|
||||
char *ext_info_buf = ext_info.data();
|
||||
|
||||
// deal1: unknown shape type
|
||||
ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
|
||||
info->infoType = FWK_ADPT_EXT_SHAPE_TYPE;
|
||||
info->infoLen = sizeof(int32_t);
|
||||
ext_info_offset += ext_info_head_len;
|
||||
int32_t *shape_type = reinterpret_cast<int32_t *>(ext_info_buf + ext_info_offset);
|
||||
*shape_type = unknown_shape_type;
|
||||
ext_info_offset += info->infoLen;
|
||||
|
||||
// deal2:input ShapeAndType
|
||||
info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
|
||||
info->infoType = FWK_ADPT_EXT_INPUT_SHAPE;
|
||||
info->infoLen = input_num * sizeof(ShapeAndType);
|
||||
ext_info_offset += ext_info_head_len;
|
||||
|
||||
ShapeAndType *inputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
|
||||
for (size_t input_index = 0; input_index < input_num; input_index++) {
|
||||
TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
|
||||
std::vector<size_t> input_shape;
|
||||
int32_t input_data_type;
|
||||
if (input_type == kObjectTypeString) {
|
||||
auto cnode = anf_node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto input_node = cnode->inputs()[input_index + 1];
|
||||
auto value_ptr = GetValueNode(input_node);
|
||||
auto value = GetValue<std::string>(value_ptr);
|
||||
input_shape.push_back(1);
|
||||
input_shape.push_back(value.size());
|
||||
input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
|
||||
} else {
|
||||
input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
|
||||
input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
|
||||
}
|
||||
inputs[input_index].type = input_data_type;
|
||||
|
||||
size_t input_shape_index = 0;
|
||||
for (; input_shape_index < input_shape.size(); input_shape_index++) {
|
||||
inputs[input_index].dims[input_shape_index] = SizeToLong(input_shape[input_shape_index]);
|
||||
}
|
||||
if (input_shape.size() < kMaxShapeDims) {
|
||||
inputs[input_index].dims[input_shape_index] = LLONG_MIN;
|
||||
}
|
||||
}
|
||||
ext_info_offset += info->infoLen;
|
||||
|
||||
// deal3:output ShapeAndType
|
||||
info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
|
||||
info->infoType = FWK_ADPT_EXT_OUTPUT_SHAPE;
|
||||
info->infoLen = output_num * sizeof(ShapeAndType);
|
||||
ext_info_offset += ext_info_head_len;
|
||||
|
||||
ShapeAndType *outputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
|
||||
for (size_t output_index = 0; output_index < output_num; output_index++) {
|
||||
std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
|
||||
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
|
||||
int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
|
||||
outputs[output_index].type = output_data_type;
|
||||
|
||||
size_t output_shape_index = 0;
|
||||
for (; output_shape_index < output_shape.size(); output_shape_index++) {
|
||||
outputs[output_index].dims[output_shape_index] = SizeToLong(output_shape[output_shape_index]);
|
||||
}
|
||||
if (output_shape_index < kMaxShapeDims) {
|
||||
outputs[output_index].dims[output_shape_index] = LLONG_MIN;
|
||||
}
|
||||
}
|
||||
|
||||
// set ext info
|
||||
kernel_mod_ptr->SetExtInfo(ext_info);
|
||||
return true;
|
||||
}
|
||||
|
||||
KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
|
||||
|
@ -300,6 +405,11 @@ KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
|
|||
if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) {
|
||||
MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
|
||||
}
|
||||
|
||||
if (!CreateExtInfo(anf_node, kernel_mod_ptr)) {
|
||||
MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
|
||||
}
|
||||
|
||||
if (!SetIOSize(anf_node, kernel_mod_ptr)) {
|
||||
MS_LOG(EXCEPTION) << "Set input output size list failed.";
|
||||
}
|
||||
|
|
|
@ -43,6 +43,7 @@ AicpuOpKernelMod::~AicpuOpKernelMod() {
|
|||
input_size_list_.clear();
|
||||
output_size_list_.clear();
|
||||
workspace_size_list_.clear();
|
||||
ext_info_.clear();
|
||||
}
|
||||
|
||||
// Stores a copy of `size_list` as the kernel's input size list.
void AicpuOpKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) {
  input_size_list_ = size_list;
}
|
||||
|
@ -54,6 +55,7 @@ const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { retu
|
|||
// Stores a copy of `inputList` in inputList_.
void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) {
  inputList_ = inputList;
}
|
||||
// Stores a copy of `outputList` in outputList_.
void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) {
  outputList_ = outputList;
}
|
||||
// Replaces the stored node-def string with a copy of `nodeDef`.
void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) {
  node_def_str_ = nodeDef;
}
|
||||
// Stores a copy of the ext-info bytes in ext_info_.
void AicpuOpKernelMod::SetExtInfo(const std::string &ext_info) {
  ext_info_ = ext_info;
}
|
||||
// Stores a copy of `node_name` in node_name_.
void AicpuOpKernelMod::SetNodeName(const std::string &node_name) {
  node_name_ = node_name;
}
|
||||
void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
|
@ -84,16 +86,30 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs
|
|||
|
||||
auto node_def_len = node_def_str_.length();
|
||||
param_len += node_def_len;
|
||||
param_len += sizeof(uint32_t);
|
||||
|
||||
AicpuParamHead aicpu_param_head;
|
||||
aicpu_param_head.length = param_len;
|
||||
aicpu_param_head.ioAddrNum = io_addrs_num;
|
||||
|
||||
if (ext_info_.empty()) {
|
||||
MS_LOG(INFO) << "Static Shape Kernel";
|
||||
aicpu_param_head.extInfoLength = 0;
|
||||
aicpu_param_head.extInfoAddr = 0;
|
||||
} else {
|
||||
MS_LOG(INFO) << "Dynamic Kernel Ext Info size:" << ext_info_.size();
|
||||
}
|
||||
|
||||
// Create taskArgs: AicpuParamHead + ioAddrs + notifyId + customizedAttr
|
||||
AicpuParamHead paramHead = {static_cast<uint32_t>(param_len), static_cast<uint32_t>(io_addrs_num)};
|
||||
args_.clear();
|
||||
(void)args_.append(reinterpret_cast<const char *>(¶mHead), sizeof(AicpuParamHead));
|
||||
(void)args_.append(reinterpret_cast<const char *>(&aicpu_param_head), sizeof(AicpuParamHead));
|
||||
// TaskArgs append ioAddrs
|
||||
if (io_addrs_size != 0) {
|
||||
(void)args_.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs_size);
|
||||
}
|
||||
|
||||
// size for node_def
|
||||
args_.append(reinterpret_cast<const char *>(&node_def_len), sizeof(uint32_t));
|
||||
|
||||
// When it's aicpu customized ops, taskArgs should append customized attr
|
||||
if (node_def_len != 0) {
|
||||
(void)args_.append(reinterpret_cast<const char *>(node_def_str_.data()), node_def_len);
|
||||
|
@ -145,8 +161,9 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
|
|||
node_name_ = kTopKV2;
|
||||
}
|
||||
|
||||
AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>(
|
||||
kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump());
|
||||
AicpuTaskInfoPtr task_info_ptr =
|
||||
make_shared<ge::model_runner::AicpuTaskInfo>(kernel_name_, stream_id, node_so_, node_name_, node_def_str_,
|
||||
ext_info_, input_data_addrs, output_data_addrs, NeedDump());
|
||||
|
||||
MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
|
||||
return {task_info_ptr};
|
||||
|
|
|
@ -36,6 +36,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
|
|||
void SetOutputList(const std::vector<int64_t> &outputList);
|
||||
void SetAnfNode(const AnfNodePtr &anf_node);
|
||||
void SetNodeDef(const std::string &nodeDef);
|
||||
void SetExtInfo(const std::string &ext_info);
|
||||
void SetNodeName(const std::string &node_name);
|
||||
|
||||
/**
|
||||
|
@ -58,6 +59,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
|
|||
std::string node_def_str_;
|
||||
std::string node_name_;
|
||||
std::string node_so_;
|
||||
std::string ext_info_;
|
||||
std::vector<int64_t> inputList_;
|
||||
std::vector<int64_t> outputList_;
|
||||
AnfNodePtr anf_node_;
|
||||
|
|
|
@ -21,7 +21,6 @@
|
|||
#include <map>
|
||||
#include <string>
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
constexpr auto kInitDataSetQueue = "InitDataSetQueue";
|
||||
|
@ -50,6 +49,36 @@ struct AicpuParamHead {
|
|||
uint64_t extInfoAddr; // extInfo address
|
||||
} __attribute__((packed));
|
||||
|
||||
// Size in bytes of the fixed ExtInfo header (infoType + infoLen) that precedes each payload.
const uint32_t kExtInfoHeadSize = 8;
// One extend-info record: a fixed header immediately followed by infoLen bytes of payload.
struct ExtInfo {
  int32_t infoType;  // extend type
  uint32_t infoLen;  // length for infoMsg
  char infoMsg[0];   // extend value
} __attribute__((packed));
// Code that advances through a buffer by kExtInfoHeadSize relies on this equality.
static_assert(sizeof(ExtInfo) == kExtInfoHeadSize, "kExtInfoHeadSize must match the ExtInfo header layout");
|
||||
|
||||
// Extend info ShapeAndType
// Maximum number of dims a single ShapeAndType record can hold.
const uint32_t kMaxShapeDims = 8;
// Packed record describing one tensor: a data type followed by a fixed-size dims array.
struct ShapeAndType {
  int32_t type;
  int64_t dims[kMaxShapeDims];
} __attribute__((packed));
// The record is used as a raw byte layout, so its size must not change silently.
static_assert(sizeof(ShapeAndType) == sizeof(int32_t) + kMaxShapeDims * sizeof(int64_t),
              "ShapeAndType must stay packed: int32 type + kMaxShapeDims int64 dims");
|
||||
|
||||
// Extend Info type for task.
// Identifies which kind of payload an ExtInfo record carries.
enum FWKTaskExtInfoType {
  FWK_ADPT_EXT_SHAPE_TYPE = 0,
  FWK_ADPT_EXT_INPUT_SHAPE = 1,
  FWK_ADPT_EXT_OUTPUT_SHAPE = 2,
  FWK_ADPT_EXT_INVALID = 3
};
|
||||
|
||||
// for unknown shape op type
// Values start at 1 and increase by one per enumerator.
enum UnknowShapeOpType {
  // op out shape get by input shape
  DEPEND_IN_SHAPE = 1,
  // op out shape get by const op value
  DEPEND_CONST_VALUE,
  // op out shape get by range
  DEPEND_SHAPE_RANGE,
  // op out shape get by totally computing
  DEPEND_COMPUTE
};
|
||||
|
||||
class AicpuOpUtil {
|
||||
public:
|
||||
static int MsTypeToProtoType(TypeId ms_type);
|
||||
|
|
|
@ -26,7 +26,7 @@ message AttrValue {
|
|||
repeated int64 i = 3 [ packed = true ]; //"array(int)"
|
||||
repeated float f = 4 [ packed = true ]; //"array(float)"
|
||||
repeated bool b = 5 [ packed = true ]; //"array(bool)"
|
||||
repeated DataType type = 6 [ packed = true ]; //"array(type)"
|
||||
repeated int32 type = 6 [ packed = true ]; //"array(type)"
|
||||
repeated TensorShape shape = 7; //"array(shape)"
|
||||
repeated Tensor tensor = 8; //"array(tensor)"
|
||||
}
|
||||
|
|
|
@ -18,9 +18,16 @@ package mindspore;
|
|||
import "attr.proto";
|
||||
import "tensor.proto";
|
||||
|
||||
// Index/count pair used as the value type of NodeDef.dym_inputs / NodeDef.dym_outputs.
// NOTE(review): presumably `idx` is the first position and `num` the number of entries of a
// dynamic input/output group -- confirm against the consumer.
message DynamicIdx {
  int32 idx = 1;
  int32 num = 2;
}
|
||||
|
||||
// Description of a single node: op string, named attributes, input/output tensors and
// per-name dynamic input/output index info. Field number 1 is not used by this message.
message NodeDef {
  string op = 2;
  map<string, AttrValue> attrs = 3;
  repeated Tensor inputs = 4;
  repeated Tensor outputs = 5;
  map<string, DynamicIdx> dym_inputs = 6;
  map<string, DynamicIdx> dym_outputs = 7;
}
|
||||
|
|
|
@ -26,9 +26,12 @@ message Tensor {
|
|||
TensorShape tensor_shape = 1;
|
||||
|
||||
// tensor content data type
|
||||
DataType tensor_type = 2;
|
||||
int32 tensor_type = 2;
|
||||
|
||||
// tensor memory device
|
||||
// data located memory device , "DDR" "HBM" OR "NONE"
|
||||
string mem_device = 3;
|
||||
string name = 4;
|
||||
uint64 data_ptr = 5;
|
||||
uint64 data_size = 6;
|
||||
}
|
||||
|
|
|
@ -31,5 +31,5 @@ message TensorShape {
|
|||
bool unknown_rank = 3;
|
||||
|
||||
// data format "NHWC" "NCHW" "NC1HWC0" OR "NONE"
|
||||
string data_format = 4;
|
||||
int32 data_format = 4;
|
||||
};
|
||||
|
|
|
@ -19,17 +19,30 @@ option cc_enable_arenas = true;
|
|||
package mindspore;
|
||||
|
||||
enum DataType {
|
||||
MS_UNKNOWN = 0;
|
||||
MS_BOOL = 1;
|
||||
MS_FLOAT32 = 0;
|
||||
MS_FLOAT16 = 1;
|
||||
MS_INT8 = 2;
|
||||
MS_UINT8 = 3;
|
||||
MS_INT16 = 4;
|
||||
MS_UINT16 = 5;
|
||||
MS_INT32 = 6;
|
||||
MS_UINT32 = 7;
|
||||
MS_INT64 = 8;
|
||||
MS_UINT64 = 9;
|
||||
MS_FLOAT16 = 10;
|
||||
MS_FLOAT32 = 11;
|
||||
MS_FLOAT64 = 12;
|
||||
MS_INT32 = 3;
|
||||
MS_UINT8 = 4;
|
||||
MS_INT16 = 6;
|
||||
MS_UINT16 = 7;
|
||||
MS_UINT32 = 8;
|
||||
MS_INT64 = 9;
|
||||
MS_UINT64 = 10;
|
||||
MS_FLOAT64 = 11;
|
||||
MS_BOOL = 12;
|
||||
MS_STRING = 13;
|
||||
MS_DUAL_SUB_INT8 = 14;
|
||||
MS_DUAL_SUB_UINT8 = 15;
|
||||
MS_COMPLEX64 = 16;
|
||||
MS_COMPLEX128 = 17;
|
||||
MS_QINT8 = 18;
|
||||
MS_QINT16 = 19;
|
||||
MS_QINT32 = 20;
|
||||
MS_QUINT8 = 21;
|
||||
MS_QUINT16 = 22;
|
||||
MS_RESOURCE = 23;
|
||||
MS_STRING_REF = 24;
|
||||
MS_DUAL = 25;
|
||||
MS_UNKNOWN = 26;
|
||||
}
|
||||
|
|
|
@ -37,7 +37,6 @@ enum FusionType {
|
|||
COMMREDUCE,
|
||||
SEGMENT,
|
||||
OPAQUE,
|
||||
DYNAMIC,
|
||||
UNKNOWN_FUSION_TYPE = -1,
|
||||
};
|
||||
enum OpPattern {
|
||||
|
@ -80,8 +79,8 @@ class KernelPack {
|
|||
bool LoadKernelMeta(const std::string &json_f, const std::string &processor);
|
||||
bool ReadFromJsonFile(const std::string &json_f, const std::string &processor);
|
||||
const std::string Serialize() const;
|
||||
const FlexArray *const GetJson() const { return json_; }
|
||||
const FlexArray *const GetKernel() const { return kernel_; }
|
||||
const FlexArray *GetJson() const { return json_; }
|
||||
const FlexArray *GetKernel() const { return kernel_; }
|
||||
~KernelPack() {
|
||||
if (json_) {
|
||||
delete[] json_;
|
||||
|
|
|
@ -19,53 +19,36 @@
|
|||
#include <map>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include "backend/kernel_compiler/tbe/tbe_kernel_build.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_utils.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
|
||||
#include "utils/ms_context.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
using mindspore::kernel::tbe::TbeUtils;
|
||||
static bool GenPreBuildKernelJson(const std::vector<AnfNodePtr> &compute_nodes,
|
||||
std::vector<nlohmann::json> *prebuild_op_list) {
|
||||
MS_EXCEPTION_IF_NULL(prebuild_op_list);
|
||||
TbeKernelJsonCreator creator(PREBUILD);
|
||||
for (const auto &anf_node : compute_nodes) {
|
||||
nlohmann::json prebuild;
|
||||
if (!creator.GenTbeSingleKernelJson(anf_node, &prebuild)) {
|
||||
MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
|
||||
return false;
|
||||
}
|
||||
(*prebuild_op_list).push_back(prebuild);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
|
||||
MS_LOG(INFO) << "kernel fusion build start, scope size:" << fusion_scopes.size();
|
||||
std::map<int32_t, KernelModPtr> kernel_mod_ret;
|
||||
auto build_manger = std::make_shared<ParallelBuildManager>();
|
||||
MS_EXCEPTION_IF_NULL(build_manger);
|
||||
for (const auto &fusion_scope_iter : fusion_scopes) {
|
||||
auto scope_id = fusion_scope_iter.scope_id;
|
||||
string fusion_kernel_name;
|
||||
nlohmann::json fusion_op;
|
||||
string fusion_kernel = "te_fusion";
|
||||
if (!TbeKernelBuild::GenFusionScopeJson(fusion_scope_iter.input_nodes, fusion_scope_iter.compute_nodes, &fusion_op,
|
||||
&fusion_kernel)) {
|
||||
&fusion_kernel_name)) {
|
||||
continue;
|
||||
}
|
||||
// gen kernel_name & check cache
|
||||
std::string json_str = fusion_op.dump();
|
||||
size_t hash_id = std::hash<std::string>()(json_str);
|
||||
auto json_name = fusion_kernel.append("_").append(std::to_string(hash_id));
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
|
||||
auto json_name =
|
||||
fusion_kernel_name.append("_").append(std::to_string(hash_id)).append("_").append(std::to_string(device_id));
|
||||
fusion_op["fusion_op_name"] = json_name;
|
||||
// gen json for prebuild
|
||||
std::vector<nlohmann::json> prebuild_op_list;
|
||||
if (!GenPreBuildKernelJson(fusion_scope_iter.compute_nodes, &prebuild_op_list)) {
|
||||
continue;
|
||||
}
|
||||
// get io size
|
||||
std::vector<size_t> input_size_list;
|
||||
std::vector<size_t> output_size_list;
|
||||
|
@ -80,20 +63,20 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
|
|||
auto kernel_mod =
|
||||
build_manger->GenKernelMod(json_name, tbe::kProcessorAiCore, input_size_list, output_size_list, kernel_pack);
|
||||
if (kernel_mod != nullptr) {
|
||||
kernel_mod_ret[scope_id] = kernel_mod;
|
||||
kernel_mod_ret[fusion_scope_iter.scope_id] = kernel_mod;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// fusion build
|
||||
nlohmann::json fusion_json;
|
||||
fusion_json["fusion_op"] = fusion_op;
|
||||
fusion_json["prebuild_ops"] = prebuild_op_list;
|
||||
auto task_id = build_manger->StartCompileOp(fusion_json);
|
||||
TbeUtils::SaveJsonInfo(json_name, fusion_json.dump());
|
||||
if (task_id < 0) {
|
||||
MS_EXCEPTION(ArgumentError) << "start compile failed.";
|
||||
}
|
||||
build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list, scope_id);
|
||||
build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list,
|
||||
fusion_scope_iter.scope_id);
|
||||
}
|
||||
|
||||
int build_failed_num = 0;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
|
@ -25,11 +26,9 @@ namespace kernel {
|
|||
* @brief fuse op and return a callable mod
|
||||
*/
|
||||
struct FusionScopeInfo {
|
||||
FusionScopeInfo() {}
|
||||
FusionScopeInfo(int32_t id, const std::vector<AnfNodePtr> &in, const std::vector<AnfNodePtr> &comp,
|
||||
const std::vector<AnfNodePtr> &out)
|
||||
: scope_id(id), input_nodes(in), compute_nodes(comp), output_nodes(out) {}
|
||||
int32_t scope_id;
|
||||
FusionScopeInfo(int32_t id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, std::vector<AnfNodePtr> out)
|
||||
: scope_id(id), input_nodes(std::move(in)), compute_nodes(std::move(comp)), output_nodes(std::move(out)) {}
|
||||
int32_t scope_id{};
|
||||
std::vector<AnfNodePtr> input_nodes;
|
||||
std::vector<AnfNodePtr> compute_nodes;
|
||||
std::vector<AnfNodePtr> output_nodes;
|
||||
|
|
|
@ -40,14 +40,13 @@ class OpLib {
|
|||
|
||||
private:
|
||||
static bool RegOpFromLocalInfo();
|
||||
static bool DecodeOpInfo(const nlohmann::json &obj, const OpImplyType imply_type, const std::string &impl_path);
|
||||
static bool DecodeAttr(const nlohmann::json &obj, const OpImplyType imply_type,
|
||||
const std::shared_ptr<OpInfo> &op_info);
|
||||
static bool DecodeOpInfo(const nlohmann::json &obj, OpImplyType imply_type, const std::string &impl_path);
|
||||
static bool DecodeAttr(const nlohmann::json &obj, OpImplyType imply_type, const std::shared_ptr<OpInfo> &op_info);
|
||||
static bool DecodeDtypeFormat(const nlohmann::json &dtype_format, const std::shared_ptr<OpIOInfo> &op_io,
|
||||
size_t index);
|
||||
static void DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
|
||||
static void DecodeAKGSpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
|
||||
static bool DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply_type, const OpIOType io_type,
|
||||
static bool DecodeInputOutput(const nlohmann::json &obj, OpImplyType imply_type, OpIOType io_type,
|
||||
const std::shared_ptr<OpInfo> &op_info, const nlohmann::json &dtype_format);
|
||||
static bool GetRefInfo(const std::shared_ptr<OpInfo> &op_info);
|
||||
static bool CheckRepetition(const std::shared_ptr<OpInfo> &op_info);
|
||||
|
|
|
@ -173,7 +173,7 @@ void TbeAdapter::NormalizeFuncName(std::string *func_name) {
|
|||
*func_name = name_tmp;
|
||||
auto iter = tbe_func_adapter_map.find(*func_name);
|
||||
if (iter != tbe_func_adapter_map.end()) {
|
||||
MS_LOG(INFO) << "map actual op from me " << *func_name << " to tbe op" << iter->second;
|
||||
MS_LOG(INFO) << "Map actual op from me: " << *func_name << " to tbe op: " << iter->second;
|
||||
*func_name = iter->second;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
// the TBE back-end operator implementation difference
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
|
||||
enum kCreaterType : int { SINGLE_BUILD = 0, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
|
||||
namespace tbe {
|
||||
using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
|
||||
nlohmann::json *attrs_json);
|
||||
|
|
|
@ -63,7 +63,7 @@ const std::unordered_map<std::string, size_t> type_nbyte_maps = {
|
|||
|
||||
const std::unordered_map<std::string, FusionType> fusion_type_maps = {
|
||||
{"CONVLUTION", FusionType::CONVLUTION}, {"ELEMWISE", FusionType::ELEMWISE}, {"COMMREDUCE", FusionType::COMMREDUCE},
|
||||
{"SEGMENT", FusionType::SEGMENT}, {"DYNAMIC", FusionType::DYNAMIC}, {"OPAQUE", FusionType::OPAQUE},
|
||||
{"SEGMENT", FusionType::SEGMENT}, {"OPAQUE", FusionType::OPAQUE},
|
||||
};
|
||||
|
||||
TypeId DtypeToTypeId(const std::string &dtypes) {
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "backend/kernel_compiler/tbe/tbe_adapter.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_utils.h"
|
||||
#include "utils/ms_context.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
@ -71,14 +72,20 @@ constexpr auto kVTypeListListInt = "listListInt";
|
|||
constexpr auto kJValue = "value";
|
||||
constexpr auto kJDynIndex = "dyn_index";
|
||||
constexpr auto kJFuncName = "func_name";
|
||||
|
||||
// Returns a copy of `full_scope_name` in which every '/' and every '-' is replaced by '_'.
// exp:Default/ReLU-op0 -->Default_ReLU_op0
std::string NormalizeFullScopeName(const string &full_scope_name) {
  std::string normalized;
  normalized.reserve(full_scope_name.size());
  for (const char ch : full_scope_name) {
    const bool is_separator = (ch == '/') || (ch == '-');
    normalized.push_back(is_separator ? '_' : ch);
  }
  return normalized;
}
|
||||
constexpr auto kJL1AddrOffset = "L1_addr_offset";
|
||||
constexpr auto kJL1FusionType = "L1_fusion_type";
|
||||
constexpr auto kJL1WorkspaceSize = "L1_workspace_size";
|
||||
constexpr auto kJAddrType = "addr_type";
|
||||
constexpr auto kJSliceOffset = "slice_offset";
|
||||
constexpr auto kJSplitIndex = "split_index";
|
||||
constexpr auto kJTotalShape = "total_shape";
|
||||
constexpr auto kJValidShape = "valid_shape";
|
||||
constexpr auto kJModuleName = "module_name";
|
||||
constexpr auto kJPattern = "pattern";
|
||||
constexpr auto kJPyModulePath = "py_module_path";
|
||||
constexpr auto kJPreBuildOutsAttrs = "prebuild_outs_attrs";
|
||||
constexpr auto kJKwdArgs = "kwds_args";
|
||||
constexpr auto kJListArgs = "list_args";
|
||||
|
||||
bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspore::AnfNode> &anf_node,
|
||||
nlohmann::json *kernel_json) {
|
||||
|
@ -117,13 +124,12 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspor
|
|||
op_info_json[kJAttrs] = attrs_json;
|
||||
std::string json_str = op_info_json.dump();
|
||||
size_t hash_id = std::hash<std::string>()(json_str);
|
||||
json_name_ = op_name + "_" + std::to_string(hash_id);
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
|
||||
json_name_ = op_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id);
|
||||
json_info_ = json_str;
|
||||
if (creater_type_ == PREBUILD) {
|
||||
op_info_json[kJKernelName] = NormalizeFullScopeName(anf_node->fullname_with_scope());
|
||||
} else {
|
||||
op_info_json[kJKernelName] = json_name_;
|
||||
}
|
||||
op_info_json[kJKernelName] = json_name_;
|
||||
(*kernel_json)[kJOpInfo] = op_info_json;
|
||||
(*kernel_json)[kJFullName] = anf_node->fullname_with_scope();
|
||||
if (creater_type_ == SINGLE_BUILD) {
|
||||
|
@ -581,25 +587,25 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector<si
|
|||
|
||||
bool TbeKernelBuild::GenFusionScopeJson(const std::vector<mindspore::AnfNodePtr> &input_nodes,
|
||||
const std::vector<mindspore::AnfNodePtr> &compute_nodes,
|
||||
nlohmann::json *fusion_str, std::string *fusion_kernel) {
|
||||
MS_EXCEPTION_IF_NULL(fusion_str);
|
||||
MS_EXCEPTION_IF_NULL(fusion_kernel);
|
||||
nlohmann::json *fusion_json, std::string *fusion_kernel_name) {
|
||||
MS_EXCEPTION_IF_NULL(fusion_json);
|
||||
MS_EXCEPTION_IF_NULL(fusion_kernel_name);
|
||||
// get input layer info
|
||||
std::vector<std::vector<mindspore::AnfNodePtr>> input_layers;
|
||||
std::map<const AnfNodePtr, FusionDataType> spec_data_input;
|
||||
if (!GetInputLayers(input_nodes, compute_nodes, &input_layers, &spec_data_input)) {
|
||||
return false;
|
||||
}
|
||||
// gen fusion scopre_op jsom
|
||||
// gen fusion scopre_op json
|
||||
std::vector<nlohmann::json> compute_list;
|
||||
(*fusion_kernel) = kFusionKernelNamePrfix;
|
||||
(*fusion_kernel_name) = kFusionKernelNamePrfix;
|
||||
// index: fusion build option input record, next one from 0
|
||||
static size_t index = 0;
|
||||
auto layer_iter = input_layers.begin();
|
||||
auto compute_op_iter = compute_nodes.begin();
|
||||
for (; compute_op_iter != compute_nodes.end(); ++compute_op_iter, ++layer_iter) {
|
||||
nlohmann::json compute_op_str;
|
||||
(void)GenFusionComputeJson(*compute_op_iter, &layer_iter, &compute_op_str, fusion_kernel, &index);
|
||||
(void)GenFusionComputeJson(*compute_op_iter, &layer_iter, &compute_op_str, fusion_kernel_name, &index);
|
||||
compute_list.push_back(compute_op_str);
|
||||
}
|
||||
index = 0;
|
||||
|
@ -617,36 +623,122 @@ bool TbeKernelBuild::GenFusionScopeJson(const std::vector<mindspore::AnfNodePtr>
|
|||
}
|
||||
index = 0;
|
||||
data_list.insert(data_list.end(), compute_list.begin(), compute_list.end());
|
||||
(*fusion_str)[kFusionOpList] = data_list;
|
||||
(*fusion_json)[kFusionOpList] = data_list;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Writes the leading default entries of a tensor description.
// output_desc: json to fill in, must not be null. The kJL1AddrOffset and kJAddrType entries are set to 0,
//              the kJL1FusionType and kJL1WorkspaceSize entries are set to -1.
void TbeKernelBuild::GenPreDescJson(nlohmann::json *output_desc) {
  MS_EXCEPTION_IF_NULL(output_desc);
  auto &desc = *output_desc;
  desc[kJL1AddrOffset] = 0;
  desc[kJL1FusionType] = -1;
  desc[kJL1WorkspaceSize] = -1;
  desc[kJAddrType] = 0;
}
|
||||
|
||||
// Fills the op-level entries of a fusion compute op json and extends the fusion kernel name.
// cnode: compute node that is described, must not be null.
// compute_op_str: json of the compute op, must not be null. Receives the kJtype, kJFuncName, kJModuleName,
//                 kJName, kJPattern and kJPyModulePath entries.
// fusion_kernel_name: kernel name under construction, must not be null. "_" followed by the op type
//                     (as left by NormalizeFuncName) is appended to it.
void TbeKernelBuild::GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str,
                                                std::string *fusion_kernel_name) {
  // cnode is dereferenced below, so reject a null node up front like the sibling json generators do.
  MS_EXCEPTION_IF_NULL(cnode);
  MS_EXCEPTION_IF_NULL(compute_op_str);
  MS_EXCEPTION_IF_NULL(fusion_kernel_name);
  // gen others
  auto origin_type = AnfAlgo::GetCNodeName(cnode);
  // replace special op type for buffer fusion op
  auto type = GetRealOpType(origin_type);
  // kJtype stores the value returned by GetRealOpType; the entries below use the value after NormalizeFuncName.
  (*compute_op_str)[kJtype] = type;
  tbe::TbeAdapter::NormalizeFuncName(&type);
  (*compute_op_str)[kJFuncName] = type;
  (*compute_op_str)[kJModuleName] = std::string("impl.") + type;
  (*compute_op_str)[kJName] = cnode->fullname_with_scope();
  (*compute_op_str)[kJPattern] = GetNodeFusionType(cnode);
  // NOTE(review): the python module path is hard-coded to an absolute install location - confirm this is intended.
  (*compute_op_str)[kJPyModulePath] = "/usr/local/Ascend/opp/op_impl/build_in/ai_core/tbe";
  (void)(*fusion_kernel_name).append("_");
  (void)(*fusion_kernel_name).append(type);
}
|
||||
|
||||
// Builds the pre-build argument json of a compute node and stores it under kJPreBuildOutsAttrs.
// cnode: compute node whose outputs and attrs are collected, must not be null.
// compute_op_str: json of the compute op that receives the result, must not be null.
// The stored json has an empty object under kJKwdArgs and, under kJListArgs, one desc per output tensor
// followed by the kJValue of every attr that is not named "isRef" and whose kJValid entry equals true.
void TbeKernelBuild::GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str) {
  MS_EXCEPTION_IF_NULL(cnode);
  MS_EXCEPTION_IF_NULL(compute_op_str);

  // keyword args: always an empty object
  nlohmann::json prebuild_args;
  prebuild_args[kJKwdArgs] = nlohmann::json::object();

  // positional args, part 1: one desc per output tensor
  nlohmann::json list_args;
  auto output_num = AnfAlgo::GetOutputTensorNum(cnode);
  for (size_t out_idx = 0; out_idx < output_num; ++out_idx) {
    nlohmann::json desc;
    GenDescJson(cnode, out_idx, out_idx, &desc);
    // the data type is also stored under the kJDtype key
    desc[kJDtype] = desc[kJDataType];
    list_args.push_back(desc);
  }

  // positional args, part 2: attr values
  auto op_info = OpLib::FindOp(AnfAlgo::GetCNodeName(cnode), OpImplyType::kTBE);
  MS_EXCEPTION_IF_NULL(op_info);
  TbeKernelJsonCreator attr_json_creator(SINGLE_BUILD);
  nlohmann::json attr_args;
  // a failure here is only logged; whatever attrs were generated are still used
  if (!attr_json_creator.GenTbeAttrJson(cnode, op_info, &attr_args)) {
    MS_LOG(INFO) << "Fusion warning: get prebuild args of attr failed.";
  }
  for (const auto &attr_item : attr_args) {
    if (attr_item[kJName] != "isRef" && attr_item[kJValid] == true) {
      list_args.push_back(attr_item[kJValue]);
    }
  }

  prebuild_args[kJListArgs] = list_args;
  (*compute_op_str)[kJPreBuildOutsAttrs] = prebuild_args;
}
|
||||
|
||||
// Writes the trailing default entries of a tensor description.
// output_desc: json to fill in, must not be null. The kJSliceOffset, kJTotalShape and kJValidShape entries are
//              set to empty arrays and the kJSplitIndex entry is set to 0.
void TbeKernelBuild::GenSuffixDescJson(nlohmann::json *output_desc) {
  MS_EXCEPTION_IF_NULL(output_desc);
  auto &desc = *output_desc;
  desc[kJSliceOffset] = nlohmann::json::array();
  desc[kJSplitIndex] = 0;
  desc[kJTotalShape] = nlohmann::json::array();
  desc[kJValidShape] = nlohmann::json::array();
}
|
||||
|
||||
// anf_node: this node is used to get output desc(type\format\shape ...)
|
||||
// node_out_idx: node output index
|
||||
// desc_output_idx: this index is used to add json
|
||||
// nlohmann::json *output_desc: for return
|
||||
// FusionDataType fusion_data_type: special processing of the json desc output shape [kFusionAddN, kFusionReLUGradV2]
|
||||
void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
|
||||
size_t desc_output_idx, nlohmann::json *output_desc, FusionDataType fusion_data_type) {
|
||||
GenPreDescJson(output_desc);
|
||||
// data_type
|
||||
auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx);
|
||||
(*output_desc)[kJDataType] = tbe::TypeIdToString(type_id);
|
||||
// name
|
||||
std::string output_desc_name = anf_node->fullname_with_scope();
|
||||
if (node_out_idx > 0) {
|
||||
output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx);
|
||||
}
|
||||
(*output_desc)[kJName] = NormalizeFullScopeName(output_desc_name);
|
||||
auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx);
|
||||
(*output_desc)[kJDataType] = tbe::TypeIdToString(type_id);
|
||||
(*output_desc)[kJName] = output_desc_name;
|
||||
// ori_format
|
||||
(*output_desc)[kJOriFormat] = kOpFormat_NCHW;
|
||||
// ori_shape
|
||||
auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, node_out_idx);
|
||||
if (ori_shape.empty()) {
|
||||
ori_shape.emplace_back(1);
|
||||
}
|
||||
(*output_desc)[kJOriShape] = ori_shape;
|
||||
// !! Note: output_index, only node's output use it
|
||||
(*output_desc)[kJOutputIndex] = desc_output_idx;
|
||||
// shape
|
||||
auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, node_out_idx);
|
||||
if (shape.empty()) {
|
||||
shape.emplace_back(1);
|
||||
}
|
||||
(*output_desc)[kJShape] = shape;
|
||||
// !! Note: format: only data node's output use it
|
||||
auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx);
|
||||
if (format == kOpFormat_DEFAULT) {
|
||||
format = ori_shape.size() == 4 ? kOpFormat_NCHW : kOpFormat_ND;
|
||||
} else if (format == kOpFormat_FRAC_Z) {
|
||||
format = kOpFormat_FRACTAL_Z;
|
||||
}
|
||||
(*output_desc)[kJFormat] = format;
|
||||
(*output_desc)[kJOriFormat] = kOpFormat_NCHW;
|
||||
(*output_desc)[kJOutputIndex] = desc_output_idx;
|
||||
// special node
|
||||
if (fusion_data_type == kFusionAddN && format == kOpFormat_NC1HWC0) {
|
||||
std::vector<size_t> spec_shape = {};
|
||||
spec_shape.emplace_back(shape[0]);
|
||||
|
@ -663,12 +755,13 @@ void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_
|
|||
(*output_desc)[kJShape] = spec_shape;
|
||||
(*output_desc)[kJDataType] = kVTypeBool;
|
||||
}
|
||||
GenSuffixDescJson(output_desc);
|
||||
}
|
||||
|
||||
void TbeKernelBuild::GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
|
||||
size_t output_index, nlohmann::json *output_desc) {
|
||||
std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index);
|
||||
(*output_desc)[kJName] = NormalizeFullScopeName(output_desc_name);
|
||||
(*output_desc)[kJName] = output_desc_name;
|
||||
(*output_desc)[kJOutputIndex] = output_index;
|
||||
std::vector<size_t> shape;
|
||||
(*output_desc)[kJShape] = shape;
|
||||
|
@ -692,6 +785,9 @@ bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name,
|
|||
return true;
|
||||
}
|
||||
|
||||
// <input_nodes> : contains parameter/data node, input order may not match tbe input order;
|
||||
// <compute_nodes> : contains cnode, inputs order may not match tbe input order;
|
||||
// Special process node list: reference tbe_adapter.cc [except: Conv2DBackpropInput]
|
||||
bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes,
|
||||
const std::vector<mindspore::AnfNodePtr> &compute_nodes,
|
||||
std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers,
|
||||
|
@ -722,7 +818,7 @@ bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &in
|
|||
MS_LOG(INFO) << "Fusion info: add compute node's [" << i << "] input: " << input->fullname_with_scope();
|
||||
layer.emplace_back((*find_iter));
|
||||
} else {
|
||||
MS_LOG(INFO) << "Fusion warnig: this input [" << i << "] may be pre compute(" << input->fullname_with_scope()
|
||||
MS_LOG(INFO) << "Fusion warning: this input [" << i << "] may be pre compute(" << input->fullname_with_scope()
|
||||
<< ") node's output.";
|
||||
}
|
||||
}
|
||||
|
@ -750,8 +846,9 @@ bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::Anf
|
|||
MS_EXCEPTION_IF_NULL(data_str);
|
||||
MS_EXCEPTION_IF_NULL(index);
|
||||
std::vector<nlohmann::json> output_desc_list;
|
||||
// if data_input is null, this is optional input.
|
||||
if (!data_input) {
|
||||
MS_LOG(INFO) << "Data input is optional node";
|
||||
MS_LOG(INFO) << "Fusion info: data input is optional node";
|
||||
auto name = std::string(kOptional) + std::to_string(*index);
|
||||
(*data_str)[kJName] = name;
|
||||
nlohmann::json output_desc;
|
||||
|
@ -767,12 +864,16 @@ bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::Anf
|
|||
auto kernel_idx = AnfAlgo::VisitKernel(data_input, 0);
|
||||
auto real_node = kernel_idx.first;
|
||||
size_t real_idx = kernel_idx.second;
|
||||
MS_LOG(INFO) << "Real name " << real_node->fullname_with_scope() << " index:" << real_idx;
|
||||
MS_LOG(INFO) << "Fusion info: Real name: " << real_node->fullname_with_scope() << ". index:" << real_idx;
|
||||
// kJOutputDesc
|
||||
nlohmann::json output_desc;
|
||||
GenDescJson(real_node, real_idx, real_idx, &output_desc, fusion_data_type);
|
||||
output_desc_list.push_back(output_desc);
|
||||
(*data_str)[kJName] = NormalizeFullScopeName(real_node->fullname_with_scope());
|
||||
auto full_name = real_node->fullname_with_scope();
|
||||
if (real_idx > 0) {
|
||||
full_name = full_name.append("_").append(std::to_string(real_idx));
|
||||
}
|
||||
(*data_str)[kJName] = full_name;
|
||||
}
|
||||
(*data_str)[kJOutputDesc] = output_desc_list;
|
||||
(*data_str)[kJtype] = "Data";
|
||||
|
@ -808,6 +909,7 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) {
|
|||
size_t TbeKernelBuild::GetOptionalInput(const mindspore::CNodePtr &cnode, bool is_dynamic_input) {
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
if (is_dynamic_input) {
|
||||
// Node can not have optional & dynamic input.
|
||||
return 0;
|
||||
}
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
|
@ -831,22 +933,46 @@ std::string TbeKernelBuild::GetRealOpType(const std::string &origin_type) {
|
|||
return result;
|
||||
}
|
||||
|
||||
// Looks up the fusion pattern string of a compute node by its op name.
// cnode: node to query, must not be null.
// Returns the pattern string registered for the node's op name in the table below, or an empty string
// (after logging) when the op name has no entry.
std::string TbeKernelBuild::GetNodeFusionType(const mindspore::CNodePtr &cnode) {
  MS_EXCEPTION_IF_NULL(cnode);
  // The table is read-only: keep it const so it cannot be modified by accident after initialization.
  static const std::map<std::string, std::string> fusion_type_map = {{kConv2DOpName, "Convolution"},
                                                                     {kBNTrainingReduceOpName, "bn_reduce"},
                                                                     {kBNTrainingUpdateOpName, "bn_update"},
                                                                     {kReluV2OpName, "ElemWise"},
                                                                     {kTensorAddOpName, "ElemWise"},
                                                                     {kConv2DBackpropInputOpName, "Conv2d_backprop_input"},
                                                                     {kAddNOpName, "ElemWise"},
                                                                     {kReluGradV2OpName, "ElemWise"},
                                                                     {kRealDivOpName, "ElemWise"}};
  auto node_type = AnfAlgo::GetCNodeName(cnode);
  auto find = fusion_type_map.find(node_type);
  if (find == fusion_type_map.end()) {
    MS_LOG(INFO) << "Fusion warning: get node fusion type failed, origin node type: " << node_type
                 << " return null string.";
    return "";
  }
  return find->second;
}
|
||||
|
||||
bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
|
||||
std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
|
||||
std::vector<nlohmann::json> *input_desc_list, size_t *index) {
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
MS_EXCEPTION_IF_NULL(input_desc_list);
|
||||
std::vector<nlohmann::json> input_desc_list_tmp = {};
|
||||
// 1. input json
|
||||
bool is_dynamic_input = IsDynamicInput(cnode);
|
||||
for (size_t i = 1; i < cnode->inputs().size(); ++i) {
|
||||
auto input = cnode->input(i);
|
||||
auto kernel_idx = AnfAlgo::VisitKernel(input, 0);
|
||||
auto real_node = kernel_idx.first;
|
||||
size_t real_idx = kernel_idx.second;
|
||||
MS_LOG(INFO) << "Real name" << real_node->fullname_with_scope() << "index:" << real_idx;
|
||||
MS_LOG(INFO) << "Fusion info: real name: " << real_node->fullname_with_scope() << ". index:" << real_idx;
|
||||
nlohmann::json input_desc;
|
||||
GenDescJson(real_node, real_idx, real_idx, &input_desc);
|
||||
if (is_dynamic_input) {
|
||||
// 2. dynamic input json
|
||||
MS_LOG(INFO) << "Node has dynamic input.";
|
||||
input_desc[kJDynIndex] = (i - 1);
|
||||
}
|
||||
|
@ -854,7 +980,8 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
|
|||
}
|
||||
size_t optional_num = GetOptionalInput(cnode, is_dynamic_input);
|
||||
if (optional_num > 0) {
|
||||
MS_LOG(INFO) << "Node has optional input.";
|
||||
// 3. optional input
|
||||
MS_LOG(INFO) << "Fusion info: node has optional input.";
|
||||
for (size_t i = 0; i < optional_num; ++i) {
|
||||
nlohmann::json optional_input_desc;
|
||||
optional_input_desc[kJName] = std::string(kOptional) + std::to_string(*index);
|
||||
|
@ -872,7 +999,7 @@ std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int> &o
|
|||
std::vector<size_t> desc_output_index = {};
|
||||
for (size_t idx = 0; idx < output_used_nums.size(); ++idx) {
|
||||
auto output_use_num_item = output_used_nums[idx];
|
||||
MS_LOG(INFO) << "Output used num[" << idx << "] = " << output_use_num_item;
|
||||
MS_LOG(INFO) << "Fusion info: output used num[" << idx << "] = " << output_use_num_item;
|
||||
desc_output_index.emplace_back(idx);
|
||||
if (output_use_num_item > 1) {
|
||||
desc_output_index.emplace_back(idx);
|
||||
|
@ -887,7 +1014,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode
|
|||
auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
|
||||
if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) {
|
||||
auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum);
|
||||
MS_LOG(INFO) << "This node's output has been reused, node name: " << cnode->fullname_with_scope();
|
||||
MS_LOG(INFO) << "Fusion info: this node's output has been reused, node name: " << cnode->fullname_with_scope();
|
||||
if (output_used_nums.size() != output_size) {
|
||||
MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")"
|
||||
<< " is not match output used num(" << output_used_nums.size() << ")";
|
||||
|
@ -930,20 +1057,14 @@ bool TbeKernelBuild::GenFusionComputeJson(const mindspore::AnfNodePtr &compute_n
|
|||
// gen output desc
|
||||
std::vector<nlohmann::json> output_desc_list;
|
||||
if (!GenFusionComputeOutputJson(cnode, &output_desc_list)) {
|
||||
MS_LOG(INFO) << "Fusion Error: gen fusion output desc faild, node full name: " << cnode->fullname_with_scope();
|
||||
MS_LOG(INFO) << "Fusion Error: gen fusion output desc failed, node full name: " << cnode->fullname_with_scope();
|
||||
return false;
|
||||
}
|
||||
(*compute_op_str)[kJOutputDesc] = output_desc_list;
|
||||
// gen others
|
||||
auto origin_type = AnfAlgo::GetCNodeName(cnode);
|
||||
// replace special op type for buffer fusion op
|
||||
auto type = GetRealOpType(origin_type);
|
||||
(*compute_op_str)[kJtype] = type;
|
||||
tbe::TbeAdapter::NormalizeFuncName(&type);
|
||||
(*compute_op_str)[kJFuncName] = type;
|
||||
(*compute_op_str)[kJName] = NormalizeFullScopeName(cnode->fullname_with_scope());
|
||||
(void)(*fusion_kernel_name).append("_");
|
||||
(void)(*fusion_kernel_name).append(type);
|
||||
// gen common desc
|
||||
GenFusionComputeCommonJson(cnode, compute_op_str, fusion_kernel_name);
|
||||
// gen prebuild args
|
||||
GenFusionComputePreBuildJson(cnode, compute_op_str);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -965,7 +1086,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list,
|
|||
MS_EXCEPTION_IF_NULL(output_size_list);
|
||||
input_size_list->clear();
|
||||
output_size_list->clear();
|
||||
|
||||
// cal input size for malloc
|
||||
for (const auto &op : fusion_op_list) {
|
||||
if (op[kJtype] == "Data") {
|
||||
const auto &data_output_desc = op[kJOutputDesc];
|
||||
|
@ -975,23 +1096,23 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list,
|
|||
}
|
||||
auto ret = GetIOSizeImpl(data_output);
|
||||
input_size_list->push_back(ret);
|
||||
MS_LOG(INFO) << "Fusion info: scope input name: " << op[kJName] << ", size: " << ret;
|
||||
MS_LOG(INFO) << "Fusion info: input node name: " << op[kJName] << ", size: " << ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// cal output size for malloc
|
||||
for (const auto &output_node : output_nodes) {
|
||||
auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0);
|
||||
auto real_node = kernel_idx.first;
|
||||
size_t real_idx = kernel_idx.second;
|
||||
auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope());
|
||||
MS_LOG(INFO) << "Fusion info: real node name: " << normal_name << ", real output index: " << real_idx;
|
||||
auto full_name = real_node->fullname_with_scope();
|
||||
MS_LOG(INFO) << "Fusion info: real output node name: " << full_name << ", real output index: " << real_idx;
|
||||
for (const auto &op : fusion_op_list) {
|
||||
if (op[kJName] == normal_name) {
|
||||
if (op[kJName] == full_name) {
|
||||
auto op_output_desces = op[kJOutputDesc];
|
||||
if (output_node != real_node) {
|
||||
// tuple_get item
|
||||
MS_LOG(INFO) << "Output is a tuple getitem node";
|
||||
MS_LOG(INFO) << "Fusion info: output is a tuple get_item node";
|
||||
auto output_desc = op_output_desces[real_idx];
|
||||
if (output_desc[kJShape].empty()) {
|
||||
MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. real_index " << real_idx;
|
||||
|
@ -1001,6 +1122,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list,
|
|||
output_size_list->push_back(ret);
|
||||
MS_LOG(INFO) << "Fusion info: scope output index: " << real_idx << ", size: " << ret;
|
||||
} else {
|
||||
MS_LOG(INFO) << "Fusion info: output is self.";
|
||||
for (const auto &output_desc : op_output_desces) {
|
||||
if (output_desc[kJShape].empty()) {
|
||||
MS_LOG(INFO) << "Fusion info: output_desc's shape is empty, may be this node output";
|
||||
|
|
|
@ -41,8 +41,8 @@ class TbeKernelBuild {
|
|||
std::vector<size_t> *output_size_list);
|
||||
// Ub Fusion
|
||||
static bool GenFusionScopeJson(const std::vector<AnfNodePtr> &input_nodes,
|
||||
const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_str,
|
||||
std::string *fusion_kernel);
|
||||
const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_json,
|
||||
std::string *fusion_kernel_name);
|
||||
static bool GetIOSize(const nlohmann::json &fusion_op_list, const std::vector<AnfNodePtr> &output_nodes,
|
||||
std::vector<size_t> *input_size_list, std::vector<size_t> *output_size_list);
|
||||
|
||||
|
@ -61,9 +61,14 @@ class TbeKernelBuild {
|
|||
static std::vector<size_t> GetDescOutputIndex(const std::vector<int> &output_used_nums);
|
||||
static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
|
||||
std::vector<nlohmann::json> *output_desc_list);
|
||||
static void GenPreDescJson(nlohmann::json *output_desc);
|
||||
static void GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str,
|
||||
std::string *fusion_kernel_name);
|
||||
static void GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str);
|
||||
static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
|
||||
size_t desc_output_idx, nlohmann::json *output_desc,
|
||||
FusionDataType fusion_data_type = kFusionNormal);
|
||||
static void GenSuffixDescJson(nlohmann::json *output_desc);
|
||||
static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
|
||||
size_t output_index, nlohmann::json *output_desc);
|
||||
static size_t GetIOSizeImpl(const nlohmann::json &desc);
|
||||
|
@ -76,6 +81,7 @@ class TbeKernelBuild {
|
|||
static bool IsDynamicInput(const CNodePtr &cnode);
|
||||
static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input);
|
||||
static std::string GetRealOpType(const std::string &origin_type);
|
||||
static std::string GetNodeFusionType(const CNodePtr &cnode);
|
||||
};
|
||||
|
||||
class TbeKernelJsonCreator {
|
||||
|
@ -84,14 +90,14 @@ class TbeKernelJsonCreator {
|
|||
~TbeKernelJsonCreator() = default;
|
||||
bool GenTbeSingleKernelJson(const std::shared_ptr<AnfNode> &anf_node, nlohmann::json *kernel_json);
|
||||
std::string json_name() { return json_name_; }
|
||||
bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
|
||||
nlohmann::json *attrs_json);
|
||||
|
||||
private:
|
||||
bool GenTbeInputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
|
||||
nlohmann::json *inputs_json);
|
||||
bool GenTbeOutputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
|
||||
nlohmann::json *outputs_json);
|
||||
bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
|
||||
nlohmann::json *attrs_json);
|
||||
static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj);
|
||||
bool GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value,
|
||||
const std::shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i,
|
||||
|
|
|
@ -33,42 +33,6 @@
|
|||
namespace mindspore {
|
||||
namespace kernel {
|
||||
using mindspore::kernel::tbe::TbeUtils;
|
||||
|
||||
bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes) {
|
||||
auto build_manger = std::make_shared<ParallelBuildManager>();
|
||||
MS_EXCEPTION_IF_NULL(build_manger);
|
||||
for (const auto &anf_node : anf_nodes) {
|
||||
// gen kernel json
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
nlohmann::json kernel_json;
|
||||
TbeKernelJsonCreator creator(OP_PRE_COMPILE);
|
||||
if (!creator.GenTbeSingleKernelJson(anf_node, &kernel_json)) {
|
||||
MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
|
||||
return false;
|
||||
}
|
||||
kernel_json["compile_type"] = "pre_build";
|
||||
// op build
|
||||
auto task_id = build_manger->StartCompileOp(kernel_json);
|
||||
build_manger->SavePreTaskInfo(task_id, anf_node);
|
||||
}
|
||||
while (!build_manger->IsAllPreTaskFinish()) {
|
||||
int task_id = -1;
|
||||
std::string task_result;
|
||||
std::string pre_build_result;
|
||||
auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
|
||||
if (!ret) {
|
||||
MS_EXCEPTION(ArgumentError) << "Pre Build Failed. wait one ret:" << ret << ", task id:" << task_id;
|
||||
}
|
||||
|
||||
if (task_result != "Success") {
|
||||
MS_EXCEPTION(ArgumentError) << "task pre compile Failed, task id:" << task_id << ", cause:" << task_result;
|
||||
}
|
||||
|
||||
build_manger->PreTaskFinishProcess(task_id, pre_build_result);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
|
||||
auto build_manger = std::make_shared<ParallelBuildManager>();
|
||||
MS_EXCEPTION_IF_NULL(build_manger);
|
||||
|
@ -122,15 +86,8 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
|
|||
return build_manger->GenSameOpKernelMod();
|
||||
}
|
||||
|
||||
ParallelBuildManager::ParallelBuildManager() {}
|
||||
|
||||
ParallelBuildManager::~ParallelBuildManager() { ResetTaskInfo(); }
|
||||
|
||||
// Records which node a pre-build task belongs to.
// task_id: id of the submitted pre-build task, used as the key in pre_task_map_.
// anf_node: node stored for that task; an existing entry with the same id is overwritten.
void ParallelBuildManager::SavePreTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node) {
  MS_LOG(INFO) << "SavePreTaskInfo, task id: " << task_id;
  auto &slot = pre_task_map_[task_id];
  slot = anf_node;
}
|
||||
|
||||
void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node,
|
||||
const std::string &json_name, const std::vector<size_t> &input_size_list,
|
||||
const std::vector<size_t> &output_size_list, int32_t scope_id) {
|
||||
|
@ -149,42 +106,11 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod
|
|||
task_map_[task_id] = task_info;
|
||||
}
|
||||
|
||||
// Logs the number of entries left in pre_task_map_ and returns true when there are none.
bool ParallelBuildManager::IsAllPreTaskFinish() const {
  const auto pending = pre_task_map_.size();
  MS_LOG(INFO) << "wait pre build process task_num: " << pending;
  return pending == 0;
}
|
||||
|
||||
// Logs the number of entries left in task_map_ and returns true when there are none.
bool ParallelBuildManager::IsAllTaskFinish() const {
  const auto pending = task_map_.size();
  MS_LOG(INFO) << "wait process task_num: " << pending;
  return pending == 0;
}
|
||||
|
||||
void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) {
|
||||
auto task_iter = pre_task_map_.find(task_id);
|
||||
if (task_iter == pre_task_map_.end()) {
|
||||
MS_EXCEPTION(ArgumentError) << "can find pre task_id:" << task_id;
|
||||
}
|
||||
auto node = task_iter->second;
|
||||
auto builder =
|
||||
std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node));
|
||||
std::string start_flag = "fusion_pattern_start";
|
||||
std::string end_flag = "fusion_pattern_end";
|
||||
int start = pre_build_result.find(start_flag);
|
||||
int end = pre_build_result.find(end_flag);
|
||||
if (start != -1 && end != -1 && end >= start) {
|
||||
std::string result = pre_build_result.substr(start + start_flag.size(), end - start - start_flag.size());
|
||||
if (result == "") {
|
||||
(void)pre_task_map_.erase(task_iter);
|
||||
return;
|
||||
}
|
||||
transform(result.begin(), result.end(), result.begin(), ::toupper);
|
||||
FusionType fusion_type = tbe::GetFusionType(result);
|
||||
builder->SetFusionType(fusion_type);
|
||||
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get());
|
||||
}
|
||||
(void)pre_task_map_.erase(task_iter);
|
||||
}
|
||||
|
||||
std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) {
|
||||
auto task_iter = task_map_.find(task_id);
|
||||
if (task_iter == task_map_.end()) {
|
||||
|
|
|
@ -28,7 +28,6 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes);
|
||||
bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes);
|
||||
|
||||
struct KernelBuildTaskInfo {
|
||||
|
@ -42,9 +41,8 @@ struct KernelBuildTaskInfo {
|
|||
|
||||
class ParallelBuildManager {
|
||||
public:
|
||||
ParallelBuildManager();
|
||||
ParallelBuildManager() = default;
|
||||
~ParallelBuildManager();
|
||||
void SavePreTaskInfo(int32_t task_id, const AnfNodePtr &anf_node);
|
||||
void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name,
|
||||
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
|
||||
int32_t scope_id = 0);
|
||||
|
@ -54,10 +52,7 @@ class ParallelBuildManager {
|
|||
bool SearchInCache(const std::string &json_name, const std::string &processor,
|
||||
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
|
||||
AnfNode *node) const;
|
||||
|
||||
bool IsAllPreTaskFinish() const;
|
||||
bool IsAllTaskFinish() const;
|
||||
void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result);
|
||||
std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true);
|
||||
KernelModPtr GenKernelMod(const string &json_name, const string &processor,
|
||||
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
|
||||
|
|
|
@ -1187,6 +1187,19 @@ TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputPrecision(const AnfNodePtr &node, s
|
|||
return GetCNodeOutputPrecision(kernel_with_index.first);
|
||||
}
|
||||
|
||||
bool AnfRuntimeAlgorithm::IsDynamicShape(const AnfNodePtr &node) {
|
||||
if (!node->isa<CNode>()) {
|
||||
return false;
|
||||
}
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto has_attr = AnfAlgo::HasNodeAttr(kAttrIsDynamicShape, cnode);
|
||||
if (!has_attr) {
|
||||
return false;
|
||||
}
|
||||
return AnfAlgo::GetNodeAttr<bool>(node, kAttrIsDynamicShape);
|
||||
}
|
||||
|
||||
bool AnfRuntimeAlgorithm::IsCondControlKernel(const CNodePtr &node) {
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (node->inputs().empty()) {
|
||||
|
|
|
@ -217,6 +217,7 @@ class AnfRuntimeAlgorithm {
|
|||
static TypeId GetCNodeOutputPrecision(const AnfNodePtr &node);
|
||||
// get fix output precision from prev node, input_idx is the input index of current node related to prev node.
|
||||
static TypeId GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx);
|
||||
static bool IsDynamicShape(const AnfNodePtr &node);
|
||||
static bool IsCondControlKernel(const CNodePtr &node);
|
||||
static bool IsIndependentNode(const CNodePtr &node);
|
||||
};
|
||||
|
|
|
@ -445,7 +445,6 @@ void AscendSession::InitRuntimeResource() {
|
|||
}
|
||||
|
||||
void AscendSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) const {
|
||||
device::ascend::KernelPreBuild(kernel_graph.get());
|
||||
MS_LOG(INFO) << "HardwareOptimize start!";
|
||||
opt::AscendBackendOptimization(kernel_graph);
|
||||
opt::AscendGraphKernelCommonProcess(kernel_graph);
|
||||
|
|
|
@ -19,7 +19,8 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include "runtime/device/ascend/kernel_select_ascend.h"
|
||||
#include "runtime/device/kernel_info.h"
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
|
@ -61,32 +62,6 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) {
|
|||
return kernel_mod_ptr;
|
||||
}
|
||||
|
||||
// Collects the nodes of a kernel graph that need a TBE pre-build and hands them to TbeOpParallelPreBuild.
// kernel_graph_ptr: graph whose execution order is scanned, must not be null.
// A node is selected when it is a real kernel of type TBE_KERNEL, has no kernel mod yet and its fusion type
// is DYNAMIC. Returns the result of kernel::TbeOpParallelPreBuild for the selected nodes.
static bool KernelPreBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) {
  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
  std::vector<AnfNodePtr> tbe_nodes;
  for (const auto &anf_node : kernel_graph_ptr->execution_order()) {
    MS_EXCEPTION_IF_NULL(anf_node);
    if (!AnfAlgo::IsRealKernel(anf_node)) {
      continue;
    }
    // only TBE kernels take part in the pre-build
    if (AnfAlgo::GetKernelType(anf_node) != KernelType::TBE_KERNEL) {
      continue;
    }
    const bool not_built = AnfAlgo::GetKernelMod(anf_node) == nullptr;
    if (not_built && AnfAlgo::GetFusionType(anf_node) == kernel::FusionType::DYNAMIC) {
      tbe_nodes.push_back(anf_node);
    }
  }
  return kernel::TbeOpParallelPreBuild(tbe_nodes);
}
|
||||
|
||||
static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
|
||||
std::vector<AnfNodePtr> tbe_nodes;
|
||||
|
@ -237,12 +212,6 @@ static bool IsAtomicNode(const CNodePtr &kernel_node) {
|
|||
return !(workspace_indexs.empty() && output_indexs.empty());
|
||||
}
|
||||
|
||||
// Entry point of the ascend kernel pre-build.
// kernel_graph_ptr: graph to pre-build, must not be null.
// Returns the result of KernelPreBuildParallelCompile for that graph.
bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
  return device::ascend::KernelPreBuildParallelCompile(kernel_graph_ptr);
}
|
||||
|
||||
bool KernelBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
|
||||
TbeUtils::LoadCache();
|
||||
|
|
|
@ -22,10 +22,6 @@
|
|||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
/**
|
||||
* @brief kernel pre build for ascend.
|
||||
*/
|
||||
bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr);
|
||||
/**
|
||||
* @brief kernel build for ascend.
|
||||
*/
|
||||
|
|
|
@ -32,6 +32,7 @@ namespace mindspore {
|
|||
// op name. Op which not exists in operator/ops.h, so define it's name here
|
||||
constexpr auto kFour2FiveOpName = "Four2Five";
|
||||
constexpr auto kFive2FourOpName = "Five2Four";
|
||||
constexpr auto kConv2DOpName = "Conv2D";
|
||||
constexpr auto kConvBN1OpName = "ConvBN1";
|
||||
constexpr auto kBN2AddReluOpName = "BN2AddRelu";
|
||||
constexpr auto kBN2ReLUOpName = "BN2Relu";
|
||||
|
@ -273,6 +274,7 @@ constexpr auto kAttrPadDimSize = "pad_dim_size";
|
|||
constexpr auto kAttrNumSegments = "num_segments";
|
||||
constexpr auto kAttrBegin = "begin";
|
||||
constexpr auto kAttrSize = "size";
|
||||
constexpr auto kAttrIsDynamicShape = "is_dynamic_shape";
|
||||
|
||||
// attr value
|
||||
constexpr auto kValueTargetSwitch = "target_switch";
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
# limitations under the License.
|
||||
|
||||
"""aicpu ops"""
|
||||
from .unique import _unique_aicpu
|
||||
from .init_data_set_queue import _init_data_set_queue_aicpu
|
||||
from .embedding_lookup import _embedding_lookup_aicpu
|
||||
from .padding import _padding_aicpu
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
"""Unique op"""
|
||||
from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType
|
||||
|
||||
unique_op_info = AiCPURegOp("Unique") \
|
||||
.fusion_type("OPAQUE") \
|
||||
.input(0, "x", "required") \
|
||||
.output(0, "y", "required") \
|
||||
.output(1, "idx", "required") \
|
||||
.dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \
|
||||
.dtype_format(DataType.I64_Default, DataType.I64_Default, DataType.I64_Default) \
|
||||
.get_op_info()
|
||||
|
||||
@op_info_register(unique_op_info)
|
||||
def _unique_aicpu():
|
||||
"""Unique AiCPU register"""
|
||||
return
|
|
@ -17,7 +17,7 @@
|
|||
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
|
||||
|
||||
matmul_op_info = TBERegOp("MatMul") \
|
||||
.fusion_type("ELEMWISE") \
|
||||
.fusion_type("OPAQUE") \
|
||||
.async_flag(False) \
|
||||
.binfile_name("matmul.so") \
|
||||
.compute_cost(10) \
|
||||
|
|
|
@ -91,6 +91,7 @@ from ._thor_ops import (CusBatchMatMul, CusCholeskyTrsm, CusFusedAbsMax1, CusImg
|
|||
from .sparse_ops import SparseToDense
|
||||
|
||||
__all__ = [
|
||||
'Unique',
|
||||
'ReverseSequence',
|
||||
'EditDistance',
|
||||
'CropAndResize',
|
||||
|
|
|
@ -597,9 +597,9 @@ class Unique(Primitive):
|
|||
containing indices of elements in the input corresponding to the output tensor.
|
||||
|
||||
Examples:
|
||||
>>> x = Tensor(np.array([1, 2, 5, 2]), mindspore.float32)
|
||||
>>> x = Tensor(np.array([1, 2, 5, 2]), mindspore.int32)
|
||||
>>> out = P.Unique()(x)
|
||||
(Tensor([1, 2, 5], mindspore.int32), Tensor([0, 1, 2, 1], mindspore.float32))
|
||||
(Tensor([1, 2, 5], mindspore.int32), Tensor([0, 1, 2, 1], mindspore.int32))
|
||||
"""
|
||||
@prim_attr_register
|
||||
def __init__(self):
|
||||
|
|
|
@ -35,39 +35,5 @@ StatusFactory::StatusFactory() {}
|
|||
|
||||
std::mutex& StatusFactory::GetMutex() { return GetInstance()->rwMutex_; }
|
||||
|
||||
TsdClient* TsdClient::GetInstance() {
|
||||
static TsdClient instance;
|
||||
return &instance;
|
||||
}
|
||||
|
||||
/**
|
||||
* @ingroup TsdClient
|
||||
* @brief 构造函数
|
||||
*/
|
||||
TsdClient::TsdClient() { rankSize_ = 1; }
|
||||
|
||||
/**
|
||||
* @ingroup TsdClient
|
||||
* @brief 析构函数
|
||||
*/
|
||||
TsdClient::~TsdClient() = default;
|
||||
|
||||
/**
|
||||
* @ingroup TsdClient
|
||||
* @brief framework发送拉起hccp和computer process的命令
|
||||
* @param [in] phyDeviceId : FMK传入物理ID
|
||||
* @param [in] rankSize : rank size passed in by FMK
|
||||
* @return TDT_OK:成功 或者其他错误码
|
||||
*/
|
||||
TDT_StatusT TsdClient::Open(const uint32_t deviceId, const uint32_t rankSize) { return TDT_OK; }
|
||||
|
||||
/**
|
||||
* @ingroup TsdClient
|
||||
* @brief 通知TsdClient关闭相关资源
|
||||
* @param 无
|
||||
* @return TDT_OK:成功 或者其他错误码
|
||||
*/
|
||||
TDT_StatusT TsdClient::Close() { return TDT_OK; }
|
||||
|
||||
} // namespace tdt
|
||||
#endif // TDT_MOCK_H
|
||||
|
|
Loading…
Reference in New Issue