update aicpu proto and update module: graphengine

Support Dynamic Shape Aicpu Run Package

adapt tensorengin modify, fix ub fusion
This commit is contained in:
wuxuejian 2020-08-17 11:49:26 +08:00 committed by jonyguo
parent b7425d3e0c
commit bd527a331d
34 changed files with 470 additions and 312 deletions

@ -1 +1 @@
Subproject commit 622af6c1c50034bea5a08bd409c5a410782bfe53
Subproject commit 2dbfefcdd0d4b958801403dbaf9efe46447dccd2

View File

@ -17,8 +17,6 @@ import json
import os
import sys
from te.platform.cce_conf import te_set_version
from te.platform.fusion_manager import op_build_cfg_dis, op_build_cfg_en, set_current_op_name, \
init_op_pattern, set_op_params, set_op_build_type, get_op_pattern, set_current_op_func_name
from te.platform.fusion_util import fusion_op
from common import check_kernel_info, get_args, get_build_in_impl_path, get_ddk_version
@ -27,7 +25,6 @@ build_in_impl_path = get_build_in_impl_path()
# op function list
op_build = "compile"
op_pre_build = "pre_build"
fusion_pattern_start_flag = "fusion_pattern_start"
fusion_pattern_end_flag = "fusion_pattern_end"
@ -83,19 +80,7 @@ def build_op(build_type, json_str):
else:
op_module = __import__("impl."+op_name, globals(), locals(), [op_name], 0)
# get function
if build_type == op_pre_build:
# set op parameter
op_build_cfg_dis()
set_current_op_func_name(op_name)
set_current_op_name(kernel_name)
init_op_pattern()
set_op_params(*outputs_args, *attrs_args, kernel_name=kernel_name)
set_op_build_type('prebuild')
if custom_flag:
py_fn_name = kernel_info['op_info']['name']
else:
py_fn_name = op_name
elif build_type == op_build:
if build_type == op_build:
if custom_flag:
py_fn_name = kernel_info['op_info']['name']
else:
@ -106,13 +91,6 @@ def build_op(build_type, json_str):
if op_func is None:
raise ValueError("Op:{} function {} is not supported by Tbe.".format(op_name, build_type))
# pre build
if build_type == op_pre_build:
op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
# disable only pattern configuration
op_build_cfg_en()
return get_op_pattern()
# call function
if kernel_name[0:19] == "bounding_box_encode":
return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name_val=kernel_name)
@ -120,8 +98,6 @@ def build_op(build_type, json_str):
return op_func(*inputs_args, *outputs_args, *attrs_args, kernel_name=kernel_name)
except Exception as e:
if build_type == op_pre_build:
op_build_cfg_en()
raise RuntimeError(e)
@ -136,14 +112,9 @@ def compile_fusion_op(json_str):
Exception: If specific keyword is not found.
"""
args = json.loads(json_str)
te_set_version(ddk_version)
if 'fusion_op' not in args or not args['fusion_op']:
raise ValueError("Json string Errors, key:fusion_op not found.")
if 'prebuild_ops' not in args or not args['prebuild_ops']:
raise ValueError("Json string Errors, key:prebuild_ops not found.")
pre_build_op_list = args['prebuild_ops']
for op in pre_build_op_list:
build_op(op_pre_build, json.dumps(op))
fusion_op_arg = args['fusion_op']
return fusion_op(json.dumps(fusion_op_arg))
@ -159,8 +130,6 @@ def compile_with_json(json_str):
json_info = json.loads(json_str)
if "fusion_op" in json_info:
ret = compile_fusion_op(json_str)
elif "compile_type" in json_info:
ret = build_op(op_pre_build, json_str)
else:
ret = build_op(op_build, json_str)
return ret

View File

@ -20,6 +20,8 @@
#include <vector>
#include <memory>
#include <algorithm>
#include <map>
#include <climits>
#include "runtime/device/kernel_runtime.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
#include "backend/kernel_compiler/akg/akg_kernel_build.h"
@ -218,7 +220,7 @@ void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
dim->set_size((::google::protobuf::int64)item);
}
node_inputs->set_tensor_type((mindspore::DataType)input_data_type);
node_inputs->set_tensor_type(input_data_type);
node_inputs->set_mem_device("HBM");
}
}
@ -245,7 +247,7 @@ void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef
}
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
node_outputs->set_tensor_type((mindspore::DataType)output_data_type);
node_outputs->set_tensor_type(output_data_type);
node_outputs->set_mem_device("HBM");
}
}
@ -287,6 +289,109 @@ bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
return true;
}
bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
if (!anf_node->isa<CNode>()) {
return true;
}
if (!AnfAlgo::IsDynamicShape(anf_node)) {
return true;
}
MS_LOG(INFO) << "CreateExtInfo start, " << anf_node->fullname_with_scope();
int32_t unknown_shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
uint64_t ext_info_head_len = kExtInfoHeadSize;
std::string ext_info;
size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
// 1.addr:unknown shape type
uint64_t ext_info_len = ext_info.size();
ext_info_len += ext_info_head_len + sizeof(int32_t);
// 2.addr:input ShapeAndType
ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType);
// 3.addr:output ShapeAndType
ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType);
uint64_t ext_info_offset = ext_info.size();
ext_info.resize(ext_info_len, 0);
char *ext_info_buf = ext_info.data();
// deal1: unknown shape type
ExtInfo *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
info->infoType = FWK_ADPT_EXT_SHAPE_TYPE;
info->infoLen = sizeof(int32_t);
ext_info_offset += ext_info_head_len;
int32_t *shape_type = reinterpret_cast<int32_t *>(ext_info_buf + ext_info_offset);
*shape_type = unknown_shape_type;
ext_info_offset += info->infoLen;
// deal2:input ShapeAndType
info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
info->infoType = FWK_ADPT_EXT_INPUT_SHAPE;
info->infoLen = input_num * sizeof(ShapeAndType);
ext_info_offset += ext_info_head_len;
ShapeAndType *inputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
for (size_t input_index = 0; input_index < input_num; input_index++) {
TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
std::vector<size_t> input_shape;
int32_t input_data_type;
if (input_type == kObjectTypeString) {
auto cnode = anf_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
auto input_node = cnode->inputs()[input_index + 1];
auto value_ptr = GetValueNode(input_node);
auto value = GetValue<std::string>(value_ptr);
input_shape.push_back(1);
input_shape.push_back(value.size());
input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
} else {
input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
}
inputs[input_index].type = input_data_type;
size_t input_shape_index = 0;
for (; input_shape_index < input_shape.size(); input_shape_index++) {
inputs[input_index].dims[input_shape_index] = SizeToLong(input_shape[input_shape_index]);
}
if (input_shape.size() < kMaxShapeDims) {
inputs[input_index].dims[input_shape_index] = LLONG_MIN;
}
}
ext_info_offset += info->infoLen;
// deal3:output ShapeAndType
info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
info->infoType = FWK_ADPT_EXT_OUTPUT_SHAPE;
info->infoLen = output_num * sizeof(ShapeAndType);
ext_info_offset += ext_info_head_len;
ShapeAndType *outputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
for (size_t output_index = 0; output_index < output_num; output_index++) {
std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
outputs[output_index].type = output_data_type;
size_t output_shape_index = 0;
for (; output_shape_index < output_shape.size(); output_shape_index++) {
outputs[output_index].dims[output_shape_index] = SizeToLong(output_shape[output_shape_index]);
}
if (output_shape_index < kMaxShapeDims) {
outputs[output_index].dims[output_shape_index] = LLONG_MIN;
}
}
// set ext info
kernel_mod_ptr->SetExtInfo(ext_info);
return true;
}
KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
std::string op_name = AnfAlgo::GetCNodeName(anf_node);
@ -300,6 +405,11 @@ KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) {
MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
}
if (!CreateExtInfo(anf_node, kernel_mod_ptr)) {
MS_LOG(EXCEPTION) << "Create nodeDefBytes faild!";
}
if (!SetIOSize(anf_node, kernel_mod_ptr)) {
MS_LOG(EXCEPTION) << "Set input output size list failed.";
}

View File

@ -43,6 +43,7 @@ AicpuOpKernelMod::~AicpuOpKernelMod() {
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
ext_info_.clear();
}
// Replaces input_size_list_ with a copy of size_list.
void AicpuOpKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) {
  input_size_list_ = size_list;
}
@ -54,6 +55,7 @@ const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { retu
// Replaces inputList_ with a copy of inputList.
void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) {
  inputList_ = inputList;
}

// Replaces outputList_ with a copy of outputList.
void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) {
  outputList_ = outputList;
}

// Stores a copy of nodeDef in node_def_str_.
void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) {
  node_def_str_ = nodeDef;
}

// Stores a copy of ext_info in ext_info_.
void AicpuOpKernelMod::SetExtInfo(const std::string &ext_info) {
  ext_info_ = ext_info;
}

// Stores a copy of node_name in node_name_.
void AicpuOpKernelMod::SetNodeName(const std::string &node_name) {
  node_name_ = node_name;
}
void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
@ -84,16 +86,30 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs
auto node_def_len = node_def_str_.length();
param_len += node_def_len;
param_len += sizeof(uint32_t);
AicpuParamHead aicpu_param_head;
aicpu_param_head.length = param_len;
aicpu_param_head.ioAddrNum = io_addrs_num;
if (ext_info_.empty()) {
MS_LOG(INFO) << "Static Shape Kernel";
aicpu_param_head.extInfoLength = 0;
aicpu_param_head.extInfoAddr = 0;
} else {
MS_LOG(INFO) << "Dynamic Kernel Ext Info size:" << ext_info_.size();
}
// Create taskArgs: AicpuParamHead + ioAddrs + notifyId + customizedAttr
AicpuParamHead paramHead = {static_cast<uint32_t>(param_len), static_cast<uint32_t>(io_addrs_num)};
args_.clear();
(void)args_.append(reinterpret_cast<const char *>(&paramHead), sizeof(AicpuParamHead));
(void)args_.append(reinterpret_cast<const char *>(&aicpu_param_head), sizeof(AicpuParamHead));
// TaskArgs append ioAddrs
if (io_addrs_size != 0) {
(void)args_.append(reinterpret_cast<const char *>(io_addrs.data()), io_addrs_size);
}
// size for node_def
args_.append(reinterpret_cast<const char *>(&node_def_len), sizeof(uint32_t));
// When it's aicpu customized ops, taskArgs should append customized attr
if (node_def_len != 0) {
(void)args_.append(reinterpret_cast<const char *>(node_def_str_.data()), node_def_len);
@ -145,8 +161,9 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
node_name_ = kTopKV2;
}
AicpuTaskInfoPtr task_info_ptr = make_shared<ge::model_runner::AicpuTaskInfo>(
kernel_name_, stream_id, node_so_, node_name_, node_def_str_, input_data_addrs, output_data_addrs, NeedDump());
AicpuTaskInfoPtr task_info_ptr =
make_shared<ge::model_runner::AicpuTaskInfo>(kernel_name_, stream_id, node_so_, node_name_, node_def_str_,
ext_info_, input_data_addrs, output_data_addrs, NeedDump());
MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
return {task_info_ptr};

View File

@ -36,6 +36,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
void SetOutputList(const std::vector<int64_t> &outputList);
void SetAnfNode(const AnfNodePtr &anf_node);
void SetNodeDef(const std::string &nodeDef);
void SetExtInfo(const std::string &ext_info);
void SetNodeName(const std::string &node_name);
/**
@ -58,6 +59,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
std::string node_def_str_;
std::string node_name_;
std::string node_so_;
std::string ext_info_;
std::vector<int64_t> inputList_;
std::vector<int64_t> outputList_;
AnfNodePtr anf_node_;

View File

@ -21,7 +21,6 @@
#include <map>
#include <string>
#include "backend/kernel_compiler/kernel.h"
namespace mindspore {
namespace kernel {
constexpr auto kInitDataSetQueue = "InitDataSetQueue";
@ -50,6 +49,36 @@ struct AicpuParamHead {
uint64_t extInfoAddr; // extInfo address
} __attribute__((packed));
// Size in bytes of the fixed ExtInfo header (infoType + infoLen) that precedes each payload.
const uint32_t kExtInfoHeadSize = 8;
// Header of one extend-info record; the payload of infoLen bytes follows directly after it.
struct ExtInfo {
  int32_t infoType;  // extend type
  uint32_t infoLen;  // length for infoMsg
  char infoMsg[0];   // extend value (zero-length trailing array, payload starts here)
} __attribute__((packed));
// Buffer offsets are advanced by kExtInfoHeadSize, so it must stay in sync with the struct layout.
static_assert(sizeof(ExtInfo) == kExtInfoHeadSize, "kExtInfoHeadSize must match the ExtInfo header layout");
// Extend info ShapeAndType: data type and dims of one tensor inside an extend-info payload.
// kMaxShapeDims is the capacity of dims. CreateExtInfo writes LLONG_MIN into the entry after
// the last dim when the shape has fewer than kMaxShapeDims dims.
// The struct is packed, so its size is sizeof(int32_t) + kMaxShapeDims * sizeof(int64_t).
const uint32_t kMaxShapeDims = 8;
struct ShapeAndType {
int32_t type;
int64_t dims[kMaxShapeDims];
} __attribute__((packed));
// Extend info record types for a task; the value is stored in ExtInfo::infoType.
// CreateExtInfo emits FWK_ADPT_EXT_SHAPE_TYPE with an int32_t payload, and
// FWK_ADPT_EXT_INPUT_SHAPE / FWK_ADPT_EXT_OUTPUT_SHAPE with arrays of ShapeAndType.
// NOTE(review): FWK_ADPT_EXT_INVALID looks like an end-of-range sentinel; confirm with the AICPU side.
enum FWKTaskExtInfoType {
FWK_ADPT_EXT_SHAPE_TYPE = 0,
FWK_ADPT_EXT_INPUT_SHAPE,
FWK_ADPT_EXT_OUTPUT_SHAPE,
FWK_ADPT_EXT_INVALID
};
// Unknown-shape op type: how the output shape of a dynamic-shape op is obtained.
// The chosen value is written as the int32_t payload of the FWK_ADPT_EXT_SHAPE_TYPE record;
// CreateExtInfo currently always uses DEPEND_COMPUTE.
enum UnknowShapeOpType {
DEPEND_IN_SHAPE = 1, // op out shape get by input shape
DEPEND_CONST_VALUE = 2, // op out shape get by const op value
DEPEND_SHAPE_RANGE = 3, // op out shape get by range
DEPEND_COMPUTE = 4 // op out shape get by totally computing
};
class AicpuOpUtil {
public:
static int MsTypeToProtoType(TypeId ms_type);

View File

@ -26,7 +26,7 @@ message AttrValue {
repeated int64 i = 3 [ packed = true ]; //"array(int)"
repeated float f = 4 [ packed = true ]; //"array(float)"
repeated bool b = 5 [ packed = true ]; //"array(bool)"
repeated DataType type = 6 [ packed = true ]; //"array(type)"
repeated int32 type = 6 [ packed = true ]; //"array(type)"
repeated TensorShape shape = 7; //"array(shape)"
repeated Tensor tensor = 8; //"array(tensor)"
}

View File

@ -18,9 +18,16 @@ package mindspore;
import "attr.proto";
import "tensor.proto";
// Index/count pair used as the value type of NodeDef.dym_inputs and NodeDef.dym_outputs.
// NOTE(review): presumably idx is the start position and num the number of tensors of a
// dynamic input/output group; confirm against the AICPU proto owner.
message DynamicIdx {
int32 idx = 1;
int32 num = 2;
}
// Definition of one node: op name, attributes, input/output tensors and the
// string-keyed DynamicIdx maps for inputs and outputs. Field number 1 is not used.
// NOTE(review): the keys of dym_inputs / dym_outputs are presumably input/output names; confirm.
message NodeDef {
string op = 2;
map<string, AttrValue> attrs = 3;
repeated Tensor inputs = 4;
repeated Tensor outputs = 5;
map<string, DynamicIdx> dym_inputs = 6;
map<string, DynamicIdx> dym_outputs = 7;
}

View File

@ -26,9 +26,12 @@ message Tensor {
TensorShape tensor_shape = 1;
// tensor content data type
DataType tensor_type = 2;
int32 tensor_type = 2;
// tensor memory device
// data located memory device , "DDR" "HBM" OR "NONE"
string mem_device = 3;
string name = 4;
uint64 data_ptr = 5;
uint64 data_size = 6;
}

View File

@ -31,5 +31,5 @@ message TensorShape {
bool unknown_rank = 3;
// data format "NHWC" "NCHW" "NC1HWC0" OR "NONE"
string data_format = 4;
int32 data_format = 4;
};

View File

@ -19,17 +19,30 @@ option cc_enable_arenas = true;
package mindspore;
enum DataType {
MS_UNKNOWN = 0;
MS_BOOL = 1;
MS_FLOAT32 = 0;
MS_FLOAT16 = 1;
MS_INT8 = 2;
MS_UINT8 = 3;
MS_INT16 = 4;
MS_UINT16 = 5;
MS_INT32 = 6;
MS_UINT32 = 7;
MS_INT64 = 8;
MS_UINT64 = 9;
MS_FLOAT16 = 10;
MS_FLOAT32 = 11;
MS_FLOAT64 = 12;
MS_INT32 = 3;
MS_UINT8 = 4;
MS_INT16 = 6;
MS_UINT16 = 7;
MS_UINT32 = 8;
MS_INT64 = 9;
MS_UINT64 = 10;
MS_FLOAT64 = 11;
MS_BOOL = 12;
MS_STRING = 13;
MS_DUAL_SUB_INT8 = 14;
MS_DUAL_SUB_UINT8 = 15;
MS_COMPLEX64 = 16;
MS_COMPLEX128 = 17;
MS_QINT8 = 18;
MS_QINT16 = 19;
MS_QINT32 = 20;
MS_QUINT8 = 21;
MS_QUINT16 = 22;
MS_RESOURCE = 23;
MS_STRING_REF = 24;
MS_DUAL = 25;
MS_UNKNOWN = 26;
}

View File

@ -37,7 +37,6 @@ enum FusionType {
COMMREDUCE,
SEGMENT,
OPAQUE,
DYNAMIC,
UNKNOWN_FUSION_TYPE = -1,
};
enum OpPattern {
@ -80,8 +79,8 @@ class KernelPack {
bool LoadKernelMeta(const std::string &json_f, const std::string &processor);
bool ReadFromJsonFile(const std::string &json_f, const std::string &processor);
const std::string Serialize() const;
const FlexArray *const GetJson() const { return json_; }
const FlexArray *const GetKernel() const { return kernel_; }
const FlexArray *GetJson() const { return json_; }
const FlexArray *GetKernel() const { return kernel_; }
~KernelPack() {
if (json_) {
delete[] json_;

View File

@ -19,53 +19,36 @@
#include <map>
#include <string>
#include <memory>
#include <utility>
#include "backend/kernel_compiler/tbe/tbe_kernel_build.h"
#include "backend/kernel_compiler/tbe/tbe_kernel_parallel_build.h"
#include "backend/kernel_compiler/tbe/tbe_utils.h"
#include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
#include "utils/ms_context.h"
namespace mindspore {
namespace kernel {
using mindspore::kernel::tbe::TbeUtils;
static bool GenPreBuildKernelJson(const std::vector<AnfNodePtr> &compute_nodes,
std::vector<nlohmann::json> *prebuild_op_list) {
MS_EXCEPTION_IF_NULL(prebuild_op_list);
TbeKernelJsonCreator creator(PREBUILD);
for (const auto &anf_node : compute_nodes) {
nlohmann::json prebuild;
if (!creator.GenTbeSingleKernelJson(anf_node, &prebuild)) {
MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
return false;
}
(*prebuild_op_list).push_back(prebuild);
}
return true;
}
std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo> &fusion_scopes) {
MS_LOG(INFO) << "kernel fusion build start, scope size:" << fusion_scopes.size();
std::map<int32_t, KernelModPtr> kernel_mod_ret;
auto build_manger = std::make_shared<ParallelBuildManager>();
MS_EXCEPTION_IF_NULL(build_manger);
for (const auto &fusion_scope_iter : fusion_scopes) {
auto scope_id = fusion_scope_iter.scope_id;
string fusion_kernel_name;
nlohmann::json fusion_op;
string fusion_kernel = "te_fusion";
if (!TbeKernelBuild::GenFusionScopeJson(fusion_scope_iter.input_nodes, fusion_scope_iter.compute_nodes, &fusion_op,
&fusion_kernel)) {
&fusion_kernel_name)) {
continue;
}
// gen kernel_name & check cache
std::string json_str = fusion_op.dump();
size_t hash_id = std::hash<std::string>()(json_str);
auto json_name = fusion_kernel.append("_").append(std::to_string(hash_id));
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
auto json_name =
fusion_kernel_name.append("_").append(std::to_string(hash_id)).append("_").append(std::to_string(device_id));
fusion_op["fusion_op_name"] = json_name;
// gen json for prebuild
std::vector<nlohmann::json> prebuild_op_list;
if (!GenPreBuildKernelJson(fusion_scope_iter.compute_nodes, &prebuild_op_list)) {
continue;
}
// get io size
std::vector<size_t> input_size_list;
std::vector<size_t> output_size_list;
@ -80,20 +63,20 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
auto kernel_mod =
build_manger->GenKernelMod(json_name, tbe::kProcessorAiCore, input_size_list, output_size_list, kernel_pack);
if (kernel_mod != nullptr) {
kernel_mod_ret[scope_id] = kernel_mod;
kernel_mod_ret[fusion_scope_iter.scope_id] = kernel_mod;
continue;
}
}
// fusion build
nlohmann::json fusion_json;
fusion_json["fusion_op"] = fusion_op;
fusion_json["prebuild_ops"] = prebuild_op_list;
auto task_id = build_manger->StartCompileOp(fusion_json);
TbeUtils::SaveJsonInfo(json_name, fusion_json.dump());
if (task_id < 0) {
MS_EXCEPTION(ArgumentError) << "start compile failed.";
}
build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list, scope_id);
build_manger->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list,
fusion_scope_iter.scope_id);
}
int build_failed_num = 0;

View File

@ -16,6 +16,7 @@
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNELFUSION_H_
#include <utility>
#include <vector>
#include <map>
#include "backend/kernel_compiler/kernel.h"
@ -25,11 +26,9 @@ namespace kernel {
* @brief fuse op and return a callable mod
*/
struct FusionScopeInfo {
FusionScopeInfo() {}
FusionScopeInfo(int32_t id, const std::vector<AnfNodePtr> &in, const std::vector<AnfNodePtr> &comp,
const std::vector<AnfNodePtr> &out)
: scope_id(id), input_nodes(in), compute_nodes(comp), output_nodes(out) {}
int32_t scope_id;
FusionScopeInfo(int32_t id, std::vector<AnfNodePtr> in, std::vector<AnfNodePtr> comp, std::vector<AnfNodePtr> out)
: scope_id(id), input_nodes(std::move(in)), compute_nodes(std::move(comp)), output_nodes(std::move(out)) {}
int32_t scope_id{};
std::vector<AnfNodePtr> input_nodes;
std::vector<AnfNodePtr> compute_nodes;
std::vector<AnfNodePtr> output_nodes;

View File

@ -40,14 +40,13 @@ class OpLib {
private:
static bool RegOpFromLocalInfo();
static bool DecodeOpInfo(const nlohmann::json &obj, const OpImplyType imply_type, const std::string &impl_path);
static bool DecodeAttr(const nlohmann::json &obj, const OpImplyType imply_type,
const std::shared_ptr<OpInfo> &op_info);
static bool DecodeOpInfo(const nlohmann::json &obj, OpImplyType imply_type, const std::string &impl_path);
static bool DecodeAttr(const nlohmann::json &obj, OpImplyType imply_type, const std::shared_ptr<OpInfo> &op_info);
static bool DecodeDtypeFormat(const nlohmann::json &dtype_format, const std::shared_ptr<OpIOInfo> &op_io,
size_t index);
static void DecodeTBESpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
static void DecodeAKGSpecificInfo(const nlohmann::json &obj, const std::shared_ptr<OpInfo> &op_info);
static bool DecodeInputOutput(const nlohmann::json &obj, const OpImplyType imply_type, const OpIOType io_type,
static bool DecodeInputOutput(const nlohmann::json &obj, OpImplyType imply_type, OpIOType io_type,
const std::shared_ptr<OpInfo> &op_info, const nlohmann::json &dtype_format);
static bool GetRefInfo(const std::shared_ptr<OpInfo> &op_info);
static bool CheckRepetition(const std::shared_ptr<OpInfo> &op_info);

View File

@ -173,7 +173,7 @@ void TbeAdapter::NormalizeFuncName(std::string *func_name) {
*func_name = name_tmp;
auto iter = tbe_func_adapter_map.find(*func_name);
if (iter != tbe_func_adapter_map.end()) {
MS_LOG(INFO) << "map actual op from me " << *func_name << " to tbe op" << iter->second;
MS_LOG(INFO) << "Map actual op from me: " << *func_name << " to tbe op: " << iter->second;
*func_name = iter->second;
}
}

View File

@ -27,7 +27,7 @@
// the TBE back-end operator implementation difference
namespace mindspore {
namespace kernel {
enum kCreaterType : int { SINGLE_BUILD = 0, PREBUILD, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
enum kCreaterType : int { SINGLE_BUILD = 0, OP_SELECT_FORMAT, CHECK_SUPPORTED, OP_PRE_COMPILE };
namespace tbe {
using FAttrsPass = void (*)(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
nlohmann::json *attrs_json);

View File

@ -63,7 +63,7 @@ const std::unordered_map<std::string, size_t> type_nbyte_maps = {
const std::unordered_map<std::string, FusionType> fusion_type_maps = {
{"CONVLUTION", FusionType::CONVLUTION}, {"ELEMWISE", FusionType::ELEMWISE}, {"COMMREDUCE", FusionType::COMMREDUCE},
{"SEGMENT", FusionType::SEGMENT}, {"DYNAMIC", FusionType::DYNAMIC}, {"OPAQUE", FusionType::OPAQUE},
{"SEGMENT", FusionType::SEGMENT}, {"OPAQUE", FusionType::OPAQUE},
};
TypeId DtypeToTypeId(const std::string &dtypes) {

View File

@ -24,6 +24,7 @@
#include "backend/kernel_compiler/tbe/tbe_adapter.h"
#include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
#include "backend/kernel_compiler/tbe/tbe_utils.h"
#include "utils/ms_context.h"
namespace mindspore {
namespace kernel {
@ -71,14 +72,20 @@ constexpr auto kVTypeListListInt = "listListInt";
constexpr auto kJValue = "value";
constexpr auto kJDynIndex = "dyn_index";
constexpr auto kJFuncName = "func_name";
// Returns a copy of full_scope_name in which every '/' and '-' is replaced by '_'.
// Example: "Default/ReLU-op0" --> "Default_ReLU_op0"
std::string NormalizeFullScopeName(const string &full_scope_name) {
  std::string normalized(full_scope_name);
  for (char &ch : normalized) {
    if (ch == '/' || ch == '-') {
      ch = '_';
    }
  }
  return normalized;
}
// JSON keys written by TbeKernelBuild::GenPreDescJson.
constexpr auto kJL1AddrOffset = "L1_addr_offset";
constexpr auto kJL1FusionType = "L1_fusion_type";
constexpr auto kJL1WorkspaceSize = "L1_workspace_size";
constexpr auto kJAddrType = "addr_type";
// JSON keys written by TbeKernelBuild::GenSuffixDescJson.
constexpr auto kJSliceOffset = "slice_offset";
constexpr auto kJSplitIndex = "split_index";
constexpr auto kJTotalShape = "total_shape";
constexpr auto kJValidShape = "valid_shape";
// JSON keys written by TbeKernelBuild::GenFusionComputeCommonJson.
constexpr auto kJModuleName = "module_name";
constexpr auto kJPattern = "pattern";
constexpr auto kJPyModulePath = "py_module_path";
// JSON keys written by TbeKernelBuild::GenFusionComputePreBuildJson.
constexpr auto kJPreBuildOutsAttrs = "prebuild_outs_attrs";
constexpr auto kJKwdArgs = "kwds_args";
constexpr auto kJListArgs = "list_args";
bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspore::AnfNode> &anf_node,
nlohmann::json *kernel_json) {
@ -117,13 +124,12 @@ bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspor
op_info_json[kJAttrs] = attrs_json;
std::string json_str = op_info_json.dump();
size_t hash_id = std::hash<std::string>()(json_str);
json_name_ = op_name + "_" + std::to_string(hash_id);
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
json_name_ = op_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id);
json_info_ = json_str;
if (creater_type_ == PREBUILD) {
op_info_json[kJKernelName] = NormalizeFullScopeName(anf_node->fullname_with_scope());
} else {
op_info_json[kJKernelName] = json_name_;
}
op_info_json[kJKernelName] = json_name_;
(*kernel_json)[kJOpInfo] = op_info_json;
(*kernel_json)[kJFullName] = anf_node->fullname_with_scope();
if (creater_type_ == SINGLE_BUILD) {
@ -581,25 +587,25 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector<si
bool TbeKernelBuild::GenFusionScopeJson(const std::vector<mindspore::AnfNodePtr> &input_nodes,
const std::vector<mindspore::AnfNodePtr> &compute_nodes,
nlohmann::json *fusion_str, std::string *fusion_kernel) {
MS_EXCEPTION_IF_NULL(fusion_str);
MS_EXCEPTION_IF_NULL(fusion_kernel);
nlohmann::json *fusion_json, std::string *fusion_kernel_name) {
MS_EXCEPTION_IF_NULL(fusion_json);
MS_EXCEPTION_IF_NULL(fusion_kernel_name);
// get input layer info
std::vector<std::vector<mindspore::AnfNodePtr>> input_layers;
std::map<const AnfNodePtr, FusionDataType> spec_data_input;
if (!GetInputLayers(input_nodes, compute_nodes, &input_layers, &spec_data_input)) {
return false;
}
// gen fusion scopre_op jsom
// gen fusion scopre_op json
std::vector<nlohmann::json> compute_list;
(*fusion_kernel) = kFusionKernelNamePrfix;
(*fusion_kernel_name) = kFusionKernelNamePrfix;
// index: fusion build option input record, next one from 0
static size_t index = 0;
auto layer_iter = input_layers.begin();
auto compute_op_iter = compute_nodes.begin();
for (; compute_op_iter != compute_nodes.end(); ++compute_op_iter, ++layer_iter) {
nlohmann::json compute_op_str;
(void)GenFusionComputeJson(*compute_op_iter, &layer_iter, &compute_op_str, fusion_kernel, &index);
(void)GenFusionComputeJson(*compute_op_iter, &layer_iter, &compute_op_str, fusion_kernel_name, &index);
compute_list.push_back(compute_op_str);
}
index = 0;
@ -617,36 +623,122 @@ bool TbeKernelBuild::GenFusionScopeJson(const std::vector<mindspore::AnfNodePtr>
}
index = 0;
data_list.insert(data_list.end(), compute_list.begin(), compute_list.end());
(*fusion_str)[kFusionOpList] = data_list;
(*fusion_json)[kFusionOpList] = data_list;
return true;
}
// Writes the leading fixed fields of a desc json: L1 address offset, L1 fusion type,
// L1 workspace size and address type, each with its constant default.
void TbeKernelBuild::GenPreDescJson(nlohmann::json *output_desc) {
  MS_EXCEPTION_IF_NULL(output_desc);
  nlohmann::json &desc = *output_desc;
  desc[kJL1AddrOffset] = 0;
  desc[kJL1FusionType] = -1;
  desc[kJL1WorkspaceSize] = -1;
  desc[kJAddrType] = 0;
}
// Fills the fields shared by every fusion compute op json (type, func name, module name,
// name, pattern, python module path) and appends "_<func name>" to *fusion_kernel_name.
// cnode is now null-checked like in GenFusionComputePreBuildJson, because it is dereferenced below.
void TbeKernelBuild::GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str,
                                                std::string *fusion_kernel_name) {
  MS_EXCEPTION_IF_NULL(cnode);
  MS_EXCEPTION_IF_NULL(compute_op_str);
  MS_EXCEPTION_IF_NULL(fusion_kernel_name);
  // gen others
  auto origin_type = AnfAlgo::GetCNodeName(cnode);
  // replace special op type for buffer fusion op
  auto type = GetRealOpType(origin_type);
  (*compute_op_str)[kJtype] = type;
  // From here on `type` holds the normalized tbe function name, not the op type.
  tbe::TbeAdapter::NormalizeFuncName(&type);
  (*compute_op_str)[kJFuncName] = type;
  (*compute_op_str)[kJModuleName] = std::string("impl.") + type;
  (*compute_op_str)[kJName] = cnode->fullname_with_scope();
  (*compute_op_str)[kJPattern] = GetNodeFusionType(cnode);
  // NOTE(review): hard-coded install path of the built-in TBE ops; non-default installs may need this configurable.
  (*compute_op_str)[kJPyModulePath] = "/usr/local/Ascend/opp/op_impl/build_in/ai_core/tbe";
  (void)fusion_kernel_name->append("_");
  (void)fusion_kernel_name->append(type);
}
// Builds the prebuild argument json of a fusion compute op and stores it under kJPreBuildOutsAttrs:
//   kJKwdArgs  : empty json object
//   kJListArgs : one desc per output, followed by the values of the valid attrs (except "isRef")
void TbeKernelBuild::GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str) {
  MS_EXCEPTION_IF_NULL(cnode);
  MS_EXCEPTION_IF_NULL(compute_op_str);

  nlohmann::json prebuild_args;
  prebuild_args[kJKwdArgs] = nlohmann::json::object();

  // Positional args, part 1: output descs, with the data type duplicated under kJDtype.
  nlohmann::json list_args;
  auto output_num = AnfAlgo::GetOutputTensorNum(cnode);
  for (size_t out_idx = 0; out_idx < output_num; ++out_idx) {
    nlohmann::json out_desc;
    GenDescJson(cnode, out_idx, out_idx, &out_desc);
    out_desc[kJDtype] = out_desc[kJDataType];
    list_args.push_back(out_desc);
  }

  // Positional args, part 2: attr values taken from the op's registered TBE info.
  auto node_op_name = AnfAlgo::GetCNodeName(cnode);
  auto op_info_ptr = OpLib::FindOp(node_op_name, OpImplyType::kTBE);
  MS_EXCEPTION_IF_NULL(op_info_ptr);
  TbeKernelJsonCreator attr_creator(SINGLE_BUILD);
  nlohmann::json attr_args;
  const bool attr_generated = attr_creator.GenTbeAttrJson(cnode, op_info_ptr, &attr_args);
  if (!attr_generated) {
    // Failure is only logged; whatever ended up in attr_args is still processed below.
    MS_LOG(INFO) << "Fusion warning: get prebuild args of attr failed.";
  }
  for (const auto &attr : attr_args) {
    if (attr[kJName] == "isRef" || attr[kJValid] != true) {
      continue;
    }
    list_args.push_back(attr[kJValue]);
  }

  prebuild_args[kJListArgs] = list_args;
  (*compute_op_str)[kJPreBuildOutsAttrs] = prebuild_args;
}
// Writes the trailing fixed fields of a desc json: empty slice offset, split index 0,
// and empty total/valid shape arrays.
void TbeKernelBuild::GenSuffixDescJson(nlohmann::json *output_desc) {
  MS_EXCEPTION_IF_NULL(output_desc);
  nlohmann::json &desc = *output_desc;
  desc[kJSliceOffset] = nlohmann::json::array();
  desc[kJSplitIndex] = 0;
  desc[kJTotalShape] = nlohmann::json::array();
  desc[kJValidShape] = nlohmann::json::array();
}
// anf_node: this node is used to get output desc(type\format\shape ...)
// node_out_idx: node output index
// desc_output_idx: this index is used to add json
// nlohmann::json *output_desc: for return
// FusionDataType fusion_data_type: special process json desc output shape [kFusionAddN, kFusionReLUGradV2]
void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
size_t desc_output_idx, nlohmann::json *output_desc, FusionDataType fusion_data_type) {
GenPreDescJson(output_desc);
// data_type
auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx);
(*output_desc)[kJDataType] = tbe::TypeIdToString(type_id);
// name
std::string output_desc_name = anf_node->fullname_with_scope();
if (node_out_idx > 0) {
output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx);
}
(*output_desc)[kJName] = NormalizeFullScopeName(output_desc_name);
auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx);
(*output_desc)[kJDataType] = tbe::TypeIdToString(type_id);
(*output_desc)[kJName] = output_desc_name;
// ori_format
(*output_desc)[kJOriFormat] = kOpFormat_NCHW;
// ori_shape
auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, node_out_idx);
if (ori_shape.empty()) {
ori_shape.emplace_back(1);
}
(*output_desc)[kJOriShape] = ori_shape;
// !! Note: output_index, only node's output use it
(*output_desc)[kJOutputIndex] = desc_output_idx;
// shape
auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, node_out_idx);
if (shape.empty()) {
shape.emplace_back(1);
}
(*output_desc)[kJShape] = shape;
// !! Note: format: only data node's output use it
auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx);
if (format == kOpFormat_DEFAULT) {
format = ori_shape.size() == 4 ? kOpFormat_NCHW : kOpFormat_ND;
} else if (format == kOpFormat_FRAC_Z) {
format = kOpFormat_FRACTAL_Z;
}
(*output_desc)[kJFormat] = format;
(*output_desc)[kJOriFormat] = kOpFormat_NCHW;
(*output_desc)[kJOutputIndex] = desc_output_idx;
// special node
if (fusion_data_type == kFusionAddN && format == kOpFormat_NC1HWC0) {
std::vector<size_t> spec_shape = {};
spec_shape.emplace_back(shape[0]);
@ -663,12 +755,13 @@ void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_
(*output_desc)[kJShape] = spec_shape;
(*output_desc)[kJDataType] = kVTypeBool;
}
GenSuffixDescJson(output_desc);
}
void TbeKernelBuild::GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
size_t output_index, nlohmann::json *output_desc) {
std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index);
(*output_desc)[kJName] = NormalizeFullScopeName(output_desc_name);
(*output_desc)[kJName] = output_desc_name;
(*output_desc)[kJOutputIndex] = output_index;
std::vector<size_t> shape;
(*output_desc)[kJShape] = shape;
@ -692,6 +785,9 @@ bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name,
return true;
}
// <input_nodes> : contains parameter/data nodes; the input order may not match the tbe input order;
// <compute_nodes> : contains cnodes; the inputs order may not match the tbe input order;
// Special process node list: reference tbe_adapter.cc [except: Conv2DBackpropInput]
bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes,
const std::vector<mindspore::AnfNodePtr> &compute_nodes,
std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers,
@ -722,7 +818,7 @@ bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &in
MS_LOG(INFO) << "Fusion info: add compute node's [" << i << "] input: " << input->fullname_with_scope();
layer.emplace_back((*find_iter));
} else {
MS_LOG(INFO) << "Fusion warnig: this input [" << i << "] may be pre compute(" << input->fullname_with_scope()
MS_LOG(INFO) << "Fusion warning: this input [" << i << "] may be pre compute(" << input->fullname_with_scope()
<< ") node's output.";
}
}
@ -750,8 +846,9 @@ bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::Anf
MS_EXCEPTION_IF_NULL(data_str);
MS_EXCEPTION_IF_NULL(index);
std::vector<nlohmann::json> output_desc_list;
// if data_input is null, this is optional input.
if (!data_input) {
MS_LOG(INFO) << "Data input is optional node";
MS_LOG(INFO) << "Fusion info: data input is optional node";
auto name = std::string(kOptional) + std::to_string(*index);
(*data_str)[kJName] = name;
nlohmann::json output_desc;
@ -767,12 +864,16 @@ bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::Anf
auto kernel_idx = AnfAlgo::VisitKernel(data_input, 0);
auto real_node = kernel_idx.first;
size_t real_idx = kernel_idx.second;
MS_LOG(INFO) << "Real name " << real_node->fullname_with_scope() << " index:" << real_idx;
MS_LOG(INFO) << "Fusion info: Real name: " << real_node->fullname_with_scope() << ". index:" << real_idx;
// kJOutputDesc
nlohmann::json output_desc;
GenDescJson(real_node, real_idx, real_idx, &output_desc, fusion_data_type);
output_desc_list.push_back(output_desc);
(*data_str)[kJName] = NormalizeFullScopeName(real_node->fullname_with_scope());
auto full_name = real_node->fullname_with_scope();
if (real_idx > 0) {
full_name = full_name.append("_").append(std::to_string(real_idx));
}
(*data_str)[kJName] = full_name;
}
(*data_str)[kJOutputDesc] = output_desc_list;
(*data_str)[kJtype] = "Data";
@ -808,6 +909,7 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) {
size_t TbeKernelBuild::GetOptionalInput(const mindspore::CNodePtr &cnode, bool is_dynamic_input) {
MS_EXCEPTION_IF_NULL(cnode);
if (is_dynamic_input) {
// Node can not have optional & dynamic input.
return 0;
}
MS_EXCEPTION_IF_NULL(cnode);
@ -831,22 +933,46 @@ std::string TbeKernelBuild::GetRealOpType(const std::string &origin_type) {
return result;
}
// Look up the fusion type string that corresponds to a compute node's op name.
// cnode: node to query; an exception is raised through MS_EXCEPTION_IF_NULL when it is null.
// Returns the mapped fusion type for the op names listed in the table below; for any other
// op name an INFO message is logged and an empty string is returned.
std::string TbeKernelBuild::GetNodeFusionType(const mindspore::CNodePtr &cnode) {
  MS_EXCEPTION_IF_NULL(cnode);
  auto node_type = AnfAlgo::GetCNodeName(cnode);
  // Read-only lookup table: const-qualified so it can never be modified after its
  // one-time initialization.
  static const std::map<std::string, std::string> fusion_type_map = {
    {kConv2DOpName, "Convolution"},
    {kBNTrainingReduceOpName, "bn_reduce"},
    {kBNTrainingUpdateOpName, "bn_update"},
    {kReluV2OpName, "ElemWise"},
    {kTensorAddOpName, "ElemWise"},
    {kConv2DBackpropInputOpName, "Conv2d_backprop_input"},
    {kAddNOpName, "ElemWise"},
    {kReluGradV2OpName, "ElemWise"},
    {kRealDivOpName, "ElemWise"}};
  const auto found = fusion_type_map.find(node_type);
  if (found == fusion_type_map.end()) {
    MS_LOG(INFO) << "Fusion warning: get node fusion type failed, origin node type: " << node_type
                 << " return null string.";
    return "";
  }
  return found->second;
}
bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
std::vector<nlohmann::json> *input_desc_list, size_t *index) {
MS_EXCEPTION_IF_NULL(cnode);
MS_EXCEPTION_IF_NULL(input_desc_list);
std::vector<nlohmann::json> input_desc_list_tmp = {};
// 1. input json
bool is_dynamic_input = IsDynamicInput(cnode);
for (size_t i = 1; i < cnode->inputs().size(); ++i) {
auto input = cnode->input(i);
auto kernel_idx = AnfAlgo::VisitKernel(input, 0);
auto real_node = kernel_idx.first;
size_t real_idx = kernel_idx.second;
MS_LOG(INFO) << "Real name" << real_node->fullname_with_scope() << "index:" << real_idx;
MS_LOG(INFO) << "Fusion info: real name: " << real_node->fullname_with_scope() << ". index:" << real_idx;
nlohmann::json input_desc;
GenDescJson(real_node, real_idx, real_idx, &input_desc);
if (is_dynamic_input) {
// 2. dynamic input json
MS_LOG(INFO) << "Node has dynamic input.";
input_desc[kJDynIndex] = (i - 1);
}
@ -854,7 +980,8 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
}
size_t optional_num = GetOptionalInput(cnode, is_dynamic_input);
if (optional_num > 0) {
MS_LOG(INFO) << "Node has optional input.";
// 3. optional input
MS_LOG(INFO) << "Fusion info: node has optional input.";
for (size_t i = 0; i < optional_num; ++i) {
nlohmann::json optional_input_desc;
optional_input_desc[kJName] = std::string(kOptional) + std::to_string(*index);
@ -872,7 +999,7 @@ std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int> &o
std::vector<size_t> desc_output_index = {};
for (size_t idx = 0; idx < output_used_nums.size(); ++idx) {
auto output_use_num_item = output_used_nums[idx];
MS_LOG(INFO) << "Output used num[" << idx << "] = " << output_use_num_item;
MS_LOG(INFO) << "Fusion info: output used num[" << idx << "] = " << output_use_num_item;
desc_output_index.emplace_back(idx);
if (output_use_num_item > 1) {
desc_output_index.emplace_back(idx);
@ -887,7 +1014,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode
auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) {
auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum);
MS_LOG(INFO) << "This node's output has been reused, node name: " << cnode->fullname_with_scope();
MS_LOG(INFO) << "Fusion info: this node's output has been reused, node name: " << cnode->fullname_with_scope();
if (output_used_nums.size() != output_size) {
MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")"
<< " is not match output used num(" << output_used_nums.size() << ")";
@ -930,20 +1057,14 @@ bool TbeKernelBuild::GenFusionComputeJson(const mindspore::AnfNodePtr &compute_n
// gen output desc
std::vector<nlohmann::json> output_desc_list;
if (!GenFusionComputeOutputJson(cnode, &output_desc_list)) {
MS_LOG(INFO) << "Fusion Error: gen fusion output desc faild, node full name: " << cnode->fullname_with_scope();
MS_LOG(INFO) << "Fusion Error: gen fusion output desc failed, node full name: " << cnode->fullname_with_scope();
return false;
}
(*compute_op_str)[kJOutputDesc] = output_desc_list;
// gen others
auto origin_type = AnfAlgo::GetCNodeName(cnode);
// replace special op type for buffer fusion op
auto type = GetRealOpType(origin_type);
(*compute_op_str)[kJtype] = type;
tbe::TbeAdapter::NormalizeFuncName(&type);
(*compute_op_str)[kJFuncName] = type;
(*compute_op_str)[kJName] = NormalizeFullScopeName(cnode->fullname_with_scope());
(void)(*fusion_kernel_name).append("_");
(void)(*fusion_kernel_name).append(type);
// gen common desc
GenFusionComputeCommonJson(cnode, compute_op_str, fusion_kernel_name);
// gen prebuild args
GenFusionComputePreBuildJson(cnode, compute_op_str);
return true;
}
@ -965,7 +1086,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list,
MS_EXCEPTION_IF_NULL(output_size_list);
input_size_list->clear();
output_size_list->clear();
// cal input size for malloc
for (const auto &op : fusion_op_list) {
if (op[kJtype] == "Data") {
const auto &data_output_desc = op[kJOutputDesc];
@ -975,23 +1096,23 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list,
}
auto ret = GetIOSizeImpl(data_output);
input_size_list->push_back(ret);
MS_LOG(INFO) << "Fusion info: scope input name " << op[kJName] << ", size: " << ret;
MS_LOG(INFO) << "Fusion info: input node name " << op[kJName] << ", size: " << ret;
}
}
}
// cal output size for malloc
for (const auto &output_node : output_nodes) {
auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0);
auto real_node = kernel_idx.first;
size_t real_idx = kernel_idx.second;
auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope());
MS_LOG(INFO) << "Fusion info: real node name: " << normal_name << ", real output index: " << real_idx;
auto full_name = real_node->fullname_with_scope();
MS_LOG(INFO) << "Fusion info: real output node name: " << full_name << ", real output index: " << real_idx;
for (const auto &op : fusion_op_list) {
if (op[kJName] == normal_name) {
if (op[kJName] == full_name) {
auto op_output_desces = op[kJOutputDesc];
if (output_node != real_node) {
// tuple_get item
MS_LOG(INFO) << "Output is a tuple getitem node";
MS_LOG(INFO) << "Fusion info: output is a tuple get_item node";
auto output_desc = op_output_desces[real_idx];
if (output_desc[kJShape].empty()) {
MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. real_index " << real_idx;
@ -1001,6 +1122,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list,
output_size_list->push_back(ret);
MS_LOG(INFO) << "Fusion info: scope output index " << real_idx << ", size: " << ret;
} else {
MS_LOG(INFO) << "Fusion info: output is self.";
for (const auto &output_desc : op_output_desces) {
if (output_desc[kJShape].empty()) {
MS_LOG(INFO) << "Fusion info: output_desc's shape is empty, may be this node output";

View File

@ -41,8 +41,8 @@ class TbeKernelBuild {
std::vector<size_t> *output_size_list);
// Ub Fusion
static bool GenFusionScopeJson(const std::vector<AnfNodePtr> &input_nodes,
const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_str,
std::string *fusion_kernel);
const std::vector<AnfNodePtr> &compute_nodes, nlohmann::json *fusion_json,
std::string *fusion_kernel_name);
static bool GetIOSize(const nlohmann::json &fusion_op_list, const std::vector<AnfNodePtr> &output_nodes,
std::vector<size_t> *input_size_list, std::vector<size_t> *output_size_list);
@ -61,9 +61,14 @@ class TbeKernelBuild {
static std::vector<size_t> GetDescOutputIndex(const std::vector<int> &output_used_nums);
static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
std::vector<nlohmann::json> *output_desc_list);
static void GenPreDescJson(nlohmann::json *output_desc);
static void GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str,
std::string *fusion_kernel_name);
static void GenFusionComputePreBuildJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str);
static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
size_t desc_output_idx, nlohmann::json *output_desc,
FusionDataType fusion_data_type = kFusionNormal);
static void GenSuffixDescJson(nlohmann::json *output_desc);
static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
size_t output_index, nlohmann::json *output_desc);
static size_t GetIOSizeImpl(const nlohmann::json &desc);
@ -76,6 +81,7 @@ class TbeKernelBuild {
static bool IsDynamicInput(const CNodePtr &cnode);
static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input);
static std::string GetRealOpType(const std::string &origin_type);
static std::string GetNodeFusionType(const CNodePtr &cnode);
};
class TbeKernelJsonCreator {
@ -84,14 +90,14 @@ class TbeKernelJsonCreator {
~TbeKernelJsonCreator() = default;
bool GenTbeSingleKernelJson(const std::shared_ptr<AnfNode> &anf_node, nlohmann::json *kernel_json);
std::string json_name() { return json_name_; }
bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
nlohmann::json *attrs_json);
private:
bool GenTbeInputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
nlohmann::json *inputs_json);
bool GenTbeOutputsJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
nlohmann::json *outputs_json);
bool GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpInfo> &op_info,
nlohmann::json *attrs_json);
static void ParseAttrValue(const std::string &type, const ValuePtr &value, nlohmann::json *attr_obj);
bool GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index, bool value,
const std::shared_ptr<OpIOInfo> &input_ptr, const string &op_input_name, size_t input_i,

View File

@ -33,42 +33,6 @@
namespace mindspore {
namespace kernel {
using mindspore::kernel::tbe::TbeUtils;
bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes) {
auto build_manger = std::make_shared<ParallelBuildManager>();
MS_EXCEPTION_IF_NULL(build_manger);
for (const auto &anf_node : anf_nodes) {
// gen kernel json
MS_EXCEPTION_IF_NULL(anf_node);
nlohmann::json kernel_json;
TbeKernelJsonCreator creator(OP_PRE_COMPILE);
if (!creator.GenTbeSingleKernelJson(anf_node, &kernel_json)) {
MS_LOG(ERROR) << "GenTbeSingleKernelJson failed";
return false;
}
kernel_json["compile_type"] = "pre_build";
// op build
auto task_id = build_manger->StartCompileOp(kernel_json);
build_manger->SavePreTaskInfo(task_id, anf_node);
}
while (!build_manger->IsAllPreTaskFinish()) {
int task_id = -1;
std::string task_result;
std::string pre_build_result;
auto ret = build_manger->WaitOne(&task_id, &task_result, &pre_build_result);
if (!ret) {
MS_EXCEPTION(ArgumentError) << "Pre Build Failed. wait one ret:" << ret << ", task id:" << task_id;
}
if (task_result != "Success") {
MS_EXCEPTION(ArgumentError) << "task pre compile Failed, task id:" << task_id << ", cause:" << task_result;
}
build_manger->PreTaskFinishProcess(task_id, pre_build_result);
}
return true;
}
bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
auto build_manger = std::make_shared<ParallelBuildManager>();
MS_EXCEPTION_IF_NULL(build_manger);
@ -122,15 +86,8 @@ bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes) {
return build_manger->GenSameOpKernelMod();
}
// The constructor performs no work of its own.
ParallelBuildManager::ParallelBuildManager() = default;
// Calls ResetTaskInfo() when the manager is destroyed.
ParallelBuildManager::~ParallelBuildManager() {
  ResetTaskInfo();
}
// Remember which node a submitted pre-build task belongs to.
// task_id: id of the submitted task; an existing entry with the same id is overwritten.
// anf_node: node the task was created for.
void ParallelBuildManager::SavePreTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node) {
  MS_LOG(INFO) << "SavePreTaskInfo, task id: "
               << task_id;
  auto &slot = pre_task_map_[task_id];
  slot = anf_node;
}
void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNodePtr &anf_node,
const std::string &json_name, const std::vector<size_t> &input_size_list,
const std::vector<size_t> &output_size_list, int32_t scope_id) {
@ -149,42 +106,11 @@ void ParallelBuildManager::SaveTaskInfo(int32_t task_id, const mindspore::AnfNod
task_map_[task_id] = task_info;
}
// Log how many pre-build tasks are still pending and report whether none are left.
bool ParallelBuildManager::IsAllPreTaskFinish() const {
  const auto pending_num = pre_task_map_.size();
  MS_LOG(INFO) << "wait pre build process task_num: " << pending_num;
  return pending_num == 0;
}
// Log how many build tasks are still pending and report whether none are left.
bool ParallelBuildManager::IsAllTaskFinish() const {
  const auto pending_num = task_map_.size();
  MS_LOG(INFO) << "wait process task_num: " << pending_num;
  return pending_num == 0;
}
// Consume the result of one finished pre-build task.
// The text between "fusion_pattern_start" and "fusion_pattern_end" in pre_build_result is
// upper-cased, converted with tbe::GetFusionType and stored as the node's fusion type.
// When the markers are missing or enclose nothing, the node's build info is left untouched.
// In every case the task is removed from pre_task_map_.
// task_id: id of the finished task; ArgumentError is raised when it is not in pre_task_map_.
// pre_build_result: output text of the pre-build task.
void ParallelBuildManager::PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result) {
  auto task_iter = pre_task_map_.find(task_id);
  if (task_iter == pre_task_map_.end()) {
    MS_EXCEPTION(ArgumentError) << "can not find pre task_id:" << task_id;
  }
  auto node = task_iter->second;
  auto builder =
    std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node));
  const std::string start_flag = "fusion_pattern_start";
  const std::string end_flag = "fusion_pattern_end";
  // std::string::find reports "not found" as npos, so keep the positions as size_type
  // instead of narrowing them to int and comparing against -1.
  const std::string::size_type start = pre_build_result.find(start_flag);
  const std::string::size_type end = pre_build_result.find(end_flag);
  if (start != std::string::npos && end != std::string::npos && end >= start + start_flag.size()) {
    const std::string::size_type content_begin = start + start_flag.size();
    std::string result = pre_build_result.substr(content_begin, end - content_begin);
    if (!result.empty()) {
      // toupper must receive a value representable as unsigned char.
      std::transform(result.begin(), result.end(), result.begin(),
                     [](unsigned char c) { return static_cast<char>(::toupper(c)); });
      FusionType fusion_type = tbe::GetFusionType(result);
      builder->SetFusionType(fusion_type);
      AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), node.get());
    }
  }
  (void)pre_task_map_.erase(task_iter);
}
std::pair<int32_t, KernelModPtr> ParallelBuildManager::TaskFinishProcess(int32_t task_id, bool set_kernel_mod) {
auto task_iter = task_map_.find(task_id);
if (task_iter == task_map_.end()) {

View File

@ -28,7 +28,6 @@
namespace mindspore {
namespace kernel {
bool TbeOpParallelPreBuild(const std::vector<AnfNodePtr> &anf_nodes);
bool TbeOpParallelBuild(const std::vector<AnfNodePtr> &anf_nodes);
struct KernelBuildTaskInfo {
@ -42,9 +41,8 @@ struct KernelBuildTaskInfo {
class ParallelBuildManager {
public:
ParallelBuildManager();
ParallelBuildManager() = default;
~ParallelBuildManager();
void SavePreTaskInfo(int32_t task_id, const AnfNodePtr &anf_node);
void SaveTaskInfo(int32_t task_id, const AnfNodePtr &anf_node, const std::string &json_name,
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
int32_t scope_id = 0);
@ -54,10 +52,7 @@ class ParallelBuildManager {
bool SearchInCache(const std::string &json_name, const std::string &processor,
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,
AnfNode *node) const;
bool IsAllPreTaskFinish() const;
bool IsAllTaskFinish() const;
void PreTaskFinishProcess(int32_t task_id, const std::string &pre_build_result);
std::pair<int32_t, KernelModPtr> TaskFinishProcess(int32_t task_id, bool set_kernel_mod = true);
KernelModPtr GenKernelMod(const string &json_name, const string &processor,
const std::vector<size_t> &input_size_list, const std::vector<size_t> &output_size_list,

View File

@ -1187,6 +1187,19 @@ TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputPrecision(const AnfNodePtr &node, s
return GetCNodeOutputPrecision(kernel_with_index.first);
}
// Tell whether a node carries the kAttrIsDynamicShape attribute with a true value.
// node: node to inspect; an exception is raised through MS_EXCEPTION_IF_NULL when it is null.
// Returns false for nodes that are not CNodes and for CNodes without the attribute;
// otherwise returns the boolean value of the attribute.
bool AnfRuntimeAlgorithm::IsDynamicShape(const AnfNodePtr &node) {
  // Check before node->isa<CNode>() dereferences the pointer.
  MS_EXCEPTION_IF_NULL(node);
  if (!node->isa<CNode>()) {
    return false;
  }
  auto cnode = node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);
  if (!AnfAlgo::HasNodeAttr(kAttrIsDynamicShape, cnode)) {
    return false;
  }
  return AnfAlgo::GetNodeAttr<bool>(node, kAttrIsDynamicShape);
}
bool AnfRuntimeAlgorithm::IsCondControlKernel(const CNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
if (node->inputs().empty()) {

View File

@ -217,6 +217,7 @@ class AnfRuntimeAlgorithm {
static TypeId GetCNodeOutputPrecision(const AnfNodePtr &node);
// get fix output precision from prev node, input_idx is the input index of current node related to prev node.
static TypeId GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx);
static bool IsDynamicShape(const AnfNodePtr &node);
static bool IsCondControlKernel(const CNodePtr &node);
static bool IsIndependentNode(const CNodePtr &node);
};

View File

@ -445,7 +445,6 @@ void AscendSession::InitRuntimeResource() {
}
void AscendSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) const {
device::ascend::KernelPreBuild(kernel_graph.get());
MS_LOG(INFO) << "HardwareOptimize start!";
opt::AscendBackendOptimization(kernel_graph);
opt::AscendGraphKernelCommonProcess(kernel_graph);

View File

@ -19,7 +19,8 @@
#include <vector>
#include <string>
#include <memory>
#include <set>
#include <map>
#include "runtime/device/ascend/kernel_select_ascend.h"
#include "runtime/device/kernel_info.h"
#include "backend/kernel_compiler/kernel.h"
@ -61,32 +62,6 @@ static kernel::KernelModPtr SerialCompileImpl(const AnfNodePtr &anf_node) {
return kernel_mod_ptr;
}
// Collect the nodes of the graph's execution order that need a pre-build and pre-build them.
// A node is selected when it is a real kernel of type TBE_KERNEL that has no kernel mod yet
// and whose fusion type is DYNAMIC.
// Returns the result of kernel::TbeOpParallelPreBuild for the selected nodes.
static bool KernelPreBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) {
  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
  std::vector<AnfNodePtr> pre_build_nodes;
  for (const auto &node : kernel_graph_ptr->execution_order()) {
    MS_EXCEPTION_IF_NULL(node);
    if (!AnfAlgo::IsRealKernel(node)) {
      continue;
    }
    // Only TBE kernels take part in the pre-build.
    if (AnfAlgo::GetKernelType(node) != KernelType::TBE_KERNEL) {
      continue;
    }
    const bool needs_pre_build =
      AnfAlgo::GetKernelMod(node) == nullptr && AnfAlgo::GetFusionType(node) == kernel::FusionType::DYNAMIC;
    if (needs_pre_build) {
      pre_build_nodes.push_back(node);
    }
  }
  return kernel::TbeOpParallelPreBuild(pre_build_nodes);
}
static bool KernelBuildParallelCompile(const mindspore::session::KernelGraph *kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
std::vector<AnfNodePtr> tbe_nodes;
@ -237,12 +212,6 @@ static bool IsAtomicNode(const CNodePtr &kernel_node) {
return !(workspace_indexs.empty() && output_indexs.empty());
}
// Entry point of the kernel pre-build for a kernel graph.
// kernel_graph_ptr: graph to pre-build; an exception is raised when it is null.
// Returns the result of KernelPreBuildParallelCompile.
bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
  return device::ascend::KernelPreBuildParallelCompile(kernel_graph_ptr);
}
bool KernelBuild(const mindspore::session::KernelGraph *kernel_graph_ptr) {
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
TbeUtils::LoadCache();

View File

@ -22,10 +22,6 @@
namespace mindspore {
namespace device {
namespace ascend {
/**
* @brief kernel pre build for ascend.
*/
bool KernelPreBuild(const mindspore::session::KernelGraph *kernel_graph_ptr);
/**
* @brief kernel build for ascend.
*/

View File

@ -32,6 +32,7 @@ namespace mindspore {
// op name. Ops which do not exist in operator/ops.h, so their names are defined here
constexpr auto kFour2FiveOpName = "Four2Five";
constexpr auto kFive2FourOpName = "Five2Four";
constexpr auto kConv2DOpName = "Conv2D";
constexpr auto kConvBN1OpName = "ConvBN1";
constexpr auto kBN2AddReluOpName = "BN2AddRelu";
constexpr auto kBN2ReLUOpName = "BN2Relu";
@ -273,6 +274,7 @@ constexpr auto kAttrPadDimSize = "pad_dim_size";
constexpr auto kAttrNumSegments = "num_segments";
constexpr auto kAttrBegin = "begin";
constexpr auto kAttrSize = "size";
constexpr auto kAttrIsDynamicShape = "is_dynamic_shape";
// attr value
constexpr auto kValueTargetSwitch = "target_switch";

View File

@ -13,6 +13,7 @@
# limitations under the License.
"""aicpu ops"""
from .unique import _unique_aicpu
from .init_data_set_queue import _init_data_set_queue_aicpu
from .embedding_lookup import _embedding_lookup_aicpu
from .padding import _padding_aicpu

View File

@ -0,0 +1,31 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Unique op"""
from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType
# AiCPU registration info for the "Unique" op: one required input `x` and two required
# outputs `y` and `idx`, registered for DataType.I32_Default and DataType.I64_Default.
unique_op_info = (
    AiCPURegOp("Unique")
    .fusion_type("OPAQUE")
    .input(0, "x", "required")
    .output(0, "y", "required")
    .output(1, "idx", "required")
    .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default)
    .dtype_format(DataType.I64_Default, DataType.I64_Default, DataType.I64_Default)
    .get_op_info()
)
@op_info_register(unique_op_info)
def _unique_aicpu():
    """Placeholder function that op_info_register decorates with the Unique AiCPU op info."""
    return None

View File

@ -17,7 +17,7 @@
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
matmul_op_info = TBERegOp("MatMul") \
.fusion_type("ELEMWISE") \
.fusion_type("OPAQUE") \
.async_flag(False) \
.binfile_name("matmul.so") \
.compute_cost(10) \

View File

@ -91,6 +91,7 @@ from ._thor_ops import (CusBatchMatMul, CusCholeskyTrsm, CusFusedAbsMax1, CusImg
from .sparse_ops import SparseToDense
__all__ = [
'Unique',
'ReverseSequence',
'EditDistance',
'CropAndResize',

View File

@ -597,9 +597,9 @@ class Unique(Primitive):
containing indices of elements in the input corresponding to the output tensor.
Examples:
>>> x = Tensor(np.array([1, 2, 5, 2]), mindspore.float32)
>>> x = Tensor(np.array([1, 2, 5, 2]), mindspore.int32)
>>> out = P.Unique()(x)
(Tensor([1, 2, 5], mindspore.int32), Tensor([0, 1, 2, 1], mindspore.float32))
(Tensor([1, 2, 5], mindspore.int32), Tensor([0, 1, 2, 1], mindspore.int32))
"""
@prim_attr_register
def __init__(self):

View File

@ -35,39 +35,5 @@ StatusFactory::StatusFactory() {}
// Return the rwMutex_ member of the object returned by GetInstance().
std::mutex &StatusFactory::GetMutex() {
  return GetInstance()->rwMutex_;
}
// Return a pointer to the single function-local static TsdClient object.
TsdClient *TsdClient::GetInstance() {
  static TsdClient client;
  return &client;
}
/**
 * @ingroup TsdClient
 * @brief Construct the client with rankSize_ set to 1.
 */
TsdClient::TsdClient() {
  rankSize_ = 1;
}
/**
 * @ingroup TsdClient
 * @brief Defaulted destructor; no explicit cleanup is performed here.
 */
TsdClient::~TsdClient() = default;
/**
 * @ingroup TsdClient
 * @brief The framework sends the command that starts hccp and the compute process.
 *        This implementation performs no action.
 * @param [in] deviceId : physical device ID passed in by the framework (not used here)
 * @param [in] rankSize : rank size passed in by the framework (not used here)
 * @return TDT_OK: always
 */
TDT_StatusT TsdClient::Open(const uint32_t deviceId, const uint32_t rankSize) {
  return TDT_OK;
}
/**
 * @ingroup TsdClient
 * @brief TsdClient releases its related resources. This implementation performs no action.
 * @param none
 * @return TDT_OK: always
 */
TDT_StatusT TsdClient::Close() {
  return TDT_OK;
}
} // namespace tdt
#endif // TDT_MOCK_H