forked from mindspore-Ecosystem/mindspore
!46092 PyNative dynamic shape
Merge pull request !46092 from caifubi/r2.0.0-alpha
This commit is contained in: commit 0d03bdec89
@@ -28,7 +28,7 @@
 #include "backend/common/pass/convert_attr_to_unify_mindir.h"
 #include "backend/common/pass/optimize_updatestate.h"
 #include "backend/common/pass/conv_transpose_to_conv_bp.h"
-#include "backend/common/pass/reduce_sum_optimizer.h"
+#include "backend/common/pass/reduce_optimizer.h"
 #include "backend/common/pass/add_dynamic_shape_attr.h"
 #include "backend/common/pass/add_akg_kernel_attrs.h"
 #include "backend/common/pass/inplace_assign_for_custom_op.h"
@@ -62,7 +62,7 @@ void BackendCommonOptimization(const std::shared_ptr<session::KernelGraph> &kern
   auto common_pm = std::make_shared<PassManager>("common_pm");
   common_pm->AddPass(std::make_shared<AddDynamicShapeAttr>());
   common_pm->AddPass(std::make_shared<ConvertDynamicBroadcastTo>());
-  common_pm->AddPass(std::make_shared<ReduceSumOptimizer>());
+  common_pm->AddPass(std::make_shared<ReduceOptimizer>());
   common_pm->AddPass(std::make_shared<ConvertConstInputToAttr>());
   common_pm->AddPass(std::make_shared<CustomOpConstInputToAttr>());
   common_pm->AddPass(std::make_shared<ConvertConstInputToTensorInput>());
@@ -91,7 +91,7 @@ void OpBackendCommonOptimization(const std::shared_ptr<session::KernelGraph> &ke
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto optimizer = std::make_shared<GraphOptimizer>();
   auto common_pm = std::make_shared<PassManager>("op_common_pm");
-  common_pm->AddPass(std::make_shared<ReduceSumOptimizer>());
+  common_pm->AddPass(std::make_shared<ReduceOptimizer>());
   common_pm->AddPass(std::make_shared<ConvertConstInputToTensorInput>());
   optimizer->AddPassManager(common_pm);
   (void)optimizer->Optimize(kernel_graph);
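The three hunks above rename ReduceSumOptimizer to ReduceOptimizer and re-register it in both pass managers, preparing the same axis handling to cover ReduceMean as well (see the Process hunk below). A minimal standalone sketch of this append-and-run pass-manager pattern (mock Graph/Pass types, not MindSpore's PassManager API):

```cpp
#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct Graph { std::string ir; };
using Pass = std::function<bool(Graph *)>;  // returns true if it changed the graph

int main() {
  std::vector<Pass> common_pm;
  // The renamed pass now rewrites both ReduceSum and ReduceMean nodes.
  common_pm.push_back([](Graph *g) {
    if (g->ir == "ReduceSum" || g->ir == "ReduceMean") { g->ir += "+axis_assist"; return true; }
    return false;
  });
  Graph g{"ReduceMean"};
  bool changed = true;
  while (changed) {  // run all passes to a fixed point, like PassManager::Run
    changed = false;
    for (auto &pass : common_pm) changed = pass(&g) || changed;
  }
  std::cout << g.ir << '\n';  // prints: ReduceMean+axis_assist
}
```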
@@ -14,7 +14,7 @@
  * limitations under the License.
  */

-#include "backend/common/pass/reduce_sum_optimizer.h"
+#include "backend/common/pass/reduce_optimizer.h"
 #include <vector>
 #include "include/common/utils/anfalgo.h"
 #include "utils/ms_context.h"
@@ -25,7 +25,7 @@ namespace {
 constexpr int axis_input_index = 2;
 }  // namespace

-AnfNodePtr ReduceSumOptimizer::NewRankOp(const AnfNodePtr &cnode, const KernelGraphPtr &kernel_graph) const {
+AnfNodePtr ReduceOptimizer::NewRankOp(const AnfNodePtr &cnode, const KernelGraphPtr &kernel_graph) const {
   MS_EXCEPTION_IF_NULL(cnode);
   MS_EXCEPTION_IF_NULL(kernel_graph);
   std::vector<AnfNodePtr> rank_inputs;
@@ -39,7 +39,7 @@ AnfNodePtr ReduceSumOptimizer::NewRankOp(const AnfNodePtr &cnode, const KernelGr
   return rank_op;
 }

-AnfNodePtr ReduceSumOptimizer::NewRangeOp(const AnfNodePtr &rank_op, const KernelGraphPtr &kernel_graph) const {
+AnfNodePtr ReduceOptimizer::NewRangeOp(const AnfNodePtr &rank_op, const KernelGraphPtr &kernel_graph) const {
   MS_EXCEPTION_IF_NULL(rank_op);
   MS_EXCEPTION_IF_NULL(kernel_graph);
   std::vector<AnfNodePtr> range_inputs;
@@ -68,15 +68,15 @@ AnfNodePtr ReduceSumOptimizer::NewRangeOp(const AnfNodePtr &rank_op, const Kerne
   return range_op;
 }

-AnfNodePtr ReduceSumOptimizer::InsertAssistNode(const CNodePtr &cnode, const KernelGraphPtr &) const {
+AnfNodePtr ReduceOptimizer::InsertAssistNode(const CNodePtr &cnode, const KernelGraphPtr &) const {
   // the input dim is unknown, need rank + range, don't supported now;
   MS_LOG(EXCEPTION)
     << "Can not support the case that input is dim unknown and axis is empty or axis contain value less 0. node: "
     << trace::DumpSourceLines(cnode);
 }

-AnfNodePtr ReduceSumOptimizer::CreateValueNodeWithVector(const CNodePtr &cnode, const KernelGraphPtr &kernel_graph,
-                                                         const std::vector<int64_t> &axis) const {
+AnfNodePtr ReduceOptimizer::CreateValueNodeWithVector(const CNodePtr &cnode, const KernelGraphPtr &kernel_graph,
+                                                      const std::vector<int64_t> &axis) const {
   MS_EXCEPTION_IF_NULL(cnode);
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto new_value_node = NewValueNode(MakeValue<std::vector<int64_t>>(axis));
@@ -92,8 +92,8 @@ AnfNodePtr ReduceSumOptimizer::CreateValueNodeWithVector(const CNodePtr &cnode,
   return new_node;
 }

-AnfNodePtr ReduceSumOptimizer::HandleAxisWithEmptyTensor(const CNodePtr &cnode, const KernelGraphPtr &kernel_graph,
-                                                         const AnfNodePtr &axis_input) const {
+AnfNodePtr ReduceOptimizer::HandleAxisWithEmptyTensor(const CNodePtr &cnode, const KernelGraphPtr &kernel_graph,
+                                                      const AnfNodePtr &axis_input) const {
   MS_EXCEPTION_IF_NULL(cnode);
   MS_EXCEPTION_IF_NULL(axis_input);
   MS_EXCEPTION_IF_NULL(kernel_graph);
@@ -125,7 +125,7 @@ AnfNodePtr ReduceSumOptimizer::HandleAxisWithEmptyTensor(const CNodePtr &cnode,
 // 2: the value of axis_input contain the value less 0,
 // the new tensor of the new value node should be "shape.size() + the_old_value_less_0",
 // the shape is the first input'shape of ReduceSum;
-AnfNodePtr ReduceSumOptimizer::NewAssistValueNode(const CNodePtr &cnode, const KernelGraphPtr &kernel_graph) const {
+AnfNodePtr ReduceOptimizer::NewAssistValueNode(const CNodePtr &cnode, const KernelGraphPtr &kernel_graph) const {
   // axis is a tuple ,maybe empty or contain a value less 0;
   if (cnode->inputs().size() <= axis_input_index) {
     return nullptr;
@@ -170,18 +170,18 @@ AnfNodePtr ReduceSumOptimizer::NewAssistValueNode(const CNodePtr &cnode, const K
   return nullptr;
 }

-const AnfNodePtr ReduceSumOptimizer::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
-                                             const EquivPtr &) const {
+const AnfNodePtr ReduceOptimizer::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
+                                          const EquivPtr &) const {
   MS_EXCEPTION_IF_NULL(func_graph);
   MS_EXCEPTION_IF_NULL(node);
   auto cnode = node->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(cnode);
   auto op_name = common::AnfAlgo::GetCNodeName(cnode);
-  if (op_name != kReduceSumOpName) {
+  if (op_name != kReduceSumOpName && op_name != kReduceMeanOpName) {
     MS_LOG(DEBUG) << "Current node is not: " << kReduceSumOpName << ", skip!";
     return nullptr;
   }
-  if (!common::AnfAlgo::IsDynamicShape(cnode)) {
+  if (!common::AnfAlgo::IsDynamicShape(cnode) && !common::AnfAlgo::HasNodeAttr(kAttrMutableKernel, cnode)) {
     MS_LOG(DEBUG) << "Current node is not dynamic shape, skip!";
     return nullptr;
   }
@@ -191,7 +191,7 @@ const AnfNodePtr ReduceSumOptimizer::Process(const FuncGraphPtr &func_graph, con
   return NewAssistValueNode(cnode, kernel_graph);
 }

-const BaseRef ReduceSumOptimizer::DefinePattern() const {
+const BaseRef ReduceOptimizer::DefinePattern() const {
   std::shared_ptr<Var> V = std::make_shared<CondVar>(UnVisited);
   std::shared_ptr<Var> Xs = std::make_shared<SeqVar>();
   return VectorRef({V, Xs});
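The pass materializes the reduce axis as an assist value node; per the comments around NewAssistValueNode, an empty axis tuple means reducing every dimension and a negative entry is wrapped by adding the input rank. A standalone illustration of that assumed normalization (not the MindSpore implementation):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<int64_t> NormalizeAxis(const std::vector<int64_t> &axis, int64_t rank) {
  std::vector<int64_t> result;
  if (axis.empty()) {
    for (int64_t i = 0; i < rank; ++i) result.push_back(i);  // reduce every dimension
    return result;
  }
  for (auto a : axis) result.push_back(a < 0 ? a + rank : a);  // wrap negative entries
  return result;
}

int main() {
  for (auto a : NormalizeAxis({}, 3)) std::cout << a << ' ';       // 0 1 2
  std::cout << '\n';
  for (auto a : NormalizeAxis({-1, 0}, 3)) std::cout << a << ' ';  // 2 0
  std::cout << '\n';
}
```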
@@ -13,18 +13,18 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_PASS_REDUCE_SUM_OPTIMIZER_H_
-#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_PASS_REDUCE_SUM_OPTIMIZER_H_
+#ifndef MINDSPORE_CCSRC_BACKEND_OPTIMIZER_PASS_REDUCE_OPTIMIZER_H_
+#define MINDSPORE_CCSRC_BACKEND_OPTIMIZER_PASS_REDUCE_OPTIMIZER_H_
 #include <memory>
 #include <vector>
 #include "backend/common/optimizer/optimizer.h"

 namespace mindspore {
 namespace opt {
-class ReduceSumOptimizer : public PatternProcessPass {
+class ReduceOptimizer : public PatternProcessPass {
  public:
-  explicit ReduceSumOptimizer(bool multigraph = true) : PatternProcessPass("reduce_sum_optimizer", multigraph) {}
-  ~ReduceSumOptimizer() override = default;
+  explicit ReduceOptimizer(bool multigraph = true) : PatternProcessPass("reduce_optimizer", multigraph) {}
+  ~ReduceOptimizer() override = default;
   const BaseRef DefinePattern() const override;
   const AnfNodePtr Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const override;

@@ -41,4 +41,4 @@ class ReduceSumOptimizer : public PatternProcessPass {
 }  // namespace opt
 }  // namespace mindspore

-#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_PASS_REDUCE_SUM_OPTIMIZER_H_
+#endif  // MINDSPORE_CCSRC_BACKEND_OPTIMIZER_PASS_REDUCE_OPTIMIZER_H_
@@ -444,9 +444,8 @@ void SessionBasic::GetSingleOpGraphInfo(const CNodePtr &kernel, const InputTenso
   std::ostringstream buf;
   auto prim = common::AnfAlgo::GetCNodePrimitive(kernel);
   MS_EXCEPTION_IF_NULL(prim);
-  buf << GetOpRunDeviceTarget(prim) << "_";
-  buf << prim->id() << "_";
-  bool has_const_input = false;
+  buf << GetOpRunDeviceTarget(prim) << "_dynamic" << op_run_info->base_op_run_info.use_dynamic_shape_process << "_";
+  buf << prim->name() << "_";
   for (size_t i = 0; i < input_tensors.size(); ++i) {
     auto &tensor = input_tensors[i];
     MS_EXCEPTION_IF_NULL(tensor);
@@ -472,7 +471,6 @@ void SessionBasic::GetSingleOpGraphInfo(const CNodePtr &kernel, const InputTenso
     }
     // For constant input
     if (input_tensors_mask[i] == kValueNodeTensorMask) {
-      has_const_input = true;
       buf << common::AnfAlgo::GetTensorValueString(tensor);
     }
     buf << "_";
@@ -483,20 +481,6 @@ void SessionBasic::GetSingleOpGraphInfo(const CNodePtr &kernel, const InputTenso
   (void)std::for_each(attr_map.begin(), attr_map.end(),
                       [&buf](const auto &element) { buf << element.second->ToString(); });

-  // Generally, different inputs can have different output; but different constant inputs may lead to different output
-  if (has_const_input) {
-    buf << "_";
-    const AbstractBasePtr &abstract = kernel->abstract();
-    MS_EXCEPTION_IF_NULL(abstract);
-    auto build_shape = abstract->BuildShape();
-    MS_EXCEPTION_IF_NULL(build_shape);
-    auto build_type = abstract->BuildType();
-    MS_EXCEPTION_IF_NULL(build_type);
-    // Get output shape
-    buf << build_shape->ToString();
-    // Get output dtype
-    buf << build_type->type_id();
-  }
   *graph_info = buf.str();
 }

@@ -840,6 +824,10 @@ void SessionBasic::GetOpInputTensors(const CNodePtr &cnode,
                                      InputTensorInfo *input_tensor_info) const {
   MS_EXCEPTION_IF_NULL(cnode);
   MS_EXCEPTION_IF_NULL(input_tensor_info);
+  auto context = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context);
+  std::string device_target = context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
+  auto is_mutable = common::AnfAlgo::HasNodeAttr(kAttrMutableKernel, cnode);
   std::vector<size_t> const_input_attr_index = {};
   GetConstValueDepend(cnode, &const_input_attr_index);
   const auto input_tensor_num = common::AnfAlgo::GetInputTensorNum(cnode);
@@ -866,8 +854,13 @@ void SessionBasic::GetOpInputTensors(const CNodePtr &cnode,
           is_forward_output = true;
         }
       }
-      input_tensor_info->input_tensors_mask.emplace_back(
-        (is_value_node || !is_forward_output) ? kValueNodeTensorMask : kParameterDataTensorMask);
+      if (is_mutable && device_target == kAscendDevice) {
+        input_tensor_info->input_tensors_mask.emplace_back(
+          (is_value_node && !is_forward_output) ? kValueNodeTensorMask : kParameterDataTensorMask);
+      } else {
+        input_tensor_info->input_tensors_mask.emplace_back(
+          (is_value_node || !is_forward_output) ? kValueNodeTensorMask : kParameterDataTensorMask);
+      }
     } else if (real_input->isa<Parameter>()) {
       tensor = GetParameterOutputTensor(real_input, parameter_index, graph_inputs);
       input_tensor_info->input_tensors_mask.emplace_back(tensor->is_parameter() ? kParameterWeightTensorMask
@@ -1276,6 +1269,11 @@ std::shared_ptr<KernelGraph> SessionBasic::ConstructSingleOpGraph(const BackendO
   // set execution order
   auto cnode = graph->NewCNode(inputs);
   MS_EXCEPTION_IF_NULL(cnode);
+  auto is_mutable_kernel = common::AnfAlgo::HasNodeAttr(kAttrMutableKernel, cnode) &&
+                           common::AnfAlgo::GetNodeAttr<bool>(cnode, kAttrMutableKernel);
+  if (is_mutable_kernel) {
+    graph->set_flag(kAttrMutableKernel, true);
+  }
   // set abstract,which include inferred shapes and types
   cnode->set_abstract(op_run_info->base_op_run_info.abstract);
   common::AnfAlgo::SetNodeAttr(kAttrOutputIsDynamicShape, MakeValue(op_run_info->base_op_run_info.has_dynamic_output),
@@ -1288,7 +1286,7 @@ std::shared_ptr<KernelGraph> SessionBasic::ConstructSingleOpGraph(const BackendO
   // set execution order
   std::vector<CNodePtr> exe_order = {cnode};
   graph->set_execution_order(exe_order);
-  if (is_ascend) {
+  if (is_ascend && !is_mutable_kernel) {
     graph->set_output(cnode);
   } else {
     CreateOutputNode(cnode, graph);
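Both GetSingleOpGraphInfo variants (here and in the PyNative frontend below) now key the single-op graph cache on the device target plus a `_dynamic<flag>_` segment and the primitive name rather than the per-instance `prim->id()`, and they drop the output shape/type suffix that used to be appended for constant inputs. A simplified standalone sketch of the key construction (real keys also fold in tensor shapes, formats, masks, and attribute values):

```cpp
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

std::string MakeGraphInfoKey(const std::string &target, bool use_dynamic_shape_process,
                             const std::string &op_name, const std::vector<std::string> &inputs) {
  std::ostringstream buf;
  // Embedding the dynamic flag keys static and dynamic compilations separately,
  // so toggling dynamic-shape execution cannot hit a stale cached graph.
  buf << target << "_dynamic" << use_dynamic_shape_process << "_";
  buf << op_name << "_";
  for (const auto &in : inputs) buf << in << "_";
  return buf.str();
}

int main() {
  std::cout << MakeGraphInfoKey("Ascend", true, "Add", {"f32[2,3]", "f32[2,3]"}) << '\n';
  // prints: Ascend_dynamic1_Add_f32[2,3]_f32[2,3]_
}
```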
@@ -682,9 +682,13 @@ void MindRTBackend::RunGraphBySingleOp(const GraphCompilerInfo &graph_compiler_i
     graph_compiler_->CalculateForwardOpOutputCount(graph, inputs[graph_index], &forward_op_output_tensor_id_);
   }

+  auto is_mutable = graph->has_flag(kAttrMutableKernel);
   bool use_dynamic_shape_process = root_graph_->has_flag(kFlagUseDynamicShapeProcess);
   py::gil_scoped_release release;
   for (const auto &kernel : graph->execution_order()) {
+    if (is_mutable) {
+      common::AnfAlgo::SetNodeAttr(kAttrMutableKernel, MakeValue(true), kernel);
+    }
     InputTensorInfo input_tensor_info;
     VectorRef op_outputs;
     if (common::AnfAlgo::IsControlOpExecInBackend(kernel)) {
@@ -712,6 +716,9 @@ void MindRTBackend::RunGraphBySingleOp(const GraphCompilerInfo &graph_compiler_i
       graph_compiler_->GetSingleOpRunInfoAndGraphInfo(kernel, input_tensor_info, use_dynamic_shape_process,
                                                       &op_run_info, &graph_info, &graph_output_info);
       if (use_dynamic_shape_process) {
+        op_run_info->op_prim->AddAttr(kAttrMutableKernel, MakeValue(true));
+        op_run_info->op_prim->AddAttr(kAttrInputIsDynamicShape, MakeValue(true));
+        op_run_info->op_prim->AddAttr(kAttrOutputIsDynamicShape, MakeValue(true));
         RunOpDynamic(op_run_info, &op_outputs);
       } else {
         RunOp(op_run_info, &op_outputs);
@@ -725,6 +732,9 @@ void MindRTBackend::RunGraphBySingleOp(const GraphCompilerInfo &graph_compiler_i
     }
     WaitTaskFinish();
   }
+  if (is_dynamic_ || root_graph_->has_flag(kFlagUseDynamicShapeProcess)) {
+    ClearResource();
+  }
 }

 void MindRTBackend::RunGraphByCondition(const ActorInfo &actor_info, const GraphCompilerInfo &graph_compiler_info,
@@ -1398,5 +1408,15 @@ void MindRTBackend::UpdateOutput(const std::vector<session::KernelWithIndex> &ou
     outputs->emplace_back(output_tensor);
   }
 }
+
+void MindRTBackend::ClearResource() {
+  graph_compiler_ = std::make_shared<GraphCompiler>();
+  graph_id_to_device_context_.clear();
+  func_graph_to_kernel_graph_ids_.clear();
+  graph_info_to_device_context_.clear();
+  control_nodes_.clear();
+  actor_to_graph_compiler_info_.clear();
+  cnode_ref_counts_.clear();
+}
 }  // namespace compile
 }  // namespace mindspore
@@ -130,6 +130,9 @@ class BACKEND_EXPORT MindRTBackend : public MindRTBackendBase {

   void OpRunCallback(const std::shared_ptr<pynative::OpTaskContext> &context);

+  // Clean the compilation cache to avoid memory leakage in dynamic shape scenarios.
+  void ClearResource();
+
   // Cache output tensor ref count of kernels for back propagation graph in PyNative mode.
   std::map<GraphId, std::map<KernelWithIndex, size_t>> cnode_ref_counts_;
@@ -285,9 +285,9 @@ const ActorInfo &MindRTBackendBase::CompileGraphs(const FuncGraphPtr &func_graph
   }

   AnfUtils::CloseAbstractLock();
-  bool is_dynamic = IsFuncGraphDynamicShapeOrStruct(func_graph, func_graph_cell_id);
+  is_dynamic_ = IsFuncGraphDynamicShapeOrStruct(func_graph, func_graph_cell_id);
   AnfUtils::OpenAbstractLock();
-  if (!is_dynamic) {
+  if (!is_dynamic_) {
     auto iter = graph_actor_infos_.find(func_graph_cell_id);
     if (iter != graph_actor_infos_.end()) {
       return iter->second;
@@ -405,9 +405,13 @@ void MindRTBackendBase::CompileGraph(const GraphSegmentPtr &segment, device::Run

   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
-  // Compile graph.
-  auto graph_id =
-    graph_compiler_->CompileGraph(segment, outputs, device_context, run_mode, ms_execution_mode_ == kPynativeMode);
+  GraphId graph_id;
+  if (is_dynamic_ || root_graph_->has_flag(kFlagUseDynamicShapeProcess)) {
+    graph_id = graph_compiler_->CompileDynamicGraph(segment, outputs, device_context);
+  } else {
+    graph_id =
+      graph_compiler_->CompileGraph(segment, outputs, device_context, run_mode, ms_execution_mode_ == kPynativeMode);
+  }

   graph_id_to_device_context_[graph_id] = device_context;
@@ -150,6 +150,7 @@ class BACKEND_EXPORT MindRTBackendBase : public Backend {
   // Save the mapping between cell id and actor info.
   mindspore::HashMap<std::string, ActorInfo> graph_actor_infos_;
   bool enable_backend_dynamic_detect_{false};
+  bool is_dynamic_{false};
   FuncGraphPtr root_graph_;
   GraphPartitionPtr graph_partition_;
   std::shared_ptr<GraphCompiler> graph_compiler_;
@@ -608,6 +608,7 @@ constexpr auto kAttrInputIsDynamicShape = "input_is_dynamic_shape";
 constexpr auto kAttrOutputIsDynamicShape = "output_is_dynamic_shape";
 constexpr auto kAttrPynativeNextOpName = "next_op";
 constexpr auto kAttrPynativeNextIndex = "next_index";
+constexpr auto kAttrMutableKernel = "mutable_kernel";
 constexpr auto kAttrCompileInfo = "compile_info";
 constexpr auto kAttrFusionType = "fusion_type";
 constexpr auto kAttrStride = "stride";
@@ -89,9 +89,8 @@ void GetSingleOpGraphInfo(const FrontendOpRunInfoPtr &op_run_info, const std::st
                       << tensors_mask.size();
   }
   std::ostringstream buf;
-  buf << cur_target << "_";
+  buf << cur_target << "_dynamic" << op_run_info->base_op_run_info.use_dynamic_shape_process << "_";
   buf << op_run_info->base_op_run_info.op_name << "_";
-  bool has_const_input = false;
   const auto &op_prim = op_run_info->op_prim;
   MS_EXCEPTION_IF_NULL(op_prim);
   bool has_hidden_side_effect = op_prim->HasAttr(GRAPH_FLAG_SIDE_EFFECT_HIDDEN);
@@ -120,7 +119,6 @@ void GetSingleOpGraphInfo(const FrontendOpRunInfoPtr &op_run_info, const std::st
     }
     // For constant input
     if (tensors_mask[index] == kValueNodeTensorMask) {
-      has_const_input = true;
       buf << common::AnfAlgo::GetTensorValueString(input_tensor);
     }
     buf << "_";
@@ -130,20 +128,6 @@ void GetSingleOpGraphInfo(const FrontendOpRunInfoPtr &op_run_info, const std::st
   (void)std::for_each(attr_map.begin(), attr_map.end(),
                       [&buf](const auto &element) { buf << element.second->ToString(); });

-  // Constant input affects output, operators like DropoutGenMask whose output is related to values of input when input
-  // shapes are the same but values are different
-  if (has_const_input) {
-    buf << "_";
-    auto abstr = op_run_info->base_op_run_info.abstract;
-    MS_EXCEPTION_IF_NULL(abstr);
-    auto build_shape = abstr->BuildShape();
-    MS_EXCEPTION_IF_NULL(build_shape);
-    buf << build_shape->ToString();
-    auto build_type = abstr->BuildType();
-    MS_EXCEPTION_IF_NULL(build_type);
-    buf << build_type->type_id();
-  }

   // Operator with hidden side effect.
   if (has_hidden_side_effect) {
     buf << "_" << std::to_string(op_prim->id());
@@ -210,8 +194,7 @@ FrontendOpRunInfoPtr ForwardExecutor::GenerateOpRunInfo(const py::args &args) co
   // Used for async run
   op_run_info->grad_flag = grad()->grad_flag();
   op_run_info->custom_bprop_cell_count = grad()->custom_bprop_cell_count();
-  op_run_info->base_op_run_info.use_dynamic_shape_process =
-    (device_target_ == kAscendDevice ? false : grad()->use_dynamic_shape_process());
+  op_run_info->base_op_run_info.use_dynamic_shape_process = grad()->use_dynamic_shape_process();
   op_run_info->base_op_run_info.op_name = args[static_cast<size_t>(RunOpArgsEnum::PY_NAME)].cast<std::string>();
   op_run_info->base_op_run_info.lazy_build = lazy_build_;
   PyNativeAlgo::PyParser::SetPrim(op_run_info, args[static_cast<size_t>(RunOpArgsEnum::PY_PRIM)]);
@@ -454,6 +437,9 @@ ValuePtr ForwardExecutor::RunOpInMs(const FrontendOpRunInfoPtr &op_run_info) {
   MS_EXCEPTION_IF_NULL(cur_mind_rt_backend);
   bool use_dynamic_shape_process = op_run_info->base_op_run_info.use_dynamic_shape_process;
   if (use_dynamic_shape_process) {
+    backend_op_run_info->op_prim->AddAttr(kAttrMutableKernel, MakeValue(true));
+    backend_op_run_info->op_prim->AddAttr(kAttrInputIsDynamicShape, MakeValue(true));
+    backend_op_run_info->op_prim->AddAttr(kAttrOutputIsDynamicShape, MakeValue(true));
     cur_mind_rt_backend->RunOpDynamic(backend_op_run_info, &outputs);
   } else {
     cur_mind_rt_backend->RunOp(backend_op_run_info, &outputs);
@@ -674,8 +674,7 @@ void GradExecutor::GetGradGraph(const ad::GradAttr &grad_attr, const std::vector
   auto bprop_graph = GetBpropGraph(grad_attr, w_args, p_args);
   MS_EXCEPTION_IF_NULL(bprop_graph);
   bprop_graph->set_flag(kFlagIsPynativeBpropGraph, true);
-  bool use_dynamic_shape_process = (forward()->device_target() == kAscendDevice ? false : use_dynamic_shape_process_);
-  bprop_graph->set_flag(kFlagUseDynamicShapeProcess, use_dynamic_shape_process);
+  bprop_graph->set_flag(kFlagUseDynamicShapeProcess, use_dynamic_shape_process_);
   MS_EXCEPTION_IF_NULL(top_input_args_info_);
   bprop_graph->set_attr(kAttrFuncGraphCellId, MakeValue(top_input_args_info_->obj_id));
   auto resource = top_cell()->resource();
@@ -1750,12 +1749,12 @@ bool GradExecutor::IsDynamicDetectNodeInfoChange(const DynamicDetectNodeInfoPtr
   MS_EXCEPTION_IF_NULL(old_node_info);

   // 1.Detect ms_function phase
-  if (is_ms_function_node != old_node_info->is_graph_node ||
-      (is_ms_function_node && graph_phase != old_node_info->graph_phase)) {
-    MS_LOG(DEBUG) << "graph is dynamic, old is_graph_node:" << old_node_info->is_graph_node
+  if (is_ms_function_node) {
+    MS_LOG(DEBUG) << "Graph info, old is_graph_node:" << old_node_info->is_graph_node
                   << " new is_graph_node:" << is_ms_function_node << " old graph_phase" << old_node_info->graph_phase
                   << " new graph_phase:" << graph_phase;
-    return true;
+    return is_ms_function_node != old_node_info->is_graph_node ||
+           (is_ms_function_node && graph_phase != old_node_info->graph_phase);
   }

   // 2.Detect cnode prim
@@ -50,7 +50,7 @@ ge::DataType GeTypesConvert::TransTypeIdToGeDataType(TypeId type_id) {
     {TypeId::kNumberTypeInt, ge::DataType::DT_INT32},     {TypeId::kNumberTypeInt64, ge::DataType::DT_INT64},
     {TypeId::kNumberTypeUInt32, ge::DataType::DT_UINT32}, {TypeId::kNumberTypeUInt, ge::DataType::DT_UINT32},
     {TypeId::kNumberTypeUInt64, ge::DataType::DT_UINT64}, {TypeId::kNumberTypeBool, ge::DataType::DT_BOOL},
-    {TypeId::kNumberTypeInt64, ge::DataType::DT_DOUBLE},  {TypeId::kTypeUnknown, ge::DataType::DT_UNDEFINED}};
+    {TypeId::kNumberTypeFloat64, ge::DataType::DT_DOUBLE}, {TypeId::kTypeUnknown, ge::DataType::DT_UNDEFINED}};
   auto iter = data_type_map.find(type_id);
   if (iter == data_type_map.end()) {
     MS_LOG(EXCEPTION) << "Invalid data type:" << type_id << ": " << TypeIdLabel(type_id);
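The old entry reused the key TypeId::kNumberTypeInt64, which already maps to DT_INT64 two entries earlier; since a std::map built from an initializer list keeps only the first occurrence of a key, the DT_DOUBLE mapping was silently dead and kNumberTypeFloat64 had no entry at all, so double tensors hit the "Invalid data type" exception. A standalone demonstration of that pitfall:

```cpp
#include <iostream>
#include <map>

int main() {
  const std::map<int, const char *> m = {
    {64, "DT_INT64"},   // first entry wins
    {64, "DT_DOUBLE"},  // duplicate key: silently dropped during construction
  };
  std::cout << m.size() << " entry, 64 -> " << m.at(64) << '\n';
  // prints: 1 entry, 64 -> DT_INT64
}
```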
@@ -35,6 +35,10 @@
 #include "utils/trace_base.h"
 #include "mindspore/core/ops/op_name.h"

+#include "kernel/common_utils.h"
+#include "kernel/kernel.h"
+#include "kernel/kernel_build_info.h"
+
 namespace mindspore {
 namespace device {
 namespace ascend {
@@ -704,6 +708,45 @@ void ResetPreFixedFormat(const CNodePtr &kernel_node, kernel::KernelBuildInfoPtr
   common::AnfAlgo::EraseNodeAttr(kAttrFixedInputFormat, kernel_node);
   common::AnfAlgo::EraseNodeAttr(kAttrFixedOutputFormat, kernel_node);
 }
+
+void SetKernelWithDefaultInfo(const CNodePtr &kernel_node) {
+  auto builder = kernel::KernelBuildInfo::KernelBuildInfoBuilder();
+  builder.SetProcessor(kernel::AICORE);
+  builder.SetFusionType(kernel::UNKNOWN_FUSION_TYPE);
+  builder.SetOpPattern(kernel::kCommonPattern);
+  builder.SetKernelType(TBE_KERNEL);
+
+  auto input_size = common::AnfAlgo::GetInputTensorNum(kernel_node);
+  std::vector<std::string> inputs_format;
+  std::vector<TypeId> inputs_device_type;
+  std::vector<std::string> inputs_reshape_type(input_size, "");
+  for (size_t i = 0; i < input_size; ++i) {
+    auto type_id = AnfAlgo::GetPrevNodeOutputDeviceDataType(kernel_node, i);
+    if (type_id == kTypeUnknown) {
+      type_id = common::AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, i);
+    }
+    auto format = AnfAlgo::GetPrevNodeOutputFormat(kernel_node, i);
+    inputs_device_type.emplace_back(type_id);
+    inputs_format.emplace_back(format);
+  }
+  builder.SetInputsDeviceType(inputs_device_type);
+  builder.SetInputsFormat(inputs_format);
+  builder.SetInputsReshapeType(inputs_reshape_type);
+
+  auto output_size = common::AnfAlgo::GetOutputTensorNum(kernel_node);
+  std::vector<std::string> outputs_format;
+  std::vector<TypeId> outputs_device_type;
+  std::vector<std::string> outputs_reshape_type(output_size, "");
+  for (size_t i = 0; i < output_size; ++i) {
+    auto type_id = common::AnfAlgo::GetOutputInferDataType(kernel_node, i);
+    outputs_device_type.emplace_back(type_id);
+    outputs_format.emplace_back(kOpFormat_DEFAULT);
+  }
+  builder.SetOutputsDeviceType(outputs_device_type);
+  builder.SetOutputsFormat(outputs_format);
+  builder.SetOutputsReshapeType(outputs_reshape_type);
+  AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernel_node.get());
+}
 }  // namespace

 void SetTensorDeviceInfo(const CNodePtr &kernel_node) {
@@ -890,6 +933,11 @@ void SetRaiseOrReduceFlag(const CNodePtr &kernel_node, KernelSelectStatus status

 void SetAclKernelInfo(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
+  if (!common::AnfAlgo::HasNodeAttr(kAttrMutableKernel, kernel_node)) {
+    MS_LOG(INFO) << "No is_dynamic_kernel attr found, cannot set ACL KERNEL for " << kernel_node->DebugString();
+    return;
+  }
+
   KernelType kernel_type = AnfAlgo::GetKernelType(kernel_node);
   if (kernel_type != AICPU_KERNEL && kernel_type != TBE_KERNEL) {
     MS_LOG(INFO) << "Current node don't support acl kernel launch! Node info:" << kernel_node->DebugString();
@@ -903,22 +951,11 @@ void SetAclKernelInfo(const CNodePtr &kernel_node) {
     MS_LOG(INFO) << "Current mode or device don't support acl kernel launch! Node info:" << kernel_node->DebugString();
     return;
   }
-  if (!common::AnfAlgo::IsDynamicShape(kernel_node)) {
-    return;
-  }
-
-  if (common::AnfAlgo::IsGraphKernel(kernel_node) || IsPrimitiveCNode(kernel_node, prim::kPrimCustom)) {
-    MS_LOG(INFO) << "Current node is graph kernel or custom io! Node info:" << kernel_node->DebugString();
-    return;
-  }
   auto op_type = common::AnfAlgo::GetCNodeName(kernel_node);
   if (kAclBlackList.count(op_type) != 0) {
     MS_LOG(INFO) << "Current node in acl black list! Node info:" << kernel_node->DebugString();
     return;
   }
-  if (kAclKernelSet.count(op_type) == 0) {
-    MS_LOG(INFO) << "Current node in acl black list! Node info:" << kernel_node->DebugString();
-    return;
-  }

   // Update node's kernel type to acl.
   auto new_builder =
@@ -988,15 +1025,24 @@ std::tuple<KernelSelectStatus, std::string, ExceptionType> SelectKernelInfoWithM
   }
   // The kernel info can not find in ai_cpu kernel lists and ai_core kernel lists
   if (select_status == kNoMatched) {
-    GatherInputAndOutputInferType(aicpu_in_out_info, kernel_node);
-    std::get<0>(result) = select_status;
-    auto [msg, etype] = CollectNotMatchMessage(kernel_info_list, aicpu_kernel_info_list, aicore_in_out_info,
-                                               aicpu_in_out_info, kernel_node);
-    constexpr int one = 1;
-    constexpr int two = 2;
-    std::get<one>(result) = msg;
-    std::get<two>(result) = etype;
-    return result;
+    if (common::AnfAlgo::HasNodeAttr(kAttrMutableKernel, kernel_node)) {
+      MS_LOG(WARNING) << "NOT FOUND KernelBuildInfo for " << kernel_node->fullname_with_scope()
+                      << ". Set default KernelBuildInfo.";
+      SetKernelWithDefaultInfo(kernel_node);
+      SetTensorDeviceInfo(kernel_node);
+      select_status = kStatusAllMatched;
+      SetAclKernelInfo(kernel_node);
+    } else {
+      GatherInputAndOutputInferType(aicpu_in_out_info, kernel_node);
+      std::get<0>(result) = select_status;
+      auto [msg, etype] = CollectNotMatchMessage(kernel_info_list, aicpu_kernel_info_list, aicore_in_out_info,
+                                                 aicpu_in_out_info, kernel_node);
+      constexpr int one = 1;
+      constexpr int two = 2;
+      std::get<one>(result) = msg;
+      std::get<two>(result) = etype;
+      return result;
+    }
  }
   SetRaiseOrReduceFlag(kernel_node, select_status);
   std::get<0>(result) = select_status;
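With the mutable-kernel attribute present, a node whose kernel info matches neither the AI Core nor the AI CPU candidate lists no longer aborts selection; it falls back to a synthesized default build info and may then be routed to ACL. A standalone sketch of that fallback shape, under mock types (SelectRegistered and BuildInfo are illustrative, not MindSpore APIs):

```cpp
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>

struct BuildInfo { std::string format; std::string dtype; };

std::optional<BuildInfo> SelectRegistered(const std::string &op) {
  (void)op;
  return std::nullopt;  // pretend nothing in the registered kernel lists matched
}

BuildInfo SelectWithFallback(const std::string &op, bool is_mutable_kernel) {
  if (auto info = SelectRegistered(op)) return *info;
  if (!is_mutable_kernel) throw std::runtime_error("no kernel matched for " + op);
  // Fallback: default format plus the inferred dtype, mirroring SetKernelWithDefaultInfo.
  return BuildInfo{"DefaultFormat", "inferred"};
}

int main() {
  auto info = SelectWithFallback("ReduceMean", /*is_mutable_kernel=*/true);
  std::cout << info.format << ' ' << info.dtype << '\n';  // DefaultFormat inferred
}
```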
@@ -16,6 +16,8 @@

 #include "plugin/device/ascend/hal/hardware/ascend_graph_optimization.h"
+#include <set>
+#include <unordered_set>
 #include <memory>
 #include <string>
 #include "backend/common/optimizer/common_backend_optimization.h"
 #include "plugin/device/ascend/optimizer/ascend_backend_optimization.h"
@@ -53,6 +55,8 @@ void RemoveUnusedValueNode(const KernelGraphPtr &graph) {
     graph->RemoveNodeFromGraph(value_node);
   }
 }
+
+const std::unordered_set<std::string> kDefaultFormatAclOps = {kAddNOpName};
 }  // namespace

 void AscendGraphOptimization::Reset() {
@@ -87,9 +91,14 @@ void AscendGraphOptimization::OptimizeGraph(const KernelGraphPtr &graph) {

 void AscendGraphOptimization::OptimizeSingleOpGraph(const KernelGraphPtr &graph) {
   MS_EXCEPTION_IF_NULL(graph);
-  opt::RunOpAscendBackendIRFusionOptimization(graph);
-  SelectKernel(graph);
-  opt::RunOpAscendBackendOptimization(graph);
+
+  if (graph->has_flag(kAttrMutableKernel)) {
+    AclOpOptimize(graph);
+  } else {
+    opt::RunOpAscendBackendIRFusionOptimization(graph);
+    SelectKernel(graph);
+    opt::RunOpAscendBackendOptimization(graph);
+  }

   auto ms_context = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(ms_context);
@@ -102,6 +111,55 @@ void AscendGraphOptimization::OptimizeSingleOpGraph(const KernelGraphPtr &graph)
   memo_.clear();
 }

+void AscendGraphOptimization::AclOpOptimize(const KernelGraphPtr &graph) {
+  MS_EXCEPTION_IF_NULL(graph);
+  opt::RunOpIRFissionForAcl(graph);
+
+  auto nodes = graph->execution_order();
+  for (auto &node : nodes) {
+    common::AnfAlgo::SetNodeAttr(kAttrMutableKernel, MakeValue(true), node);
+  }
+  SelectKernel(graph);
+
+  // Change format to DefaultFormat.
+  bool need_change_format = false;
+  for (auto &node : nodes) {
+    if (kDefaultFormatAclOps.count(common::AnfAlgo::GetCNodeName(node))) {
+      need_change_format = true;
+      auto new_builder =
+        std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node));
+      MS_EXCEPTION_IF_NULL(new_builder);
+      auto inputs_format = AnfAlgo::GetAllInputFormats(node);
+      auto outputs_format = AnfAlgo::GetAllOutputFormats(node);
+      new_builder->SetInputsFormat(std::vector<std::string>(inputs_format.size(), kOpFormat_DEFAULT));
+      new_builder->SetOutputsFormat(std::vector<std::string>(outputs_format.size(), kOpFormat_DEFAULT));
+      AnfAlgo::SetSelectKernelBuildInfo(new_builder->Build(), node.get());
+    }
+  }
+
+  bool has_aicpu = std::any_of(nodes.begin(), nodes.end(),
+                               [](const CNodePtr &node) { return AnfAlgo::GetKernelType(node) == AICPU_KERNEL; });
+  if (has_aicpu || need_change_format) {
+    // Insert Cast and TransData.
+    opt::RunOpAscendBackendOptimization(graph);
+  } else {
+    // Only insert Cast.
+    opt::AscendMixPrecision(graph);
+  }
+
+  // Replace all TBE_KERNEL with ACL_KERNEL.
+  for (const auto &node : graph->execution_order()) {
+    if (AnfAlgo::GetKernelType(node) == TBE_KERNEL) {
+      auto new_builder =
+        std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>(AnfAlgo::GetSelectKernelBuildInfo(node));
+      MS_EXCEPTION_IF_NULL(new_builder);
+      new_builder->SetKernelType(ACL_KERNEL);
+      MS_LOG(INFO) << "SUCCESS SET ACL KERNEL FOR" << node->DebugString();
+      AnfAlgo::SetSelectKernelBuildInfo(new_builder->Build(), node.get());
+    }
+  }
+}
+
 void AscendGraphOptimization::OptimizeGraphWithoutDeviceInfo(const KernelGraphPtr &graph) {
   MS_EXCEPTION_IF_NULL(graph);
   CheckControlFlowDynamicShape(graph);
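AclOpOptimize ends by deciding how heavy the follow-up optimization must be: the full RunOpAscendBackendOptimization pass (which inserts both Cast and TransData) is only needed when an AICPU kernel is present or some op (currently only AddN, per kDefaultFormatAclOps) forced its format back to DefaultFormat; otherwise the cheaper Cast-only mix-precision pass suffices. A standalone sketch of that decision (mock enum, not MindSpore APIs):

```cpp
#include <algorithm>
#include <iostream>
#include <vector>

enum KernelType { TBE_KERNEL, AICPU_KERNEL, ACL_KERNEL };

int main() {
  std::vector<KernelType> nodes = {TBE_KERNEL, TBE_KERNEL};
  bool need_change_format = false;  // would be set when an op such as AddN needs DefaultFormat
  bool has_aicpu =
    std::any_of(nodes.begin(), nodes.end(), [](KernelType t) { return t == AICPU_KERNEL; });
  std::cout << (has_aicpu || need_change_format ? "Cast + TransData" : "Cast only") << '\n';
}
```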
@@ -66,6 +66,7 @@ class AscendGraphOptimization {

   void GetAllGraphs(const KernelGraphPtr &root_graph);
   void CheckControlFlowDynamicShape(const KernelGraphPtr &root_graph);
+  void AclOpOptimize(const KernelGraphPtr &graph);

   // Manager for the optimized graphs
   FuncGraphManagerPtr graph_manager_;
@@ -306,9 +306,8 @@ bool AscendKernelExecutor::MemoryCopyAsync(const CNodePtr &node, const vector<Ad
                                            const vector<AddressPtr> &outputs) const {
   MS_LOG(DEBUG) << "Launch MemoryCopyAsync instead for kernel " << node->fullname_with_scope();
   if (inputs.size() != 1 || outputs.size() != 1) {
-    MS_LOG(ERROR) << "Kernel " << node->fullname_with_scope() << " input output size should be 1 but"
-                  << " input size is:" << inputs.size() << " output size is:" << outputs.size();
-    return false;
+    MS_LOG(WARNING) << "Kernel " << node->fullname_with_scope() << " input output size should be 1 but"
+                    << " input size is:" << inputs.size() << " output size is:" << outputs.size();
   }

   const auto stream = AscendStreamMng::GetInstance().GetStream(kDefaultStreamIndex);
@@ -28,13 +28,13 @@ namespace mindspore {
 namespace kernel {
 namespace {
 static const std::unordered_set<std::string> kAclStaticList = {kPackOpName,
                                                                kAddNOpName,
                                                                kTensorMoveOpName,
                                                                kConcatDOpName,
                                                                kCheckValidOpName,
                                                                kBiasAddOpName,
                                                                kBiasAddGradOpName,
                                                                kConv3DTransposeOpName,
                                                                kTileOpName,
                                                                kROIAlignName,
                                                                kDynamicGRUV2OpName,
                                                                kSoftmaxCrossEntropyWithLogitsOpName};
@@ -17,6 +17,7 @@

 #include <vector>
+#include <map>
 #include "runtime/rt.h"
 #include "ir/tensor.h"
 #include "include/common/utils/anfalgo.h"
 #include "kernel/common_utils.h"
@@ -36,20 +37,18 @@ int AclKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector
   auto cnode = node->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(cnode);

-  if (!common::AnfAlgo::IsDynamicShape(cnode)) {
-    MS_LOG(EXCEPTION) << "The node is not dynamic shape: " << cnode->fullname_with_scope();
-  }
-
   size_t input_num = common::AnfAlgo::GetInputTensorNum(cnode);
   std::vector<size_t> useless_input_lists;
   // Update input size list
-  for (size_t i = 0; i < input_size_list_.size(); ++i) {
+  for (size_t i = 0; i < input_num; ++i) {
     auto index = AnfAlgo::GetInputGraphIdxByKernelIdx(node, i);
     if (index >= input_size_list_.size()) {
       MS_LOG(EXCEPTION) << "Error real index:" << index;
     }
-    TypeId type_id = AnfAlgo::GetInputDeviceDataType(node, index);
+    auto [input, idx] = common::AnfAlgo::GetPrevNodeOutput(node, index);
+    auto type_id = AnfAlgo::GetOutputDeviceDataType(input, idx);
     auto type_size = GetTypeByte(TypeIdToType(type_id));
-    auto shape = AnfAlgo::GetInputDeviceShape(node, index);
+    auto shape = AnfAlgo::GetOutputDeviceShape(input, idx);
     if (IsDynamic(shape)) {
       MS_LOG(ERROR) << "Please check infer op shape before resize, error input index is:" << i;
       return 1;
@@ -65,10 +64,24 @@ int AclKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector
       (void)useless_input_lists.emplace_back(i);
     }
   }
+
+  auto acl_input_size = GeOpConvertor::GetAclInputSize(cnode);
+  if (acl_input_size > input_num) {
+    for (size_t i = input_num; i < acl_input_size; i++) {
+      input_size_list_[i] = SIZE_MAX;
+    }
+  }
   common::AnfAlgo::SetNodeAttr(kAttrUselessInput, MakeValue(useless_input_lists), node);

   // Update output size list
+  size_t output_num = common::AnfAlgo::GetOutputTensorNum(cnode);
   AscendKernelMod::UpdateOutputSizeList();
+  auto acl_output_size = GeOpConvertor::GetAclOutputSize(cnode);
+  if (acl_output_size > output_num) {
+    for (size_t i = output_num; i < acl_output_size; i++) {
+      output_size_list_[i] = SIZE_MAX;
+    }
+  }

   if (!AclUtils::UpdateTensorDesc(node, &input_desc_list_, &output_desc_list_)) {
     MS_LOG(EXCEPTION) << "Fail to update op desc: " << node->fullname_with_scope();
@@ -159,6 +172,10 @@ bool AclKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vect
     return false;
   }

+  if (rtStreamSynchronize(stream_ptr) != RT_ERROR_NONE) {
+    MS_LOG(EXCEPTION) << "aclopCompileAndExecute sync failed";
+  }
+
   MS_LOG(INFO) << "Success launch of node: " << op_type_;
   return true;
 }
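GetAclInputSize/GetAclOutputSize can report more slots than the graph node actually carries (optional ACL anchors); the loops above mark the surplus entries with SIZE_MAX instead of leaving them uninitialized. A standalone sketch of that padding (PadToAclSize is an illustrative name, not a MindSpore API):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

void PadToAclSize(std::vector<size_t> *size_list, size_t real_num, size_t acl_num) {
  size_list->resize(std::max(size_list->size(), acl_num));
  for (size_t i = real_num; i < acl_num; ++i) {
    (*size_list)[i] = SIZE_MAX;  // sentinel: optional ACL slot with no backing tensor
  }
}

int main() {
  std::vector<size_t> sizes = {16, 32};
  PadToAclSize(&sizes, 2, 4);
  for (auto s : sizes) std::cout << s << ' ';
  // prints: 16 32 18446744073709551615 18446744073709551615
}
```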
@@ -25,6 +25,8 @@
 #include "kernel/common_utils.h"
 #include "backend/common/session/anf_runtime_algorithm.h"

+#include "plugin/device/ascend/hal/device/ge_types_convert.h"
+
 namespace mindspore {
 namespace kernel {
 namespace {
@@ -537,10 +539,11 @@ std::vector<GeTensorDescPtr> AclUtils::GetInputTensorDesc(const AnfNodePtr &anf_
       continue;
     }
     (void)already_add_index.insert(index + 1);
-    auto ori_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(anf_node, index);
-    auto input_shape = AnfAlgo::GetInputDeviceShape(anf_node, index);
-    auto input_type = AnfAlgo::GetInputDeviceDataType(anf_node, index);
-    auto input_format = AnfAlgo::GetInputFormat(anf_node, index);
+    auto [input, idx] = common::AnfAlgo::GetPrevNodeOutput(anf_node, index);
+    auto ori_shape = common::AnfAlgo::GetOutputInferShape(input, idx);
+    auto input_shape = AnfAlgo::GetOutputDeviceShape(input, idx);
+    auto input_type = AnfAlgo::GetOutputDeviceDataType(input, idx);
+    auto input_format = AnfAlgo::GetOutputFormat(input, idx);
     auto ori_format = IsOneOf3DFormat(input_format) ? kOpFormat_NCDHW : kOpFormat_DEFAULT;
     auto input_desc = GeOpConvertor::GetTensorDesc(input_shape, input_type, input_format, ori_shape, ori_format);
     MS_EXCEPTION_IF_NULL(input_desc);
@@ -591,6 +594,7 @@ std::set<std::string> AclUtils::GetUselessOutputs(const AnfNodePtr &node) {

 std::vector<GeTensorDescPtr> AclUtils::GetOutputTensorDesc(const AnfNodePtr &anf_node) {
   MS_EXCEPTION_IF_NULL(anf_node);
+
   size_t output_num = common::AnfAlgo::GetOutputTensorNum(anf_node);
   std::vector<GeTensorDescPtr> res;
   auto useless_outputs = GetUselessOutputs(anf_node);
@@ -630,6 +634,9 @@ std::shared_ptr<OpInfo> AclUtils::GetKernelOpInfo(const AnfNodePtr &node) {
   auto node_name = common::AnfAlgo::GetCNodeName(node);
   auto is_dynamic_shape = common::AnfAlgo::IsDynamicShape(node);
   auto op_info_ptr = kernel::OpLib::FindOp(node_name, kernel::kImplyTBE, is_dynamic_shape);
+  if (op_info_ptr == nullptr) {
+    return kernel::OpLib::FindOp(node_name, kernel::kImplyAICPU);
+  }
   return op_info_ptr;
 }

@@ -666,6 +673,7 @@ std::vector<std::string> AclUtils::GetOpInputAnchorNames(const AnfNodePtr &node)

 std::vector<std::string> AclUtils::GetOpOutputAnchorNames(const AnfNodePtr &node) {
   auto op_info_ptr = GetKernelOpInfo(node);
+  MS_EXCEPTION_IF_NULL(op_info_ptr);
   auto outputs_ptr = op_info_ptr->outputs_ptr();
   std::vector<std::string> output_names;
   for (const auto &out_item : outputs_ptr) {
@@ -432,7 +432,8 @@ void CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_p
     return;
   }

-  if (!common::AnfAlgo::IsDynamicShape(anf_node)) {
+  if (!common::AnfAlgo::IsDynamicShape(anf_node) &&
+      !common::AnfAlgo::HasNodeAttr(kAttrMutableKernel, anf_node->cast<CNodePtr>())) {
     return;
   }

@@ -476,7 +477,8 @@ KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
     op_name = kInitData;
   }
   std::shared_ptr<AicpuOpKernelMod> kernel_mod_ptr;
-  if (common::AnfAlgo::IsDynamicShape(anf_node)) {
+  if (common::AnfAlgo::IsDynamicShape(anf_node) ||
+      common::AnfAlgo::HasNodeAttr(kAttrMutableKernel, anf_node->cast<CNodePtr>())) {
     kernel_mod_ptr = std::make_shared<DynamicAicpuOpKernelMod>(anf_node);
   } else {
     kernel_mod_ptr = std::make_shared<AicpuOpKernelMod>(anf_node);
@@ -61,9 +61,6 @@ int DynamicAicpuOpKernelMod::Resize(const BaseOperatorPtr &base_operator, const
   MS_EXCEPTION_IF_NULL(node);
   auto cnode = node->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(cnode);
-  if (!common::AnfAlgo::IsDynamicShape(cnode)) {
-    MS_LOG(EXCEPTION) << "The node is not dynamic shape: " << cnode->fullname_with_scope();
-  }
   if (common::AnfAlgo::GetCNodeName(cnode) == kGetNextOpName) {
     auto wingman_queue = device::GetTdtWingManQueue(cnode);
     std::vector<device::DataQueueItem> data;
@@ -142,11 +139,6 @@ bool DynamicAicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, cons
   MS_EXCEPTION_IF_NULL(cnode);
   MS_LOG(INFO) << "Start launch of node: " << cnode->fullname_with_scope();

-  // is dynamic shape
-  if (!common::AnfAlgo::IsDynamicShape(cnode)) {
-    MS_LOG(EXCEPTION) << "The cnode is not dynamic shape:" << cnode->fullname_with_scope();
-  }
-
   // copy extinfo to device
   AllocateExtInfoDeviceAddr(cnode);
   MS_EXCEPTION_IF_NULL(ext_info_handler_);
@@ -186,10 +178,6 @@ void DynamicAicpuOpKernelMod::SyncData() {
   auto cnode = node->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(cnode);
   MS_LOG(INFO) << "Aicpu " << cnode->fullname_with_scope() << " PostExecute";
-  // is dynamic shape
-  if (!common::AnfAlgo::IsDynamicShape(cnode)) {
-    MS_LOG(EXCEPTION) << "The cnode is not dynamic shape:" << cnode->fullname_with_scope();
-  }

   if (unknow_type_ != device::ascend::UnknowShapeOpType::DEPEND_COMPUTE ||
       common::AnfAlgo::GetCNodeName(cnode) == kGetNextOpName) {
@@ -84,9 +84,6 @@ int HostKernelMod::Resize(const BaseOperatorPtr &, const std::vector<KernelTenso
   MS_EXCEPTION_IF_NULL(node);
   auto cnode = node->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(cnode);
-  if (!common::AnfAlgo::IsDynamicShape(cnode)) {
-    MS_LOG(EXCEPTION) << "The node is not dynamic shape: " << cnode->fullname_with_scope();
-  }

   if (!Init(cnode)) {
     MS_LOG(EXCEPTION) << "Init failed, node:" << cnode->fullname_with_scope();
@@ -107,7 +107,7 @@ std::shared_ptr<OpInfo> TbeDynamicShapeUtil::FindOp(const std::string &op_name,

 std::shared_ptr<OpInfo> TbeDynamicShapeUtil::FindOp(const std::string &op_name, const CNodePtr &cnode) {
   MS_EXCEPTION_IF_NULL(cnode);
-  auto is_dynamic_shape = GetDynamicShapeAttr(cnode);
+  auto is_dynamic_shape = GetDynamicShapeAttr(cnode) || common::AnfAlgo::HasNodeAttr(kAttrMutableKernel, cnode);
   auto op_info = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kImplyTBE, is_dynamic_shape);
   // If have no dynamic shape op, get static shape op
   if (op_info != nullptr && !op_info->dynamic_shape() && is_dynamic_shape) {
@@ -495,6 +495,46 @@ void RunOpAscendBackendIRFusionOptimization(const std::shared_ptr<session::Kerne
 #endif
 }

+void RunOpIRFissionForAcl(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
+  MS_EXCEPTION_IF_NULL(kernel_graph);
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  if (!context_ptr->get_param<bool>(MS_CTX_IR_FUSION_FLAG)) {
+    MS_LOG(INFO) << "IRFusion is not enable, skip";
+    return;
+  }
+#ifdef ENABLE_DUMP_IR
+  bool save_graphs = context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG);
+  if (save_graphs) {
+    DumpIR("hwopt_d_ir_fusion_before.ir", kernel_graph);
+  }
+#endif
+  auto optimizer = std::make_shared<GraphOptimizer>();
+  auto ir_fusion_pm = std::make_shared<PassManager>("ir_fission_pm");
+  ir_fusion_pm->AddPass(std::make_shared<ClipByNormFission>());
+  ir_fusion_pm->AddPass(std::make_shared<EraseVisitAttr>());
+  ir_fusion_pm->AddPass(std::make_shared<TensorScatterUpdateFission>());
+  ir_fusion_pm->AddPass(std::make_shared<TensorScatterAddFission>());
+  ir_fusion_pm->AddPass(std::make_shared<TensorScatterSubFission>());
+  ir_fusion_pm->AddPass(std::make_shared<TensorScatterMaxFission>());
+  ir_fusion_pm->AddPass(std::make_shared<TensorScatterMinFission>());
+  ir_fusion_pm->AddPass(std::make_shared<EraseVisitAttr>());
+  const auto &pass_creators =
+    opt::Factory<PatternProcessPass>::Instance().GetPassCreatorsByType(kPassType::kIRFusionFissionPass);
+  for (const auto &pass_creator : pass_creators) {
+    ir_fusion_pm->AddPass(pass_creator.second());
+  }
+
+  optimizer->AddPassManager(ir_fusion_pm);
+  (void)optimizer->Optimize(kernel_graph);
+  kernel_graph->SetExecOrderByDefault();
+#ifdef ENABLE_DUMP_IR
+  if (save_graphs) {
+    DumpIR("hwopt_d_ir_fusion_after.ir", kernel_graph);
+  }
+#endif
+}
+
 void RunOpAscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
   MS_EXCEPTION_IF_NULL(kernel_graph);
   // data layout optimization
@@ -21,6 +21,7 @@ namespace mindspore {
 namespace opt {
 void RunOpAscendDataLayout(const std::shared_ptr<session::KernelGraph> &kernel_graph);
 void RunOpAscendBackendIRFusionOptimization(const std::shared_ptr<session::KernelGraph> &kernel_graph);
+void RunOpIRFissionForAcl(const std::shared_ptr<session::KernelGraph> &kernel_graph);
 void RunOpAscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kernel_graph);
 void AscendDataLayout(const std::shared_ptr<session::KernelGraph> &kernel_graph);
 void AscendMixPrecision(const std::shared_ptr<session::KernelGraph> &kernel_graph);
@@ -470,7 +470,7 @@ CNodePtr AddCastOpNodeToGraph(const FuncGraphPtr &func_graph, const AnfNodePtr &
     common::AnfAlgo::SetNodeAttr(kAttrInputIsDynamicShape, MakeValue(true), cast);
     common::AnfAlgo::SetNodeAttr(kAttrOutputIsDynamicShape, MakeValue(true), cast);
   }
-  common::AnfAlgo::SetNodeAttr("dst_type", TypeIdToType(origin_type), cast);
+  common::AnfAlgo::SetNodeAttr("dst_type", TypeIdToType(output_type), cast);
   AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), cast.get());
   common::AnfAlgo::SetOutputTypeAndDetailShape({origin_type}, {origin_shape}, cast.get());
   common::AnfAlgo::SetNodeAttr(kIsBackendCast, MakeValue(true), cast);
@@ -28,9 +28,13 @@ const BaseRef ConvertUnSupportNodeToAICPU::DefinePattern() const {
   return VectorRef({X, Xs});
 }

-const AnfNodePtr ConvertUnSupportNodeToAICPU::Process(const mindspore::FuncGraphPtr &,
+const AnfNodePtr ConvertUnSupportNodeToAICPU::Process(const mindspore::FuncGraphPtr &graph,
                                                       const mindspore::AnfNodePtr &node,
                                                       const mindspore::EquivPtr &) const {
+  MS_EXCEPTION_IF_NULL(graph);
+  if (graph->has_flag(kAttrMutableKernel)) {
+    return nullptr;
+  }
   if (node == nullptr || !node->isa<CNode>()) {
     return nullptr;
   }
@@ -49,9 +49,9 @@ const AnfNodePtr ConvertDataTypeForCNodeInput(const AnfNodePtr &node, size_t inp
   } else if (infer_type == src_type) {
     // Create cast primitive.
     PrimitivePtr cast_prim = std::make_shared<Primitive>(prim::kPrimCast->name());
-    (void)cast_prim->AddAttr("dst_type", MakeValue(static_cast<size_t>(dest_type)));
-    (void)cast_prim->AddAttr("DstT", MakeValue(static_cast<size_t>(dest_type)));
-    (void)cast_prim->AddAttr("SrcT", MakeValue(static_cast<size_t>(src_type)));
+    (void)cast_prim->AddAttr("dst_type", TypeIdToType(dest_type));
+    (void)cast_prim->AddAttr("DstT", TypeIdToType(dest_type));
+    (void)cast_prim->AddAttr("SrcT", TypeIdToType(src_type));
     // Create dest type node.
     auto dest_type_ptr = TypeIdToType(dest_type);
     auto dest_type_node = NewValueNode(dest_type_ptr);
@@ -27,12 +27,12 @@
 #include "plugin/device/ascend/kernel/tbe/tbe_dynamic_shape_util.h"

 namespace mindspore::opt {
-const AnfNodePtr AscendVmOpAdapter::Process(const FuncGraphPtr &, const AnfNodePtr &node, const EquivPtr &) const {
+const AnfNodePtr AscendVmOpAdapter::Process(const FuncGraphPtr &graph, const AnfNodePtr &node, const EquivPtr &) const {
   if (node == nullptr || !AnfUtils::IsRealCNodeKernel(node)) {
     return nullptr;
   }
   auto op_name = common::AnfAlgo::GetCNodeName(node);
-  auto is_dynamic = common::AnfAlgo::IsDynamicShape(node);
+  auto is_dynamic = common::AnfAlgo::IsDynamicShape(node) || graph->has_flag(kAttrMutableKernel);
   auto op_adaptation_info =
     OpAdaptationInfoRegister::GetInstance().GetOpAdaptationInfo(op_name, kAscendDevice, is_dynamic);
   if (op_adaptation_info == nullptr) {
@@ -181,7 +181,7 @@ CNodePtr CreateDropoutGenMaskCNode(const FuncGraphPtr &func_graph, const CNodePt
   std::vector<AnfNodePtr> dropout_gen_mask_inputs =
     use_v3 ? std::vector<AnfNodePtr>{NewValueNode(std::make_shared<Primitive>(kDropoutGenMaskV3OpName))}
            : std::vector<AnfNodePtr>{NewValueNode(std::make_shared<Primitive>(kDropoutGenMaskOpName))};
-  if (input_shape->IsDynamic()) {
+  if (input_shape->IsDynamic() || common::AnfAlgo::HasNodeAttr(kAttrMutableKernel, dropout)) {
     CNodePtr dynamic_shape = CreateDynamicShapeCNode(func_graph, dropout->input(kIndex1), input_shape);
     dynamic_shape->set_scope(dropout->scope());
     dropout_gen_mask_inputs.push_back(dynamic_shape);
@@ -198,7 +198,7 @@ CNodePtr CreateDropoutGenMaskCNode(const FuncGraphPtr &func_graph, const CNodePt
   }

   std::shared_ptr<abstract::AbstractTensor> gen_mask_abstract;
-  if (input_shape->IsDynamic()) {
+  if (input_shape->IsDynamic() || common::AnfAlgo::HasNodeAttr(kAttrMutableKernel, dropout)) {
     ShapeVector mask_shp = {abstract::Shape::kShapeDimAny};
     ShapeVector mask_min_shp = CalGenMaskOutputShape(input_shape->min_shape());
     ShapeVector mask_max_shp = CalGenMaskOutputShape(input_shape->max_shape());
@@ -404,7 +404,6 @@ const AnfNodePtr DropoutUnifyMindIR1::Process(const FuncGraphPtr &func_graph, co
   MS_EXCEPTION_IF_NULL(dropout_cnode);

   auto inputx_type_id = GetInputXDataType(dropout_cnode);
-  auto keep_prob_value = CreateKeepPorbValueNode(func_graph, dropout_cnode, inputx_type_id);

   CheckCNodeInputSize(dropout_cnode, kDropoutInputTensorNum);
   auto dropout_input = dropout_cnode->input(kIndex1);
@@ -416,12 +415,16 @@ const AnfNodePtr DropoutUnifyMindIR1::Process(const FuncGraphPtr &func_graph, co
     dropout_gen_mask = GetRecomputeDropoutGenMask(func_graph, dropout_cnode);
   }
   if (dropout_gen_mask == nullptr) {
-    dropout_gen_mask = CreateDropoutGenMaskCNode(func_graph, dropout_cnode, keep_prob_value, input_shape, use_v3);
+    dropout_gen_mask = CreateDropoutGenMaskCNode(func_graph, dropout_cnode,
+                                                 CreateKeepPorbValueNode(func_graph, dropout_cnode, inputx_type_id),
+                                                 input_shape, use_v3);
   }
   // CreateDropoutDoMask
   auto do_mask_abstract = std::make_shared<abstract::AbstractTensor>(TypeIdToType(inputx_type_id), input_shape);
   auto dropout_do_mask = CreateDropoutDoMaskCNode(
-    func_graph, dropout_cnode, {dropout_input, dropout_gen_mask, keep_prob_value}, do_mask_abstract, use_v3);
+    func_graph, dropout_cnode,
+    {dropout_input, dropout_gen_mask, CreateKeepPorbValueNode(func_graph, dropout_cnode, inputx_type_id)},
+    do_mask_abstract, use_v3);

   std::vector<AnfNodePtr> make_tuple_inputs{NewValueNode(prim::kPrimMakeTuple), dropout_do_mask, dropout_gen_mask};
   auto make_tuple = func_graph->NewCNode(make_tuple_inputs);
@ -306,6 +306,7 @@ GraphId GraphCompiler::CompileGraph(const GraphSegmentPtr &segment, const AnfNod
|
|||
KernelGraphPtr graph =
|
||||
session_->ConstructKernelGraph(nodes, outputs, device_terget, true, IsEnableZeroCopy(run_in_pynative));
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
|
||||
opt::EliminateIllegalDataTypePass(graph);
|
||||
SetGraphDependency(graph, segment);
|
||||
|
||||
|
@ -371,6 +372,54 @@ GraphId GraphCompiler::CompileGraph(const GraphSegmentPtr &segment, const AnfNod
return graph_id;
}

GraphId GraphCompiler::CompileDynamicGraph(const GraphSegmentPtr &segment, const AnfNodePtrList &outputs,
const DeviceContext *device_context) {
MS_EXCEPTION_IF_NULL(session_);
MS_EXCEPTION_IF_NULL(segment);
MS_EXCEPTION_IF_NULL(device_context);
MS_LOG(INFO) << "Status record: start compile graph.";
auto nodes = segment->nodes_;
auto device_terget = device_context->GetDeviceType();
// Generate kernel graph.
KernelGraphPtr graph = session_->ConstructKernelGraph(nodes, outputs, device_terget, true, false);
MS_EXCEPTION_IF_NULL(graph);

graph->set_flag(kAttrMutableKernel, true);

opt::EliminateIllegalDataTypePass(graph);
// Unify the MindIR; this must run before the graph optimization.
auto deprecated_kernel_executor =
dynamic_cast<device::DeprecatedKernelExecutor *>(device_context->kernel_executor_.get());
if (deprecated_kernel_executor != nullptr) {
deprecated_kernel_executor->UnifyMindIR(graph);
} else {
opt::CommonUnifyMindIR(graph);
}

// The graph common optimization.
graph->UpdateGraphAquireGilAttr();
opt::BackendCommonOptimization(graph);
graph->SetInputNodes();
auto manager = MakeManager({graph});
if (manager) {
manager->AddFuncGraph(graph);
graph->set_manager(manager);
}
session_->SetInputNodeUsage(graph, manager);
graph->SetOptimizerFlag();
graph->set_run_mode(device::RunMode::kKernelMode);

// Graph kernel does not support PyNative mode yet; print a warning here.
graphkernel::GraphKernelFlags::GetInstance().CheckSupport();

GraphId graph_id = graph->graph_id();
graph->set_root_graph_id(graph_id);
session_->DumpGraphs({graph});

MS_LOG(INFO) << "Status record: end compile graph. graph id: " << graph_id;
return graph_id;
}

GraphId GraphCompiler::CompileWholeGraphForGraphRunMode(const FuncGraphPtr &func_graph,
const DeviceContext *device_context) {
MS_EXCEPTION_IF_NULL(session_);

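A minimal sketch of the dispatch this new entry point enables, assuming the caller routes segments with any unknown dimension (conventionally -1) to the dynamic path, where kernels stay mutable so shapes can be re-inferred per launch. The names below are illustrative, not MindSpore APIs:

#include <cstdint>
#include <cstdio>
#include <vector>

// A shape is dynamic if any dimension is unknown (-1 by convention here).
bool IsDynamic(const std::vector<std::vector<int64_t>> &input_shapes) {
  for (const auto &shape : input_shapes) {
    for (int64_t dim : shape) {
      if (dim < 0) return true;
    }
  }
  return false;
}

int main() {
  std::vector<std::vector<int64_t>> shapes = {{-1, 128}, {128, 256}};
  // Static graphs take the regular CompileGraph path; graphs with unknown
  // dimensions take the dynamic path that marks kernels mutable.
  std::printf(IsDynamic(shapes) ? "CompileDynamicGraph path\n" : "CompileGraph path\n");
  return 0;
}
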
@ -607,10 +656,10 @@ void GraphCompiler::GetSingleOpRunInfoAndGraphInfo(const CNodePtr &kernel, const
MS_EXCEPTION_IF_NULL(session_);
MS_EXCEPTION_IF_NULL(graph_info);
*op_run_info = session_->GetSingleOpRunInfo(kernel, *graph_info, tensor_info, graph_output_info);
(*op_run_info)->base_op_run_info.use_dynamic_shape_process = use_dynamic_shape_process;
session_->GetSingleOpGraphInfo(kernel, tensor_info, graph_info, *op_run_info);
MS_EXCEPTION_IF_NULL(*op_run_info);
(*op_run_info)->base_op_run_info.graph_info = *graph_info;
(*op_run_info)->base_op_run_info.use_dynamic_shape_process = use_dynamic_shape_process;
}

void GraphCompiler::CalculateRefCount(const KernelGraphPtr &graph, std::map<KernelWithIndex, size_t> *ref_count) const {

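The reordering above suggests graph_info serves as the single-op cache key and is now computed after the run info carries the dynamic-shape flag. A hedged sketch of such a key, assuming (the diff does not confirm this) that a dynamic-shape process keys on rank rather than concrete dims so one compiled kernel is reused across shapes:

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

// Hypothetical cache key: op name plus per-input shape. Under a dynamic-shape
// process the concrete dims are replaced by rank only.
std::string MakeGraphInfo(const std::string &op, const std::vector<std::vector<int64_t>> &shapes, bool dynamic) {
  std::string key = op;
  for (const auto &s : shapes) {
    key += dynamic ? "|rank" + std::to_string(s.size()) : "|";
    if (!dynamic) {
      for (int64_t d : s) key += std::to_string(d) + "x";
    }
  }
  return key;
}

int main() {
  std::printf("%s\n", MakeGraphInfo("Add", {{2, 3}, {2, 3}}, false).c_str());  // Add|2x3x|2x3x
  std::printf("%s\n", MakeGraphInfo("Add", {{2, 3}, {2, 3}}, true).c_str());   // Add|rank2|rank2
  return 0;
}
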
@ -101,6 +101,9 @@ class GraphCompiler {
GraphId CompileGraph(const GraphSegmentPtr &segment, const AnfNodePtrList &outputs,
const DeviceContext *device_context, device::RunMode run_mode, bool run_in_pynative = false);

GraphId CompileDynamicGraph(const GraphSegmentPtr &segment, const AnfNodePtrList &outputs,
const DeviceContext *device_context);

// Construct kernel graph from function graph and compile kernel graph in Graph mode,
// the detailed implementation of compiling graph is in 'CompileGraphImpl'.
GraphId CompileWholeGraphForGraphRunMode(const FuncGraphPtr &func_graph, const DeviceContext *device_context);

@ -127,12 +127,13 @@ void UpdateRefNodeOutputDeviceAddress(const KernelGraphPtr &graph) {
auto output_index = output_pair.second;
auto &input_node = input_pair.first;
auto input_node_output_index = input_pair.second;

auto input_addr = AnfAlgo::GetMutableOutputAddr(input_node, input_node_output_index, false);
auto ref_node_output_addr = AnfAlgo::GetMutableOutputAddr(ref_node, output_index, false);
if (input_addr != ref_node_output_addr) {
AnfAlgo::SetOutputAddr(input_addr, output_index, ref_node.get());
if (!AnfAlgo::OutputAddrExist(input_node, input_node_output_index, false)) {
MS_LOG(WARNING) << "Output address not exist, node " << input_node->fullname_with_scope() << " index "
<< input_node_output_index;
continue;
}
auto input_addr = AnfAlgo::GetMutableOutputAddr(input_node, input_node_output_index, false);
AnfAlgo::SetOutputAddr(input_addr, output_index, ref_node.get());
}
}

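A standalone sketch of the aliasing idea behind UpdateRefNodeOutputDeviceAddress: a ref output shares its input's device address instead of owning one, and an input without an address is skipped with a warning rather than dereferenced. The types here are stand-ins, not the real DeviceAddress class:

#include <cstdio>
#include <map>
#include <memory>
#include <string>

struct DeviceAddress { void *ptr = nullptr; };
using AddrPtr = std::shared_ptr<DeviceAddress>;

int main() {
  std::map<std::string, AddrPtr> output_addr;  // node name -> device address
  output_addr["input0"] = std::make_shared<DeviceAddress>();

  const std::string input_node = "input0", ref_node = "assign0";
  auto it = output_addr.find(input_node);
  if (it == output_addr.end()) {
    std::printf("output address not exist for %s, skip\n", input_node.c_str());
    return 0;
  }
  output_addr[ref_node] = it->second;  // share the address, do not copy it
  std::printf("aliased: %d\n", output_addr[ref_node] == output_addr[input_node]);
  return 0;
}
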
@ -503,7 +504,7 @@ void LaunchKernelsDynamic(const KernelGraphPtr &graph, const device::DeviceConte
auto workspaces = CreateKernelWorkspaceAddressDynamic(runtime_info, device_context, node);

if (!MallocForKernelOutput(runtime_info, node, device_context)) {
MS_LOG(EXCEPTION) << "Malloc for kernel output failed, Memory isn't enough, node:" << node->fullname_with_scope();
MS_LOG(EXCEPTION) << "Malloc for kernel output failed, node:" << node->fullname_with_scope();
}
auto outputs = CreateKernelOutputAddress(runtime_info);
const size_t stream_id = AnfAlgo::GetStreamId(node);

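A minimal sketch of the per-launch sequence LaunchKernelsDynamic follows for dynamic shapes: re-infer output sizes for the current inputs, allocate for this shape, then launch. The Kernel type below is hypothetical:

#include <cstdio>
#include <vector>

// Sizes are recomputed every call because they can change between launches.
struct Kernel {
  std::vector<size_t> InferOutputSizes(const std::vector<size_t> &in_elems) const {
    return {in_elems[0]};  // identity-like op, just for the sketch
  }
};

int main() {
  Kernel k;
  std::vector<size_t> input_elems = {1024};
  auto out_sizes = k.InferOutputSizes(input_elems);
  std::vector<std::vector<float>> outputs;
  for (size_t n : out_sizes) outputs.emplace_back(n);  // "malloc for kernel output"
  std::printf("launch with output of %zu elements\n", outputs[0].size());
  return 0;
}
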
@ -96,6 +96,7 @@ REG_ADPT_DESC(Expand, "Expand", ADPT_DESC(Expand))

// ExpandDims
INPUT_MAP(ExpandDims) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(axis)}};
ATTR_INPUT_MAP(ExpandDims) = {{"axis", 2}};
ATTR_MAP(ExpandDims) = EMPTY_ATTR_MAP;
OUTPUT_MAP(ExpandDims) = {{0, OUTPUT_DESC(y)}};
REG_ADPT_DESC(ExpandDims, kNameExpandDims, ADPT_DESC(ExpandDims))

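The ATTR_INPUT_MAP entries added in this and the following hunks let one GE adapter accept an op in both its attribute form and its input form, which matters once values like axis or depth become runtime inputs under dynamic shape. A sketch of that fallback, using illustrative container types rather than the adapter framework's:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  // If "axis" arrives as an attribute (static form) rather than as input #2
  // (dynamic form), materialize the attribute as the missing input so one
  // adapter handles both forms.
  std::map<std::string, int> attr_input_map = {{"axis", 2}};
  std::map<std::string, long> attrs = {{"axis", 1}};
  std::vector<std::string> inputs = {"x"};  // input #2 (axis) is absent

  for (const auto &[name, index] : attr_input_map) {
    if (static_cast<int>(inputs.size()) < index && attrs.count(name)) {
      inputs.push_back("const(" + std::to_string(attrs[name]) + ")");
    }
  }
  std::printf("inputs: %s, %s\n", inputs[0].c_str(), inputs[1].c_str());
  return 0;
}
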
@ -424,7 +424,7 @@ REG_ADPT_DESC(RealDiv, kNameRealDiv, ADPT_DESC(RealDiv))
// Cast
INPUT_MAP(Cast) = {{1, INPUT_DESC(x)}};
INPUT_ATTR_MAP(Cast) = {{2, ATTR_DESC(dst_type, AnyTraits<GEType>())}};
ATTR_MAP(Cast) = EMPTY_ATTR_MAP;
ATTR_MAP(Cast) = {{"dst_type", ATTR_DESC(dst_type, AnyTraits<GEType>())}};
OUTPUT_MAP(Cast) = {{0, OUTPUT_DESC(y)}};
REG_ADPT_DESC(Cast, prim::kPrimCast->name(), ADPT_DESC(Cast))

@ -59,6 +59,7 @@ REG_ADPT_DESC(InTopKD, kNameInTopKD, ADPT_DESC(InTopKD))

// OneHot
INPUT_MAP(OneHot) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(depth)}, {3, INPUT_DESC(on_value)}, {4, INPUT_DESC(off_value)}};
ATTR_INPUT_MAP(OneHot) = {{"depth", 2}};
ATTR_MAP(OneHot) = {{"axis", ATTR_DESC(axis, AnyTraits<int64_t>())}};
OUTPUT_MAP(OneHot) = {{0, OUTPUT_DESC(y)}};
REG_ADPT_DESC(OneHot, prim::kPrimOneHot->name(), ADPT_DESC(OneHot))

@ -68,7 +69,7 @@ INPUT_MAP(GatherV2) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(indices)}, {3, INPUT_D
ATTR_INPUT_MAP(GatherV2) = {{"axis", 3}};
ATTR_MAP(GatherV2) = EMPTY_ATTR_MAP;
OUTPUT_MAP(GatherV2) = {{0, OUTPUT_DESC(y)}};
REG_ADPT_DESC(GatherV2, prim::kPrimGather->name(), ADPT_DESC(GatherV2))
REG_ADPT_DESC(GatherV2, prim::kPrimGatherV2->name(), ADPT_DESC(GatherV2))
REG_ADPT_DESC(Gather, prim::kPrimGather->name(), ADPT_DESC(GatherV2))

// ScatterNd

@ -30,7 +30,8 @@ INPUT_MAP(Pack) = EMPTY_INPUT_MAP;
DYN_INPUT_MAP(Pack) = {{1, DYN_INPUT_DESC(x)}};
ATTR_MAP(Pack) = {{"num", ATTR_DESC(N, AnyTraits<int64_t>())}, {"axis", ATTR_DESC(axis, AnyTraits<int64_t>())}};
OUTPUT_MAP(Pack) = {{0, OUTPUT_DESC(y)}};
REG_ADPT_DESC(Pack, prim::kStack, ADPT_DESC(Pack))
REG_ADPT_DESC(Pack1, prim::kStack, ADPT_DESC(Pack))
REG_ADPT_DESC(Pack2, prim::kPack, ADPT_DESC(Pack))

// ParallelConcat
INPUT_MAP(ParallelConcat) = EMPTY_INPUT_MAP;

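A sketch of why the Pack registration splits into Pack1/Pack2: the registry needs a unique key per entry, but both op names can resolve to the same adapter, so only the keys differ. Types here are stand-ins for the REG_ADPT_DESC machinery:

#include <cstdio>
#include <map>
#include <string>

struct Adapter { const char *ge_op; };

int main() {
  // Two front-end names ("Stack" and "Pack") mapped onto one GE adapter.
  static Adapter pack_adapter{"Pack"};
  std::map<std::string, Adapter *> registry;
  registry["Stack"] = &pack_adapter;
  registry["Pack"] = &pack_adapter;
  std::printf("Stack -> %s, Pack -> %s\n", registry["Stack"]->ge_op, registry["Pack"]->ge_op);
  return 0;
}
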
@ -1703,7 +1703,7 @@ std::string AnfAlgo::GetTensorValueString(const tensor::TensorPtr &tensor) {
std::ostringstream buf;
auto fn = [&buf, data_size](auto addr) {
for (size_t i = 0; i < data_size; ++i) {
buf << *(addr + i);
buf << *(addr + i) << ",";
}
};

@ -1713,6 +1713,8 @@ std::string AnfAlgo::GetTensorValueString(const tensor::TensorPtr &tensor) {
fn(reinterpret_cast<int *>(tensor->data_c()));
} else if (dtype->type_id() == kNumberTypeInt8) {
fn(reinterpret_cast<int8_t *>(tensor->data_c()));
} else if (dtype->type_id() == kNumberTypeUInt8) {
fn(reinterpret_cast<uint8_t *>(tensor->data_c()));
} else if (dtype->type_id() == kNumberTypeInt16) {
fn(reinterpret_cast<int16_t *>(tensor->data_c()));
} else if (dtype->type_id() == kNumberTypeInt32) {

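The two hunks above extend GetTensorValueString: a comma after each element keeps values from running together, and the generic lambda lets one body serve every element type reached by the type-id dispatch. A standalone sketch of the same pattern (the unary + that makes int8_t print numerically rather than as a char is an assumption about the desired output, not taken from the diff):

#include <cstdint>
#include <cstdio>
#include <sstream>
#include <string>
#include <vector>

int main() {
  // One generic lambda body; the caller only picks the pointer type.
  auto dump = [](auto *addr, size_t n) {
    std::ostringstream buf;
    for (size_t i = 0; i < n; ++i) buf << +addr[i] << ",";  // unary + promotes int8_t to int
    return buf.str();
  };

  std::vector<int8_t> i8 = {1, -2, 3};
  std::vector<float> f32 = {0.5f, 1.5f};
  std::printf("%s %s\n", dump(i8.data(), i8.size()).c_str(), dump(f32.data(), f32.size()).c_str());
  return 0;
}
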
@ -166,6 +166,7 @@ constexpr auto kDynamicShape = "DynamicShape";
constexpr auto kTensorShape = "TensorShape";
constexpr auto kCheckNumerics = "CheckNumerics";
constexpr auto kStack = "Stack";
constexpr auto kPack = "Pack";
constexpr auto kLogNormalReverse = "LogNormalReverse";
constexpr auto kUnstack = "Unstack";
constexpr auto kTupleGetItem = "TupleGetItem";

@ -31,7 +30,6 @@ batch_matmul_op_info = TBERegOp("BatchMatMul") \
.input(2, "bias", False, "optional", "all") \
.output(0, "y", False, "required", "all") \
.dtype_format(DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_Default, DataType.F16_FracNZ) \
.dtype_format(DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F32_Default, DataType.F32_FracNZ) \
.get_op_info()

@ -36,8 +36,6 @@ matmul_op_info = TBERegOp("MatMul") \
DataType.F16_FracNZ) \
.dtype_format(DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F32_Default, DataType.I8_Default,
DataType.F16_FracNZ) \
.dtype_format(DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F32_Default, DataType.I8_Default,
DataType.F32_FracNZ) \
.get_op_info()

@ -296,7 +296,7 @@ class CompareMultiNet2(nn.Cell):
return x


@pytest.mark.level0
@pytest.mark.level2
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training

@ -353,7 +353,7 @@ def test_pynative_forward_hook():
assert np.allclose(grad[1][0].asnumpy(), expect_grad[1][0].asnumpy(), 0.000001, 0.000001)


@pytest.mark.level0
@pytest.mark.level2
@pytest.mark.platform_x86_cpu
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training