forked from mindspore-Ecosystem/mindspore
!19370 Adjust CPU optimize graph pass
Merge pull request !19370 from zyli2020/mindrt_debug
This commit is contained in:
commit
88664ea17a
|
@ -319,6 +319,8 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const Devic
|
|||
// 'KernelMod' is real executive object of kernel.
|
||||
device_context->CreateKernel(graph->execution_order());
|
||||
|
||||
device_context->PreprocessBeforeRunGraph(graph);
|
||||
|
||||
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode) {
|
||||
// Create device address for all anf nodes of graph.
|
||||
CreateDeviceAddress(graph, device_context);
|
||||
|
@ -376,6 +378,8 @@ GraphId GraphCompiler::CompileGraph(const session::OpRunInfo &op_run_info, const
|
|||
// Generate 'KernelMod' for kernel in graph.
|
||||
device_context->CreateKernel(graph->execution_order());
|
||||
|
||||
device_context->PreprocessBeforeRunSingleOpGraph(graph);
|
||||
|
||||
// Create device address for all anf nodes of graph.
|
||||
CreateDeviceAddress(graph, device_context);
|
||||
|
||||
|
|
|
@ -88,11 +88,6 @@ void CPUDeviceContext::OptimizeGraph(const KernelGraphPtr &graph) const {
|
|||
|
||||
// Run final optimization.
|
||||
opt::CommonFinalOptimization(graph);
|
||||
|
||||
// Remove reorder after PS feature finish adapting push/pull in auto_monad.
|
||||
auto execution_order = graph->execution_order();
|
||||
AnfAlgo::ReorderPosteriorExecList(NOT_NULL(&execution_order));
|
||||
graph->set_execution_order(execution_order);
|
||||
}
|
||||
|
||||
void CPUDeviceContext::OptimizeSingleOpGraph(const KernelGraphPtr &graph) const {
|
||||
|
@ -104,9 +99,7 @@ void CPUDeviceContext::OptimizeSingleOpGraph(const KernelGraphPtr &graph) const
|
|||
void CPUDeviceContext::OptimizeGraphImpl(const KernelGraphPtr &graph) const {
|
||||
auto optimizer = std::make_shared<opt::GraphOptimizer>();
|
||||
auto pm = std::make_shared<opt::PassManager>();
|
||||
pm->AddPass(std::make_shared<opt::InsertCastCPU>("insert_cast_cpu"));
|
||||
pm->AddPass(std::make_shared<opt::InsertFormatTransformOpCPU>("insert_format_transform_op_cpu"));
|
||||
pm->AddPass(std::make_shared<opt::EraseVisitAttr>());
|
||||
optimizer->AddPassManager(pm);
|
||||
(void)optimizer->Optimize(graph);
|
||||
graph->SetExecOrderByDefault();
|
||||
|
@ -142,6 +135,30 @@ void CPUDeviceContext::CreateKernel(const std::vector<CNodePtr> &nodes) const {
|
|||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
void ProcessCast(const KernelGraphPtr &graph) {
|
||||
auto optimizer = std::make_shared<opt::GraphOptimizer>();
|
||||
auto pm = std::make_shared<opt::PassManager>();
|
||||
pm->AddPass(std::make_shared<opt::InsertCastCPU>("insert_cast_cpu"));
|
||||
MS_LOG(INFO) << "Insert cast pass";
|
||||
pm->AddPass(std::make_shared<opt::EraseVisitAttr>());
|
||||
optimizer->AddPassManager(pm);
|
||||
(void)optimizer->Optimize(graph);
|
||||
graph->SetExecOrderByDefault();
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Graph Mode preprocessing step: runs ProcessCast() on the graph, then passes a local copy of the
// execution order through AnfAlgo::ReorderPosteriorExecList and stores the result back in the graph.
void CPUDeviceContext::PreprocessBeforeRunGraph(const KernelGraphPtr &graph) const {
  ProcessCast(graph);

  // TODO: remove this reordering once the PS feature has finished adapting push/pull in auto_monad.
  auto reordered_kernels = graph->execution_order();
  AnfAlgo::ReorderPosteriorExecList(NOT_NULL(&reordered_kernels));
  graph->set_execution_order(reordered_kernels);
}
|
||||
|
||||
// Single-op graph preprocessing step: only runs ProcessCast() on the graph.
void CPUDeviceContext::PreprocessBeforeRunSingleOpGraph(const KernelGraphPtr &graph) const {
  ProcessCast(graph);
}
|
||||
|
||||
bool CPUDeviceContext::LaunchKernel(const CNodePtr &kernel, const std::vector<AddressPtr> &inputs,
|
||||
const std::vector<AddressPtr> &workspace, const std::vector<AddressPtr> &outputs,
|
||||
bool) const {
|
||||
|
|
|
@ -48,6 +48,9 @@ class CPUDeviceContext : public DeviceContext {
|
|||
void SetOperatorInfo(const std::vector<CNodePtr> &nodes) const override;
|
||||
void CreateKernel(const std::vector<CNodePtr> &nodes) const override;
|
||||
|
||||
// Runs the InsertCastCPU pass on the graph and then reorders its execution list.
void PreprocessBeforeRunGraph(const KernelGraphPtr &graph) const override;
|
||||
// Runs the InsertCastCPU pass on a single-op graph (no execution-list reordering).
void PreprocessBeforeRunSingleOpGraph(const KernelGraphPtr &graph) const override;
|
||||
|
||||
bool LaunchKernel(const CNodePtr &kernel, const std::vector<AddressPtr> &inputs,
|
||||
const std::vector<AddressPtr> &workspace, const std::vector<AddressPtr> &outputs,
|
||||
bool is_dynamic_shape = false) const override;
|
||||
|
|
|
@ -86,6 +86,11 @@ class DeviceContext {
|
|||
// 'KernelMod' is the real executable object of a kernel.
|
||||
virtual void CreateKernel(const std::vector<CNodePtr> &nodes) const = 0;
|
||||
|
||||
// Adjust the kernel graph before it is run; used in Graph Mode.
|
||||
virtual void PreprocessBeforeRunGraph(const KernelGraphPtr &graph) const {
  // Intentionally empty: the base implementation is a no-op and derived device contexts may override it.
}
|
||||
// Adjust the single-op kernel graph before it is run; used in PyNative Mode.
|
||||
virtual void PreprocessBeforeRunSingleOpGraph(const KernelGraphPtr &graph) const {
  // Intentionally empty: the base implementation is a no-op and derived device contexts may override it.
}
|
||||
|
||||
// Infer kernel shape and update abstract info for dynamic shape kernel.
|
||||
virtual void UpdateDynamicShape(const CNodePtr &kernel) const {
  // Default behaviour: forward the kernel to AnfAlgo::InferShape.
  AnfAlgo::InferShape(kernel);
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue