!19370 Adjust CPU optimize graph pass

Merge pull request !19370 from zyli2020/mindrt_debug
This commit is contained in:
i-robot 2021-07-05 21:09:43 +00:00 committed by Gitee
commit 88664ea17a
4 changed files with 36 additions and 7 deletions

View File

@ -319,6 +319,8 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const Devic
// 'KernelMod' is real executive object of kernel.
device_context->CreateKernel(graph->execution_order());
device_context->PreprocessBeforeRunGraph(graph);
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode) {
// Create device address for all anf nodes of graph.
CreateDeviceAddress(graph, device_context);
@ -376,6 +378,8 @@ GraphId GraphCompiler::CompileGraph(const session::OpRunInfo &op_run_info, const
// Generate 'KernelMod' for kernel in graph.
device_context->CreateKernel(graph->execution_order());
device_context->PreprocessBeforeRunSingleOpGraph(graph);
// Create device address for all anf nodes of graph.
CreateDeviceAddress(graph, device_context);

View File

@ -88,11 +88,6 @@ void CPUDeviceContext::OptimizeGraph(const KernelGraphPtr &graph) const {
// Run final optimization.
opt::CommonFinalOptimization(graph);
// Remove reorder after PS feature finish adapting push/pull in auto_monad.
auto execution_order = graph->execution_order();
AnfAlgo::ReorderPosteriorExecList(NOT_NULL(&execution_order));
graph->set_execution_order(execution_order);
}
void CPUDeviceContext::OptimizeSingleOpGraph(const KernelGraphPtr &graph) const {
@ -104,9 +99,7 @@ void CPUDeviceContext::OptimizeSingleOpGraph(const KernelGraphPtr &graph) const
void CPUDeviceContext::OptimizeGraphImpl(const KernelGraphPtr &graph) const {
auto optimizer = std::make_shared<opt::GraphOptimizer>();
auto pm = std::make_shared<opt::PassManager>();
pm->AddPass(std::make_shared<opt::InsertCastCPU>("insert_cast_cpu"));
pm->AddPass(std::make_shared<opt::InsertFormatTransformOpCPU>("insert_format_transform_op_cpu"));
pm->AddPass(std::make_shared<opt::EraseVisitAttr>());
optimizer->AddPassManager(pm);
(void)optimizer->Optimize(graph);
graph->SetExecOrderByDefault();
@ -142,6 +135,30 @@ void CPUDeviceContext::CreateKernel(const std::vector<CNodePtr> &nodes) const {
}
}
namespace {
// Builds a pass manager holding the CPU cast-insertion pass followed by the
// visit-attribute eraser, runs it on `graph` through a GraphOptimizer, and then
// calls SetExecOrderByDefault() on the graph.
void ProcessCast(const KernelGraphPtr &graph) {
  auto graph_optimizer = std::make_shared<opt::GraphOptimizer>();
  auto cast_pass_manager = std::make_shared<opt::PassManager>();

  // Passes run in the order they are registered here.
  cast_pass_manager->AddPass(std::make_shared<opt::InsertCastCPU>("insert_cast_cpu"));
  MS_LOG(INFO) << "Insert cast pass";
  cast_pass_manager->AddPass(std::make_shared<opt::EraseVisitAttr>());

  graph_optimizer->AddPassManager(cast_pass_manager);
  // The optimizer's return value is intentionally discarded.
  (void)graph_optimizer->Optimize(graph);

  graph->SetExecOrderByDefault();
}
}  // namespace
// Preprocessing hook run before a whole graph is executed: first the
// cast-insertion pass, then a posterior reorder of the execution list.
void CPUDeviceContext::PreprocessBeforeRunGraph(const KernelGraphPtr &graph) const {
  ProcessCast(graph);

  // Remove reorder after PS feature finish adapting push/pull in auto_monad.
  // The order is taken into a local, rearranged in place, and written back.
  auto kernel_order = graph->execution_order();
  AnfAlgo::ReorderPosteriorExecList(NOT_NULL(&kernel_order));
  graph->set_execution_order(kernel_order);
}
// Preprocessing hook for single-op graphs: only the cast-insertion pass is
// run; no execution-list reordering is applied here.
void CPUDeviceContext::PreprocessBeforeRunSingleOpGraph(const KernelGraphPtr &graph) const {
  ProcessCast(graph);
}
bool CPUDeviceContext::LaunchKernel(const CNodePtr &kernel, const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace, const std::vector<AddressPtr> &outputs,
bool) const {

View File

@ -48,6 +48,9 @@ class CPUDeviceContext : public DeviceContext {
void SetOperatorInfo(const std::vector<CNodePtr> &nodes) const override;
void CreateKernel(const std::vector<CNodePtr> &nodes) const override;
// Runs the cast-insertion pass and reorders the graph's execution list before the graph is run.
void PreprocessBeforeRunGraph(const KernelGraphPtr &graph) const override;
// Runs the cast-insertion pass on a single-op graph before it is run.
void PreprocessBeforeRunSingleOpGraph(const KernelGraphPtr &graph) const override;
bool LaunchKernel(const CNodePtr &kernel, const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace, const std::vector<AddressPtr> &outputs,
bool is_dynamic_shape = false) const override;

View File

@ -86,6 +86,11 @@ class DeviceContext {
// 'KernelMod' is real executive object of kernel.
virtual void CreateKernel(const std::vector<CNodePtr> &nodes) const = 0;
// Adjust the kernel graph before it is run; used in Graph Mode.
// The default implementation does nothing; derived device contexts may override it.
virtual void PreprocessBeforeRunGraph(const KernelGraphPtr &graph) const {}
// Adjust the single-op kernel graph before it is run; used in PyNative Mode.
// The default implementation does nothing; derived device contexts may override it.
virtual void PreprocessBeforeRunSingleOpGraph(const KernelGraphPtr &graph) const {}
// Infer kernel shape and update abstract info for dynamic shape kernel.
// The default implementation delegates to AnfAlgo::InferShape(kernel).
virtual void UpdateDynamicShape(const CNodePtr &kernel) const { AnfAlgo::InferShape(kernel); }