!18843 New runtime: support running a kernel graph that contains more than one kernel in PyNative mode.

Merge pull request !18843 from zyli2020/mindrt_debug
This commit is contained in:
i-robot 2021-06-25 01:32:06 +00:00 committed by Gitee
commit 7f5b507190
3 changed files with 28 additions and 8 deletions

View File

@ -53,11 +53,17 @@ bool IsHostQueueDSActor(const AnfNodePtr &node, const KernelGraphPtr &graph, con
const std::vector<AnfNodePtr> &host_parameters, GraphExecutionStrategy strategy) {
MS_EXCEPTION_IF_NULL(node);
if (node->isa<Parameter>() && (!AnfAlgo::IsParameterWeight(node->cast<ParameterPtr>()))) {
// There is device address in tensor, indicating the input tensor is certain kernel's output,
// so it's unnecessary to put the input node to host queue data source actor.
if (strategy == GraphExecutionStrategy::kStep && tensor != nullptr &&
std::dynamic_pointer_cast<DeviceTensor>(tensor->device_address()) != nullptr) {
return false;
if (strategy == GraphExecutionStrategy::kStep) {
// In step mode, if the number of kernel actors in the actor set is greater than one, the actor set needs to be
// driven to run by the data source actor.
if (graph != nullptr && graph->execution_order().size() > 1) {
return true;
}
// There is device address in tensor, indicating the input tensor is certain kernel's output,
// so it's unnecessary to put the input node to host queue data source actor.
if (tensor != nullptr && std::dynamic_pointer_cast<DeviceTensor>(tensor->device_address()) != nullptr) {
return false;
}
}
if (graph == nullptr) {

View File

@ -321,6 +321,11 @@ void PrepareDataForHostDataSourceActor(const std::unordered_map<AnfNodePtr, size
}
}
}
// Checks whether the given actor set holds exactly one kernel actor, i.e. it
// corresponds to a single-op execution. Raises via MS_EXCEPTION_IF_NULL when
// actor_set is null.
inline bool IsSingleOpActorSet(const ActorSet *actor_set) {
  MS_EXCEPTION_IF_NULL(actor_set);
  const auto kernel_actor_num = actor_set->kernel_actors_.size();
  return kernel_actor_num == 1;
}
} // namespace
void GraphScheduler::Clear() {
@ -611,7 +616,7 @@ bool GraphScheduler::Run(const ActorSet *actor_set, GraphExecutionStrategy strat
}
// Trigger kernel actor running in the step execution strategy.
if (strategy == GraphExecutionStrategy::kStep) {
if (strategy == GraphExecutionStrategy::kStep && IsSingleOpActorSet(actor_set)) {
MS_EXCEPTION_IF_NULL(input_tensors);
for (auto &kernel_actor : actor_set->kernel_actors_) {
MS_EXCEPTION_IF_NULL(kernel_actor);
@ -730,7 +735,7 @@ void GraphScheduler::Link(ActorSet *actor_set, const GraphCompilerInfo &graph_co
KernelWithIndex from_kernel_with_output_idx = AnfAlgo::VisitKernelWithReturnType(input_node, 0, false);
KernelWithIndex to_kernel_with_input_idx = std::make_pair(kernel, i);
const auto &tensor = FetchInputTensor(graph_compiler_info, index, i);
TensorPtr tensor = IsSingleOpActorSet(actor_set) ? FetchInputTensor(graph_compiler_info, index, i) : nullptr;
// The gather of linking data arrows of kernel by the different from kernel type.
LinkDataArrow(kernel_actor, graph_compiler_info, graph, from_kernel_with_output_idx, to_kernel_with_input_idx,
tensor);
@ -959,7 +964,7 @@ std::vector<KernelActorPtr> GraphScheduler::BuildNoInputKernelActor(const ActorS
for (auto &kernel_actor : actor_set->kernel_actors_) {
MS_EXCEPTION_IF_NULL(kernel_actor);
// Framework will trigger kernel actor running in the step execution strategy.
if (strategy == GraphExecutionStrategy::kStep) {
if (strategy == GraphExecutionStrategy::kStep && IsSingleOpActorSet(actor_set)) {
kernel_actor->input_controls_num_++;
continue;
}

View File

@ -316,6 +316,15 @@ void GPUDeviceContext::SetOperatorInfo(const std::vector<CNodePtr> &nodes) const
void GPUDeviceContext::CreateKernel(const std::vector<CNodePtr> &nodes) const { CreateGPUKernel(nodes); }
void GPUDeviceContext::UpdateDynamicShape(const CNodePtr &kernel) const {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
bool is_pynative_infer = ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER);
bool is_pynative_mode = ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode;
if (is_pynative_infer || is_pynative_mode) {
return;
}
auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
MS_EXCEPTION_IF_NULL(kernel_mod);