From 8745e4d3f113e33d6d89e44509f9dfe4e92c96f5 Mon Sep 17 00:00:00 2001 From: Gaoxiong Date: Mon, 28 Feb 2022 10:14:06 +0800 Subject: [PATCH] GraphKernel performance optimize: cache kernel addr in KernelMod --- .../gpu/kernel/akg/akg_gpu_kernel_mod.cc | 28 ++++++++++++------- .../gpu/kernel/akg/akg_gpu_kernel_mod.h | 2 ++ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/akg/akg_gpu_kernel_mod.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/akg/akg_gpu_kernel_mod.cc index 6d404a10ea9..9d634a61cb7 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/akg/akg_gpu_kernel_mod.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/akg/akg_gpu_kernel_mod.cc @@ -102,6 +102,12 @@ AkgGpuKernelMod::AkgGpuKernelMod(const KernelPackPtr &kernel_pack) : kernel_pack bool AkgGpuKernelMod::Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs, void *stream_ptr) { + const int BX = 0; + const int BY = 1; + const int BZ = 2; + const int TX = 3; + const int TY = 4; + const int TZ = 5; if (stream_ptr == 0) { MS_LOG(ERROR) << "stream_ptr should not be nullptr. Kernel name: " << kernel_name_; return false; @@ -110,16 +116,18 @@ bool AkgGpuKernelMod::Launch(const std::vector &inputs, const std::v MS_LOG(ERROR) << "kernel pack should not be nullptr. Kernel name: " << kernel_name_; return false; } - vector thread_info; - CUfunction kernel_addr; - CUresult result = kernel_manager_->GetFunction(kernel_pack_, false, &thread_info, &kernel_addr); - if (result != CUDA_SUCCESS) { - const char *msg = nullptr; - cuGetErrorName(result, &msg); - MS_LOG(ERROR) << "Get function " << kernel_name_ << " failed. Error message: " << msg; - return false; + CUresult result; + if (kernel_addr_ == nullptr) { + result = kernel_manager_->GetFunction(kernel_pack_, false, &thread_info_, &kernel_addr_); + if (result != CUDA_SUCCESS) { + const char *msg = nullptr; + cuGetErrorName(result, &msg); + MS_LOG(ERROR) << "Get function " << kernel_name_ << " failed. Error message: " << msg; + return false; + } } std::vector runtimeargs; + runtimeargs.reserve(inputs.size() + outputs.size() + workspace.size()); (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtimeargs), [](const AddressPtr &input) { return reinterpret_cast(&(input->addr)); }); (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtimeargs), @@ -128,8 +136,8 @@ bool AkgGpuKernelMod::Launch(const std::vector &inputs, const std::v (void)std::transform(std::begin(workspace), std::end(workspace), std::back_inserter(runtimeargs), [](const AddressPtr &addr) { return reinterpret_cast(&(addr->addr)); }); } - result = cuLaunchKernel(kernel_addr, thread_info[0], thread_info[1], thread_info[2], thread_info[3], thread_info[4], - thread_info[5], 0, reinterpret_cast(stream_ptr), + result = cuLaunchKernel(kernel_addr_, thread_info_[BX], thread_info_[BY], thread_info_[BZ], thread_info_[TX], + thread_info_[TY], thread_info_[TZ], 0, reinterpret_cast(stream_ptr), reinterpret_cast(&runtimeargs[0]), 0); if (result != CUDA_SUCCESS) { const char *msg = nullptr; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/akg/akg_gpu_kernel_mod.h b/mindspore/ccsrc/plugin/device/gpu/kernel/akg/akg_gpu_kernel_mod.h index 444de058c34..f19d3cf0b4b 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/akg/akg_gpu_kernel_mod.h +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/akg/akg_gpu_kernel_mod.h @@ -68,6 +68,8 @@ class AkgGpuKernelMod : public GpuKernelMod { private: KernelPackPtr kernel_pack_; + std::vector thread_info_; + CUfunction kernel_addr_{nullptr}; }; using AkgGpuKernelModPtr = std::shared_ptr;