From 30c6fa7f9b0d0b951d6012e5856616dae0ae05b9 Mon Sep 17 00:00:00 2001 From: zhujingxuan Date: Fri, 26 Nov 2021 16:05:52 +0800 Subject: [PATCH] bind stream with handle --- .../backend/kernel_compiler/gpu/math/cholesky_gpu_kernel.h | 4 ++++ .../kernel_compiler/gpu/math/cholesky_solve_gpu_kernel.h | 2 ++ .../kernel_compiler/gpu/math/cholesky_trsm_solve_gpu_kernel.h | 4 ++++ .../backend/kernel_compiler/gpu/math/eigh_c_gpu_kernel.h | 4 ++++ .../ccsrc/backend/kernel_compiler/gpu/math/eigh_gpu_kernel.h | 2 ++ .../ccsrc/backend/kernel_compiler/gpu/math/lu_gpu_kernel.h | 2 ++ .../backend/kernel_compiler/gpu/math/matmul_gpu_kernel.h | 3 ++- .../kernel_compiler/gpu/math/matrix_inverse_gpu_kernel.h | 2 ++ .../backend/kernel_compiler/gpu/math/trsm_solve_gpu_kernel.h | 2 ++ .../backend/kernel_compiler/gpu/math/update_thor_gradient.h | 2 ++ 10 files changed, 26 insertions(+), 1 deletion(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_gpu_kernel.h index 407b1d9bf6e..3786b614349 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_gpu_kernel.h @@ -51,6 +51,10 @@ class CholeskyGpuKernel : public GpuKernel { bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs, void *stream_ptr) override { + CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(handle_, reinterpret_cast(stream_ptr)), + "cusolverDnSetStream failed"); + CHECK_CUBLAS_RET_WITH_ERROR(cublasSetStream(blas_handle_, reinterpret_cast(stream_ptr)), + "cublasSetStream failed"); if (!use_split_matrix_) { return NoSplitLaunch(inputs, workspace, outputs, stream_ptr); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_solve_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_solve_gpu_kernel.h index 1639efd67c4..f14b1218091 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_solve_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_solve_gpu_kernel.h @@ -49,6 +49,8 @@ class CholeskySolveGpuKernel : public GpuKernel { bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs, void *stream_ptr) override { + CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(handle_, reinterpret_cast(stream_ptr)), + "cusolverDnSetStream failed"); auto input_a_addr = GetDeviceAddress(inputs, kDim0); auto input_b_addr = GetDeviceAddress(inputs, kDim1); auto output_addr = GetDeviceAddress(outputs, kDim0); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_trsm_solve_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_trsm_solve_gpu_kernel.h index 28726144cf4..18a5e25c802 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_trsm_solve_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/cholesky_trsm_solve_gpu_kernel.h @@ -55,6 +55,10 @@ class CholeskyTrsmGpuKernel : public GpuKernel { if (is_null_input_) { return true; } + CHECK_CUBLAS_RET_WITH_ERROR(cublasSetStream(blas_handle_, reinterpret_cast(stream_ptr)), + "cublasSetStream failed"); + CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(handle_, reinterpret_cast(stream_ptr)), + "cusolverDnSetStream failed"); if (!use_split_matrix_) { LaunchNonSplitMatrix(inputs, workspace, outputs, stream_ptr); } else { diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/eigh_c_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/eigh_c_gpu_kernel.h index 4b9828ab177..d62eccaa692 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/eigh_c_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/eigh_c_gpu_kernel.h @@ -85,6 +85,10 @@ class EighcGpuKernel : public GpuKernel { bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs, void *stream_ptr) override { + CHECK_CUBLAS_RET_WITH_ERROR(cublasSetStream(blas_handle_, reinterpret_cast(stream_ptr)), + "cublasSetStream failed"); + CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(cusolver_handle_, reinterpret_cast(stream_ptr)), + "cusolverDnSetStream failed"); // matrix A, input or output(eigenvector) auto inout_A_addr = GetDeviceAddress(inputs, kDim0); if (lower_) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/eigh_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/eigh_gpu_kernel.h index 831c8de1942..d4d3495c9da 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/eigh_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/eigh_gpu_kernel.h @@ -71,6 +71,8 @@ class EighGpuKernel : public GpuKernel { bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs, void *stream_ptr) override { + CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(cusolver_handle_, reinterpret_cast(stream_ptr)), + "cusolverDnSetStream failed"); // matrix A, input or output(eigenvector) auto inout_A_addr = GetDeviceAddress(inputs, kDim0); // Notice :this is important diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/lu_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/lu_gpu_kernel.h index 70bd8696b14..4eb164952ed 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/lu_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/lu_gpu_kernel.h @@ -47,6 +47,8 @@ class LUGpuKernel : public GpuKernel { bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs, void *stream_ptr) override { + CHECK_CUSOLVER_RET_WITH_ERROR(cusolverDnSetStream(handle_, reinterpret_cast(stream_ptr)), + "cusolverDnSetStream failed"); auto input_addr = GetDeviceAddress(inputs, kDim0); auto output_addr = GetDeviceAddress(outputs, kDim0); int *piv_output_addr = nullptr; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.h index dc605af8fcc..bc5c748bda0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/matmul_gpu_kernel.h @@ -43,8 +43,9 @@ class MatMulGpuKernel : public GpuKernel { if (is_null_input_) { return true; } + CHECK_CUBLAS_RET_WITH_ERROR(cublasSetStream(handle_, reinterpret_cast(stream_ptr)), + "cublasSetStream failed"); VARIABLE_NOT_USED(workspace); - VARIABLE_NOT_USED(stream_ptr); if (is_null_input_) { return true; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/matrix_inverse_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/matrix_inverse_gpu_kernel.h index 25ccc5fcef7..ed6f19c96c2 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/matrix_inverse_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/matrix_inverse_gpu_kernel.h @@ -41,6 +41,8 @@ class MatrixInverseGpuKernel : public GpuKernel { if (is_null_input_) { return true; } + CHECK_CUBLAS_RET_WITH_ERROR(cublasSetStream(handle_, reinterpret_cast(stream_ptr)), + "cublasSetStream failed"); T *input_addr = GetDeviceAddress(inputs, 0); T *output_addr = GetDeviceAddress(outputs, 0); auto compute_input_addr = GetDeviceAddress(workspace, 0); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/trsm_solve_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/trsm_solve_gpu_kernel.h index 43a6c4f36a0..51f2d81a509 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/trsm_solve_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/trsm_solve_gpu_kernel.h @@ -43,6 +43,8 @@ class TrsmGpuKernel : public GpuKernel { if (is_null_input_) { return true; } + CHECK_CUBLAS_RET_WITH_ERROR(cublasSetStream(blas_handle_, reinterpret_cast(stream_ptr)), + "cublasSetStream failed"); auto inputA_addr = GetDeviceAddress(inputs, 0); auto inputb_addr = GetDeviceAddress(inputs, 1); auto output_addr = GetDeviceAddress(outputs, 0); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/update_thor_gradient.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/update_thor_gradient.h index e410e40aa01..3a42ede40d2 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/math/update_thor_gradient.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/math/update_thor_gradient.h @@ -53,6 +53,8 @@ class UpdateThorGradientGpuKernel : public GpuKernel { if (is_null_input_) { return true; } + CHECK_CUBLAS_RET_WITH_ERROR(cublasSetStream(handle_, reinterpret_cast(stream_ptr)), + "cublasSetStream failed"); auto input1_addr = GetDeviceAddress(inputs, 0); auto input2_addr = GetDeviceAddress(inputs, 1); auto input3_addr = GetDeviceAddress(inputs, 2);