Refactor Cholesky to CholeskyTrsm

peixu_ren 2020-11-03 10:52:01 -05:00
parent a07250e8b0
commit 9a81b50e43
5 changed files with 16 additions and 16 deletions

File 1/5: GPU kernel registration source (C++)

@@ -14,10 +14,10 @@
  * limitations under the License.
  */

-#include "backend/kernel_compiler/gpu/math/cholesky_solve_gpu_kernel.h"
+#include "backend/kernel_compiler/gpu/math/cholesky_trsm_solve_gpu_kernel.h"
 namespace mindspore {
 namespace kernel {
-MS_REG_GPU_KERNEL_ONE(Cholesky, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
-                      CholeskyGpuKernel, float)
+MS_REG_GPU_KERNEL_ONE(CholeskyTrsm, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+                      CholeskyTrsmGpuKernel, float)
 }  // namespace kernel
 }  // namespace mindspore
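Note on the rename: taken together with the cuBLAS include in the header below, the new name suggests the kernel pairs a Cholesky factorization (potrf) with a triangular solve (trsm), returning the inverse of the Cholesky factor rather than the factor itself. A minimal NumPy/SciPy sketch of that assumed behavior; the helper name and the inv(A) identity are illustrative, not MindSpore API:

import numpy as np
from scipy.linalg import solve_triangular

def cholesky_trsm_reference(a):
    # Hypothetical reference for one square, symmetric positive-definite
    # matrix, mirroring the assumed kernel semantics: factor, then solve.
    if a.shape[0] != a.shape[1]:
        raise ValueError("CholeskyTrsm need square matrix as input.")
    l = np.linalg.cholesky(a)                    # potrf: a = l @ l.T
    eye = np.eye(a.shape[0], dtype=a.dtype)
    return solve_triangular(l, eye, lower=True)  # trsm: solve l @ x = eye

a = np.array([[4.0, 2.0], [2.0, 3.0]], dtype=np.float32)
x = cholesky_trsm_reference(a)
# If this reading is right, inv(a) is recoverable as x.T @ x:
assert np.allclose(x.T @ x, np.linalg.inv(a), atol=1e-5)

Under this reading, the THOR layers further below can build the inverse of a damped covariance matrix from the op's output without a full matrix inversion.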

File 2/5: GPU kernel header (C++)

@@ -14,8 +14,8 @@
  * limitations under the License.
  */

-#ifndef MINDSPORE_CHOLESKY_SOLVE_GPU_KERNEL_H
-#define MINDSPORE_CHOLESKY_SOLVE_GPU_KERNEL_H
+#ifndef MINDSPORE_CHOLESKY_TRSM_SOLVE_GPU_KERNEL_H
+#define MINDSPORE_CHOLESKY_TRSM_SOLVE_GPU_KERNEL_H
 #include <cublas_v2.h>
 #include <cuda_runtime_api.h>
 #include <vector>
@@ -29,10 +29,10 @@
 namespace mindspore {
 namespace kernel {
 template <typename T>
-class CholeskyGpuKernel : public GpuKernel {
+class CholeskyTrsmGpuKernel : public GpuKernel {
  public:
-  CholeskyGpuKernel() : batch_(0), m_(0), lda_(0), is_null_input_(false), handle_(nullptr) {}
-  ~CholeskyGpuKernel() = default;
+  CholeskyTrsmGpuKernel() : batch_(0), m_(0), lda_(0), is_null_input_(false), handle_(nullptr) {}
+  ~CholeskyTrsmGpuKernel() = default;
   const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
   const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
   const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
@@ -111,12 +111,12 @@ class CholeskyGpuKernel : public GpuKernel {
     if (in_shape.size() == 2) {
       batch_ = 1;
       if (in_shape[0] != in_shape[1]) {
-        MS_LOG(ERROR) << "Cholesky need square matrix as input.";
+        MS_LOG(ERROR) << "CholeskyTrsm need square matrix as input.";
       }
     } else if (in_shape.size() == 3) {
       batch_ = SizeToInt(in_shape[0]);
       if (in_shape[1] != in_shape[2]) {
-        MS_LOG(ERROR) << "Cholesky need square matrix as input.";
+        MS_LOG(ERROR) << "CholeskyTrsm need square matrix as input.";
       }
     } else {
       MS_LOG(ERROR) << "Input Only support Rank 2 OR 3";
@@ -140,12 +140,12 @@ class CholeskyGpuKernel : public GpuKernel {
       InitSizeLists();
     } else {
       if (in_shape.size() != 2) {
-        MS_LOG(ERROR) << "Cholesky Split Matrix Need Input Rank as 2.";
+        MS_LOG(ERROR) << "CholeskyTrsm Split Matrix Need Input Rank as 2.";
       }
       height = in_shape[0];
       width = in_shape[1];
       if (height != width) {
-        MS_LOG(ERROR) << "Cholesky Split Matrix Need Square Matrix as Input.";
+        MS_LOG(ERROR) << "CholeskyTrsm Split Matrix Need Square Matrix as Input.";
       }
       if (SizeToInt(height) <= split_dim) {
         use_split_matrix = false;
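The shape handling in the two truncated hunks above boils down to a small decision; here is a hedged Python sketch (the selection between the batched path and the split path is assumed to hinge on split_dim, which the truncated view does not show explicitly):

def check_input_shape(in_shape, split_dim):
    # Assumed: split_dim == 0 means the plain (optionally batched) path.
    if split_dim == 0:
        if len(in_shape) == 2:
            batch = 1
            if in_shape[0] != in_shape[1]:
                raise ValueError("CholeskyTrsm need square matrix as input.")
        elif len(in_shape) == 3:
            batch = in_shape[0]
            if in_shape[1] != in_shape[2]:
                raise ValueError("CholeskyTrsm need square matrix as input.")
        else:
            raise ValueError("Input Only support Rank 2 OR 3")
        return batch, False
    # Split path: one large square matrix, tiled only if it exceeds split_dim.
    if len(in_shape) != 2:
        raise ValueError("CholeskyTrsm Split Matrix Need Input Rank as 2.")
    height, width = in_shape
    if height != width:
        raise ValueError("CholeskyTrsm Split Matrix Need Square Matrix as Input.")
    return 1, height > split_dim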

File 3/5: operations package imports (Python)

@@ -87,7 +87,7 @@ from .other_ops import (Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, Popul
 from ._thor_ops import (CusBatchMatMul, CusCholeskyTrsm, CusFusedAbsMax1, CusImg2Col, CusMatMulCubeDenseLeft,
                         CusMatMulCubeFraczRightMul, CusMatMulCube, CusMatrixCombine, CusTranspose02314,
                         CusMatMulCubeDenseRight,
-                        CusMatMulCubeFraczLeftCast, Im2Col, UpdateThorGradient, Cholesky, DetTriangle)
+                        CusMatMulCubeFraczLeftCast, Im2Col, UpdateThorGradient, CholeskyTrsm, DetTriangle)
 from .sparse_ops import SparseToDense
 from ._cache_ops import CacheSwapHashmap, SearchCacheIdx, CacheSwapTable, UpdateCache, MapCacheIdx

File 4/5: THOR primitive definition (Python)

@@ -608,7 +608,7 @@ class UpdateThorGradient(PrimitiveWithInfer):
         return x2_dtype


-class Cholesky(PrimitiveWithInfer):
+class CholeskyTrsm(PrimitiveWithInfer):
     """
     Inner API for resnet50 THOR GPU backend
     """

File 5/5: THOR layers, Conv2d_Thor_GPU and Dense_Thor_GPU (Python)

@@ -198,7 +198,7 @@ class Conv2d_Thor_GPU(_Conv):
         self.damping = Parameter(Tensor(damping), name="damping_value", requires_grad=False)
         self.dampingA = Tensor(np.identity(self.matrix_A_dim), mstype.float32)
         self.dampingG = Tensor(np.identity(self.matrix_G_dim), mstype.float32)
-        self.cholesky = P.Cholesky(split_dim=split_dim)
+        self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
         self.vector_matmul = P.BatchMatMul(transpose_a=True)

     def save_gradient(self, dout):
@@ -340,7 +340,7 @@ class Dense_Thor_GPU(Cell):
         self.axis = 0
         self.add = P.TensorAdd()
         self.sqrt = P.Sqrt()
-        self.cholesky = P.Cholesky(split_dim=split_dim)
+        self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
         self.vector_matmul = P.BatchMatMul(transpose_a=True)

     def save_gradient(self, dout):
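For downstream code that constructed the old primitive directly, the migration is mechanical, exactly as the two hunks above show:

# Old call site (before this commit):
self.cholesky = P.Cholesky(split_dim=split_dim)
# New call site (after this commit); arguments are unchanged:
self.cholesky = P.CholeskyTrsm(split_dim=split_dim)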