gpu support tanh & tanhgrad kernel

2020-05-07 15:33:08 +08:00 · 2020-05-07 15:33:08 +08:00 · 311bf41e6d
parent d9dd6aa0b8
commit 311bf41e6d
7 changed files with 347 additions and 0 deletions
--- a/mindspore/ccsrc/kernel/gpu/cuda_impl/tanh_impl.cu
+++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/tanh_impl.cu
@ -0,0 +1,46 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/gpu/cuda_impl/tanh_impl.cuh"
+#include <cuda_runtime.h>
+
+// Element-wise hyperbolic tangent: y_addr[i] = tanh(x_addr[i]) for every i in [0, size).
+// Each thread starts at its global index and advances by blockDim.x * gridDim.x, so
+// every index is visited even when the launch has fewer threads than elements.
+// The index and stride are size_t (the same type as `size`) so the bound check is not
+// a signed/unsigned comparison and the index cannot wrap for counts above INT_MAX.
+template<typename T>
+__global__ void TanhKernel(const size_t size, const T* x_addr, T* y_addr) {
+  const size_t stride = static_cast<size_t>(blockDim.x) * gridDim.x;
+  for (size_t pos = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x; pos < size; pos += stride) {
+    y_addr[pos] = tanh(x_addr[pos]);
+  }
+}
+
+// Element-wise tanh gradient: dx_addr[i] = dy_addr[i] * (1 - y_addr[i] * y_addr[i])
+// for every i in [0, size). The formula matches d/dx tanh(x) = 1 - tanh(x)^2, so
+// y_addr is expected to hold the forward tanh output and dy_addr the incoming gradient.
+// Each thread starts at its global index and advances by blockDim.x * gridDim.x.
+// The index and stride are size_t (the same type as `size`) so the bound check is not
+// a signed/unsigned comparison and the index cannot wrap for counts above INT_MAX.
+template<typename T>
+__global__ void TanhGradKernel(const size_t size, const T* y_addr, const T* dy_addr, T* dx_addr) {
+  const size_t stride = static_cast<size_t>(blockDim.x) * gridDim.x;
+  for (size_t pos = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x; pos < size; pos += stride) {
+    dx_addr[pos] = dy_addr[pos] * (1 - y_addr[pos] * y_addr[pos]);
+  }
+}
+
+// Host-side launcher for TanhKernel.
+// Enqueues the element-wise tanh of `size` elements from x_addr into y_addr on
+// cuda_stream. Grid and block dimensions come from GET_BLOCKS(size) and GET_THREADS;
+// no dynamic shared memory is requested.
+template<typename T>
+void Tanh(const size_t size, const T* x_addr, T* y_addr, cudaStream_t cuda_stream) {
+  TanhKernel<T><<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(
+    size, x_addr, y_addr);
+}
+
+// Host-side launcher for TanhGradKernel.
+// Enqueues the element-wise tanh gradient over `size` elements on cuda_stream,
+// reading y_addr and dy_addr and writing dx_addr. Grid and block dimensions come
+// from GET_BLOCKS(size) and GET_THREADS; no dynamic shared memory is requested.
+template<typename T>
+void TanhGrad(const size_t size, const T* y_addr, const T* dy_addr, T* dx_addr, cudaStream_t cuda_stream) {
+  TanhGradKernel<T><<<GET_BLOCKS(size), GET_THREADS, 0, cuda_stream>>>(
+    size, y_addr, dy_addr, dx_addr);
+}
+
+// Explicit instantiations. Only the float versions of Tanh and TanhGrad are
+// instantiated here; the header declares the templates without definitions.
+template void Tanh(const size_t size, const float* x_addr, float* y_addr, cudaStream_t cuda_stream);
+template void TanhGrad(const size_t size, const float* y_addr, const float* dy_addr,
+                       float* dx_addr, cudaStream_t cuda_stream);
--- a/mindspore/ccsrc/kernel/gpu/cuda_impl/tanh_impl.cuh
+++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/tanh_impl.cuh
@ -0,0 +1,28 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_TANH_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_TANH_H_
+
+#include "device/gpu/cuda_common.h"
+
+// Launches the element-wise tanh of `size` elements on cuda_stream:
+// y_addr[i] = tanh(x_addr[i]). x_addr and y_addr are passed straight to a CUDA
+// kernel, so they must be device-accessible buffers of at least `size` elements.
+template<typename T>
+void Tanh(const size_t size, const T* x_addr, T* y_addr, cudaStream_t cuda_stream);
+
+// Launches the element-wise tanh gradient of `size` elements on cuda_stream:
+// dx_addr[i] = dy_addr[i] * (1 - y_addr[i] * y_addr[i]). All three pointers are
+// passed straight to a CUDA kernel, so they must be device-accessible buffers of
+// at least `size` elements.
+template<typename T>
+void TanhGrad(const size_t size, const T* y_addr, const T* dy_addr, T* dx_addr, cudaStream_t cuda_stream);
+
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_TANH_H_
--- a/mindspore/ccsrc/kernel/gpu/nn/tanh_gpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/gpu/nn/tanh_gpu_kernel.cc
@ -0,0 +1,24 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/gpu/nn/tanh_gpu_kernel.h"
+
+namespace mindspore {
+namespace kernel {
+// Registration of the Tanh op for the GPU backend: one float32 input, one float32
+// output, bound to TanhGpuKernel with element type float.
+// NOTE(review): MS_REG_GPU_KERNEL_ONE is defined elsewhere; presumably it
+// instantiates TanhGpuKernel<float> in the GPU kernel factory -- confirm.
+MS_REG_GPU_KERNEL_ONE(Tanh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+                      TanhGpuKernel, float)
+}  // namespace kernel
+}  // namespace mindspore
--- a/mindspore/ccsrc/kernel/gpu/nn/tanh_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/tanh_gpu_kernel.h
@ -0,0 +1,75 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_TANH_GPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_NN_TANH_GPU_KERNEL_H_
+
+#include <cuda_runtime_api.h>
+#include <vector>
+#include <memory>
+#include "kernel/gpu/gpu_kernel.h"
+#include "kernel/gpu/gpu_kernel_factory.h"
+#include "kernel/gpu/cuda_impl/tanh_impl.cuh"
+
+namespace mindspore {
+namespace kernel {
+// GPU kernel wrapper for the Tanh op.
+// Init() caches the input size in bytes (sizeof(T) times the product of the input
+// shape dimensions); Launch() forwards the first input and first output device
+// buffers to the Tanh CUDA launcher.
+template <typename T>
+class TanhGpuKernel : public GpuKernel {
+ public:
+  TanhGpuKernel() : input_size_(0) {}
+  ~TanhGpuKernel() override = default;
+
+  // Byte sizes of the inputs, outputs and workspaces, as filled in by InitSizeLists().
+  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
+  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
+  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
+
+  // Enqueues tanh over input_size_ / sizeof(T) elements on the stream carried by
+  // stream_ptr. The workspace argument is unused. Always returns true.
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
+              const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
+    auto x_addr = GetDeviceAddress<T>(inputs, 0);
+    auto y_addr = GetDeviceAddress<T>(outputs, 0);
+
+    // input_size_ is stored in bytes; the launcher expects an element count.
+    Tanh(input_size_ / sizeof(T), x_addr, y_addr, reinterpret_cast<cudaStream_t>(stream_ptr));
+    return true;
+  }
+
+  // Computes the byte size of input 0 from its inferred shape and fills the size
+  // lists. Always returns true.
+  bool Init(const CNodePtr &kernel_node) override {
+    auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+
+    input_size_ = sizeof(T);
+    for (auto dim : input_shape) {
+      input_size_ *= dim;
+    }
+
+    InitSizeLists();
+    return true;
+  }
+
+ protected:
+  void InitSizeLists() override {
+    // Tanh has a single input tensor (Launch only reads inputs[0]), so exactly one
+    // input size is recorded. The output has the same byte size as the input.
+    input_size_list_.push_back(input_size_);
+    output_size_list_.push_back(input_size_);
+  }
+
+ private:
+  std::vector<size_t> input_size_list_;
+  std::vector<size_t> output_size_list_;
+  std::vector<size_t> workspace_size_list_;  // never populated: no workspace is needed
+  size_t input_size_;                        // size of the input tensor in bytes
+};
+}  // namespace kernel
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_NN_TANH_GPU_KERNEL_H_
--- a/mindspore/ccsrc/kernel/gpu/nn/tanh_grad_kernel.cc
+++ b/mindspore/ccsrc/kernel/gpu/nn/tanh_grad_kernel.cc
@ -0,0 +1,26 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/gpu/nn/tanh_grad_kernel.h"
+
+namespace mindspore {
+namespace kernel {
+// Registration of the TanhGrad op for the GPU backend: two float32 inputs, one
+// float32 output, bound to TanhGradKernel with element type float.
+// NOTE(review): MS_REG_GPU_KERNEL_ONE is defined elsewhere; presumably it
+// instantiates TanhGradKernel<float> in the GPU kernel factory -- confirm.
+MS_REG_GPU_KERNEL_ONE(
+  TanhGrad,
+  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+  TanhGradKernel, float)
+}  // namespace kernel
+}  // namespace mindspore
--- a/mindspore/ccsrc/kernel/gpu/nn/tanh_grad_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/tanh_grad_kernel.h
@ -0,0 +1,76 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_TANH_GRAD_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_NN_TANH_GRAD_KERNEL_H_
+
+#include <cuda_runtime_api.h>
+#include <vector>
+#include <memory>
+#include "kernel/gpu/gpu_kernel.h"
+#include "kernel/gpu/gpu_kernel_factory.h"
+#include "kernel/gpu/cuda_impl/tanh_impl.cuh"
+
+namespace mindspore {
+namespace kernel {
+// GPU kernel wrapper for the TanhGrad op.
+// Init() derives the byte size of input 0 from its inferred shape and uses that
+// size for both inputs and the output; Launch() hands the two input buffers and
+// the output buffer to the TanhGrad CUDA launcher.
+template <typename T>
+class TanhGradKernel : public GpuKernel {
+ public:
+  TanhGradKernel() : input_size_(0) {}
+  ~TanhGradKernel() override = default;
+
+  // Accessors for the byte-size lists built by InitSizeLists().
+  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
+  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
+  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
+
+  // Enqueues the tanh gradient on the stream carried by stream_ptr.
+  // inputs[0] is passed as y, inputs[1] as dy, outputs[0] as dx; the workspace
+  // argument is ignored. Always returns true.
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
+              const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
+    auto forward_out = GetDeviceAddress<T>(inputs, 0);
+    auto upstream_grad = GetDeviceAddress<T>(inputs, 1);
+    auto input_grad = GetDeviceAddress<T>(outputs, 0);
+
+    // The cached size is in bytes; convert it to an element count for the launcher.
+    const size_t element_count = input_size_ / sizeof(T);
+    auto stream = reinterpret_cast<cudaStream_t>(stream_ptr);
+    TanhGrad(element_count, forward_out, upstream_grad, input_grad, stream);
+    return true;
+  }
+
+  // Caches sizeof(T) * product(shape of input 0) and fills the size lists.
+  // Always returns true.
+  bool Init(const CNodePtr &kernel_node) override {
+    auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+
+    size_t bytes = sizeof(T);
+    for (const auto &extent : shape) {
+      bytes *= extent;
+    }
+    input_size_ = bytes;
+
+    InitSizeLists();
+    return true;
+  }
+
+ protected:
+  void InitSizeLists() override {
+    // Two inputs (y and dy) and one output (dx), all with the same byte size.
+    const size_t kInputCount = 2;
+    input_size_list_.insert(input_size_list_.end(), kInputCount, input_size_);
+    output_size_list_.push_back(input_size_);
+  }
+
+ private:
+  std::vector<size_t> input_size_list_;
+  std::vector<size_t> output_size_list_;
+  std::vector<size_t> workspace_size_list_;  // left empty: no workspace is requested
+  size_t input_size_;                        // byte size shared by both inputs and the output
+};
+}  // namespace kernel
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_NN_TANH_GRAD_KERNEL_H_
--- a/tests/st/ops/gpu/test_tanh_op.py
+++ b/tests/st/ops/gpu/test_tanh_op.py
@ -0,0 +1,72 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import pytest
+import numpy as np
+import mindspore.nn as nn
+from mindspore import Tensor
+from mindspore.ops import operations as P
+from mindspore.ops import composite as C
+import mindspore.context as context
+
+context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+
+class TanhNet(nn.Cell):
+    def __init__(self):
+        super(TanhNet, self).__init__()
+        self.tanh = P.Tanh()
+
+    def construct(self, x):
+        return self.tanh(x)
+
+
+class Grad(nn.Cell):
+    def __init__(self, network):
+        super(Grad, self).__init__()
+        self.grad = C.GradOperation(name="get_all", get_all=True, sens_param=True)
+        self.network = network
+
+    def construct(self, input_data, sens):
+        gout = self.grad(self.network)(input_data, sens)
+        return gout
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_Tanh():
+    x_np = np.array(
+           [[ 0.28522366,  0.38033979,  1.54657853, -0.98530175, -0.54365635,  0.12652203, -1.33449938, -0.27737698],
+            [ 2.06282293,  0.84635078,  0.16628414, -0.91823183, -0.72023044, -0.09147043, -0.04166984, -1.5664763 ],
+            [-0.17157249,  0.44260951, -0.6683391,   1.13142613,  1.5536937,  -0.32799768, -0.20016545,  0.06773927]], 
+            dtype= np.float32)
+    dy_np = np.array(
+            [[ 0.44969849, -0.187879,   -0.64300827,  1.36638774,  0.89930276, -0.23835229, -0.67771854, -1.88984999],
+             [ 2.00418801,  2.33336475,  0.00241747,  1.31558685,  0.06768817, -2.23008804, -0.26818366, -1.26873401],
+             [ 1.83694105,  0.5339005,   0.51117424,  0.49202378, -0.83297819, -0.71001219, 0.18913512,  0.65580389]],
+             dtype= np.float32)
+
+    x_ms = Tensor(x_np)
+    dy_ms = Tensor(dy_np)
+    
+    net = TanhNet()
+    grad = Grad(net)
+    output = grad(x_ms, dy_ms)
+
+    expect = [[ 0.41501077, -0.16312202, -0.10675912,  0.58678646,  0.67828224, -0.23457714, -0.1643468 , -1.75159405], 
+              [ 0.12541081,  1.2251587 ,  0.00235184,  0.62396731,  0.04191568, -2.21153283, -0.26771853, -0.20311764],
+              [ 1.78391056,  0.44159236,  0.33690308,  0.16800483, -0.13651318, -0.63878956,  0.18175511,  0.65280384]]
+
+    assert np.allclose(output[0].asnumpy(), expect)