Add float64 support to the Assign GPU kernel
This commit is contained in:
parent
8d936a6589
commit
e1ed3b9c15
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
* Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -18,6 +18,10 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
// Float64 entry for the Assign op: two float64 inputs and one float64 output,
// paired with AssignGpuKernel and the type argument `double`.
// NOTE(review): the macro definition is not visible here; presumably it
// registers AssignGpuKernel<double> for this attribute signature — confirm.
MS_REG_GPU_KERNEL_ONE(
|
||||
Assign,
|
||||
KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
|
||||
AssignGpuKernel, double)
|
||||
MS_REG_GPU_KERNEL_ONE(
|
||||
Assign,
|
||||
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
* Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -14,8 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ASSIGN_GPU_KERNEL_H
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ASSIGN_GPU_KERNEL_H
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_OTHER_ASSIGN_GPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_OTHER_ASSIGN_GPU_KERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
|
||||
|
@ -40,11 +40,11 @@ class AssignGpuKernel : public GpuKernel {
|
|||
CHECK_CUDA_RET_WITH_EXCEPT(
|
||||
kernel_node_,
|
||||
cudaMemcpyAsync(var, value, input_size_, cudaMemcpyDeviceToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)),
|
||||
"cudaMemxcpyAsync failed.");
|
||||
"cudaMemcpyAsync failed.");
|
||||
CHECK_CUDA_RET_WITH_EXCEPT(
|
||||
kernel_node_,
|
||||
cudaMemcpyAsync(output, value, input_size_, cudaMemcpyDeviceToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)),
|
||||
"cudaMemxcpyAsync failed.");
|
||||
"cudaMemcpyAsync failed.");
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -93,4 +93,4 @@ class AssignGpuKernel : public GpuKernel {
|
|||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ASSIGN_GPU_KERNEL_H
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_OTHER_ASSIGN_GPU_KERNEL_H_
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -52,3 +52,21 @@ def test_assign():
|
|||
assert np.all(-diff1 < error)
|
||||
assert np.all(diff2 < error)
|
||||
assert np.all(-diff2 < error)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_assign_float64():
    """Run Net in graph mode on the GPU target with float64 tensors.

    Both the value returned by the network and the variable stored on it
    must match `value` element-wise within an absolute tolerance of 1e-6.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    net = Net(Tensor(x.astype(np.float64)))
    result = net(Tensor(value.astype(np.float64)))

    tolerance = np.ones(shape=[2, 2]) * 1.0e-6
    # Compute both deltas up front, then bound each one from above and below.
    output_delta = result.asnumpy() - value
    var_delta = net.var.data.asnumpy() - value
    for delta in (output_delta, var_delta):
        assert np.all(delta < tolerance)
        assert np.all(-delta < tolerance)
|
||||
|
|
Loading…
Reference in New Issue