forked from mindspore-Ecosystem/mindspore
update input for ScatterUpdate/Add, fix doc text and fix dynamic shape
This commit is contained in:
parent
0856639fc5
commit
27a602f067
|
@ -40,10 +40,10 @@ class ScatterAddKernel : public GpuKernel {
|
||||||
int *indices = GetDeviceAddress<int>(inputs, 1);
|
int *indices = GetDeviceAddress<int>(inputs, 1);
|
||||||
T *updates = GetDeviceAddress<T>(inputs, 2);
|
T *updates = GetDeviceAddress<T>(inputs, 2);
|
||||||
T *output = GetDeviceAddress<T>(outputs, 0);
|
T *output = GetDeviceAddress<T>(outputs, 0);
|
||||||
|
CalScatterAdd(inner_size_, indices_size_, indices, updates, input, reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||||
CHECK_CUDA_RET_WITH_EXCEPT(cudaMemcpyAsync(&output[0], &input[0], input_size_ * sizeof(T), cudaMemcpyDeviceToDevice,
|
CHECK_CUDA_RET_WITH_EXCEPT(cudaMemcpyAsync(&output[0], &input[0], input_size_ * sizeof(T), cudaMemcpyDeviceToDevice,
|
||||||
reinterpret_cast<cudaStream_t>(stream_ptr)),
|
reinterpret_cast<cudaStream_t>(stream_ptr)),
|
||||||
"cudaMemcpyAsync output failed");
|
"cudaMemcpyAsync output failed");
|
||||||
CalScatterAdd(inner_size_, indices_size_, indices, updates, output, reinterpret_cast<cudaStream_t>(stream_ptr));
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -40,10 +40,10 @@ class ScatterUpdateKernel : public GpuKernel {
|
||||||
int *indices = GetDeviceAddress<int>(inputs, 1);
|
int *indices = GetDeviceAddress<int>(inputs, 1);
|
||||||
T *updates = GetDeviceAddress<T>(inputs, 2);
|
T *updates = GetDeviceAddress<T>(inputs, 2);
|
||||||
T *output = GetDeviceAddress<T>(outputs, 0);
|
T *output = GetDeviceAddress<T>(outputs, 0);
|
||||||
|
CalScatterUpdate(inner_size_, indices_size_, indices, updates, input, reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||||
CHECK_CUDA_RET_WITH_EXCEPT(cudaMemcpyAsync(&output[0], &input[0], input_size_ * sizeof(T), cudaMemcpyDeviceToDevice,
|
CHECK_CUDA_RET_WITH_EXCEPT(cudaMemcpyAsync(&output[0], &input[0], input_size_ * sizeof(T), cudaMemcpyDeviceToDevice,
|
||||||
reinterpret_cast<cudaStream_t>(stream_ptr)),
|
reinterpret_cast<cudaStream_t>(stream_ptr)),
|
||||||
"cudaMemcpyAsync output failed");
|
"cudaMemcpyAsync output failed");
|
||||||
CalScatterUpdate(inner_size_, indices_size_, indices, updates, output, reinterpret_cast<cudaStream_t>(stream_ptr));
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,26 +19,26 @@
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void ScatterAdd(const int inner_size, const int updates_size, const int *indices, const T *updates,
|
__global__ void ScatterAdd(const int inner_size, const int updates_size, const int *indices, const T *updates,
|
||||||
T *output) {
|
T *input) {
|
||||||
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
|
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
|
||||||
const size_t index = pos / inner_size;
|
const size_t index = pos / inner_size;
|
||||||
const size_t offset = pos % inner_size;
|
const size_t offset = pos % inner_size;
|
||||||
const size_t current_pos = indices[index] * inner_size + offset;
|
const size_t current_pos = indices[index] * inner_size + offset;
|
||||||
MsAtomicAdd(&output[current_pos], updates[pos]);
|
MsAtomicAdd(&input[current_pos], updates[pos]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void CalScatterAdd(const int &inner_size, const int &indices_size, const int *indices, const T *updates, T *output,
|
void CalScatterAdd(const int &inner_size, const int &indices_size, const int *indices, const T *updates, T *input,
|
||||||
cudaStream_t cuda_stream) {
|
cudaStream_t cuda_stream) {
|
||||||
const int updates_size = inner_size * indices_size;
|
const int updates_size = inner_size * indices_size;
|
||||||
ScatterAdd<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates,
|
ScatterAdd<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates,
|
||||||
output);
|
input);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void CalScatterAdd<float>(const int &inner_size, const int &indices_size, const int *indices,
|
template void CalScatterAdd<float>(const int &inner_size, const int &indices_size, const int *indices,
|
||||||
const float *updates, float *output, cudaStream_t cuda_stream);
|
const float *updates, float *input, cudaStream_t cuda_stream);
|
||||||
template void CalScatterAdd<half>(const int &inner_size, const int &indices_size, const int *indices,
|
template void CalScatterAdd<half>(const int &inner_size, const int &indices_size, const int *indices,
|
||||||
const half *updates, half *output, cudaStream_t cuda_stream);
|
const half *updates, half *input, cudaStream_t cuda_stream);
|
||||||
template void CalScatterAdd<int>(const int &inner_size, const int &indices_size, const int *indices, const int *updates,
|
template void CalScatterAdd<int>(const int &inner_size, const int &indices_size, const int *indices, const int *updates,
|
||||||
int *output, cudaStream_t cuda_stream);
|
int *input, cudaStream_t cuda_stream);
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include "runtime/device/gpu/cuda_common.h"
|
#include "runtime/device/gpu/cuda_common.h"
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void CalScatterAdd(const int &inner_size, const int &indices_size, const int *indices, const T *updates, T *output,
|
void CalScatterAdd(const int &inner_size, const int &indices_size, const int *indices, const T *updates, T *input,
|
||||||
cudaStream_t cuda_stream);
|
cudaStream_t cuda_stream);
|
||||||
|
|
||||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ADD_IMPL_CUH_
|
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ADD_IMPL_CUH_
|
||||||
|
|
|
@ -18,31 +18,31 @@
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void ScatterUpdate(const int inner_size, const int updates_size, const int *indices, const T *updates,
|
__global__ void ScatterUpdate(const int inner_size, const int updates_size, const int *indices, const T *updates,
|
||||||
T *output) {
|
T *input) {
|
||||||
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
|
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
|
||||||
const int index = pos / inner_size;
|
const int index = pos / inner_size;
|
||||||
const int offset = pos % inner_size;
|
const int offset = pos % inner_size;
|
||||||
const int current_pos = indices[index] * inner_size + offset;
|
const int current_pos = indices[index] * inner_size + offset;
|
||||||
output[current_pos] = updates[pos];
|
input[current_pos] = updates[pos];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void CalScatterUpdate(const int &inner_size, const int &indices_size, const int *indices, const T *updates, T *output,
|
void CalScatterUpdate(const int &inner_size, const int &indices_size, const int *indices, const T *updates, T *input,
|
||||||
cudaStream_t cuda_stream) {
|
cudaStream_t cuda_stream) {
|
||||||
const int updates_size = inner_size * indices_size;
|
const int updates_size = inner_size * indices_size;
|
||||||
ScatterUpdate<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates,
|
ScatterUpdate<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates,
|
||||||
output);
|
input);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void CalScatterUpdate<float>(const int &inner_size, const int &indices_size, const int *indices,
|
template void CalScatterUpdate<float>(const int &inner_size, const int &indices_size, const int *indices,
|
||||||
const float *updates, float *output, cudaStream_t cuda_stream);
|
const float *updates, float *input, cudaStream_t cuda_stream);
|
||||||
template void CalScatterUpdate<half>(const int &inner_size, const int &indices_size, const int *indices,
|
template void CalScatterUpdate<half>(const int &inner_size, const int &indices_size, const int *indices,
|
||||||
const half *updates, half *output, cudaStream_t cuda_stream);
|
const half *updates, half *input, cudaStream_t cuda_stream);
|
||||||
template void CalScatterUpdate<int>(const int &inner_size, const int &indices_size, const int *indices,
|
template void CalScatterUpdate<int>(const int &inner_size, const int &indices_size, const int *indices,
|
||||||
const int *updates, int *output, cudaStream_t cuda_stream);
|
const int *updates, int *input, cudaStream_t cuda_stream);
|
||||||
template void CalScatterUpdate<unsigned char>(const int &inner_size, const int &indices_size, const int *indices,
|
template void CalScatterUpdate<unsigned char>(const int &inner_size, const int &indices_size, const int *indices,
|
||||||
const unsigned char *updates, unsigned char *output,
|
const unsigned char *updates, unsigned char *input,
|
||||||
cudaStream_t cuda_stream);
|
cudaStream_t cuda_stream);
|
||||||
template void CalScatterUpdate<int8_t>(const int &inner_size, const int &indices_size, const int *indices,
|
template void CalScatterUpdate<int8_t>(const int &inner_size, const int &indices_size, const int *indices,
|
||||||
const int8_t *updates, int8_t *output, cudaStream_t cuda_stream);
|
const int8_t *updates, int8_t *input, cudaStream_t cuda_stream);
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include "runtime/device/gpu/cuda_common.h"
|
#include "runtime/device/gpu/cuda_common.h"
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void CalScatterUpdate(const int &inner_size, const int &indices_size, const int *indices, const T *updates, T *output,
|
void CalScatterUpdate(const int &inner_size, const int &indices_size, const int *indices, const T *updates, T *input,
|
||||||
cudaStream_t cuda_stream);
|
cudaStream_t cuda_stream);
|
||||||
|
|
||||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_UPDATE_IMPL_CUH_
|
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_UPDATE_IMPL_CUH_
|
||||||
|
|
|
@ -73,10 +73,20 @@ class _ScatterOp_Dynamic(PrimitiveWithCheck):
|
||||||
"""
|
"""
|
||||||
Defines Scatter operators with dynamic shape
|
Defines Scatter operators with dynamic shape
|
||||||
"""
|
"""
|
||||||
|
__mindspore_signature__ = (
|
||||||
|
sig.make_sig('x', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
|
||||||
|
sig.make_sig('indices', dtype=sig.sig_dtype.T1),
|
||||||
|
sig.make_sig('updates', dtype=sig.sig_dtype.T)
|
||||||
|
)
|
||||||
|
|
||||||
def _check_scatter_shape(self, x_shape, indices_shape, updates_shape, prim_name):
|
def _check_scatter_shape(self, x_shape, indices_shape, updates_shape, prim_name):
|
||||||
if np.all(np.array(x_shape) != -1):
|
# x_shape cannot be dynamic
|
||||||
if indices_shape != [-1] and updates_shape and updates_shape != indices_shape + x_shape[1:]:
|
if np.any(np.array(x_shape) == -1):
|
||||||
|
raise ValueError(f"x does not support dynamic shape")
|
||||||
|
# support indices and updates dynamic
|
||||||
|
if np.any(np.array(indices_shape) == -1) or np.any(np.array(updates_shape) == -1):
|
||||||
|
pass
|
||||||
|
elif indices_shape != [-1] and updates_shape and updates_shape != indices_shape + x_shape[1:]:
|
||||||
raise ValueError(f"For '{prim_name}', "
|
raise ValueError(f"For '{prim_name}', "
|
||||||
f"updates_shape = indices_shape + x_shape[1:], but got x_shape: {x_shape}, "
|
f"updates_shape = indices_shape + x_shape[1:], but got x_shape: {x_shape}, "
|
||||||
f"indices_shape: {indices_shape}, updates_shape: {updates_shape}.")
|
f"indices_shape: {indices_shape}, updates_shape: {updates_shape}.")
|
||||||
|
@ -3176,7 +3186,7 @@ class ScatterUpdate(_ScatterOp_Dynamic):
|
||||||
Tensor, has the same shape and type as `input_x`.
|
Tensor, has the same shape and type as `input_x`.
|
||||||
|
|
||||||
Supported Platforms:
|
Supported Platforms:
|
||||||
``Ascend``
|
``Ascend`` ``GPU``
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
>>> np_x = np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]])
|
>>> np_x = np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]])
|
||||||
|
|
|
@ -56,8 +56,9 @@ class TestScatterAddDynamicNet(nn.Cell):
|
||||||
self.updates = Parameter(updates, name="updates")
|
self.updates = Parameter(updates, name="updates")
|
||||||
|
|
||||||
def construct(self):
|
def construct(self):
|
||||||
out = self.test_dynamic(self.inputx)
|
indices = self.test_dynamic(self.indices)
|
||||||
out = self.scatter_add(out, self.indices, self.updates)
|
updates = self.test_dynamic(self.updates)
|
||||||
|
out = self.scatter_add(self.inputx, indices, updates)
|
||||||
return out
|
return out
|
||||||
|
|
||||||
def scatter_add_d_net(inputx, indices, updates):
|
def scatter_add_d_net(inputx, indices, updates):
|
||||||
|
@ -66,22 +67,24 @@ def scatter_add_d_net(inputx, indices, updates):
|
||||||
return net()
|
return net()
|
||||||
|
|
||||||
class TestScatterAddDynamicNet2(nn.Cell):
|
class TestScatterAddDynamicNet2(nn.Cell):
|
||||||
def __init__(self):
|
def __init__(self, inputx):
|
||||||
super(TestScatterAddDynamicNet2, self).__init__()
|
super(TestScatterAddDynamicNet2, self).__init__()
|
||||||
self.scatter_add = P.ScatterAdd()
|
self.scatter_add = P.ScatterAdd()
|
||||||
self.test_dynamic = inner.GpuConvertToDynamicShape()
|
self.test_dynamic = inner.GpuConvertToDynamicShape()
|
||||||
|
self.inputx = Parameter(inputx, name="inputx")
|
||||||
|
|
||||||
def construct(self, inputx, indices, updates):
|
def construct(self, indices, updates):
|
||||||
out = self.test_dynamic(inputx)
|
indices = self.test_dynamic(indices)
|
||||||
out = self.scatter_add(out, indices, updates)
|
updates = self.test_dynamic(updates)
|
||||||
|
out = self.scatter_add(self.inputx, indices, updates)
|
||||||
return out
|
return out
|
||||||
|
|
||||||
def scatter_add_d2_net(inputx_1, indices_1, updates_1, inputx_2,
|
def scatter_add_d2_net(inputx, indices_1, updates_1,
|
||||||
indices_2, updates_2):
|
indices_2, updates_2):
|
||||||
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
|
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
|
||||||
net = TestScatterAddDynamicNet2()
|
net = TestScatterAddDynamicNet2(inputx)
|
||||||
out1 = net(inputx_1, indices_1, updates_1)
|
out1 = net(indices_1, updates_1)
|
||||||
out2 = net(inputx_2, indices_2, updates_2)
|
out2 = net(indices_2, updates_2)
|
||||||
return (out1, out2)
|
return (out1, out2)
|
||||||
|
|
||||||
@pytest.mark.level0
|
@pytest.mark.level0
|
||||||
|
@ -96,6 +99,20 @@ def test_scatter_add_small_float32():
|
||||||
[12., 14., 16.]])
|
[12., 14., 16.]])
|
||||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||||
|
|
||||||
|
@pytest.mark.level0
|
||||||
|
@pytest.mark.platform_x86_gpu_training
|
||||||
|
@pytest.mark.env_onecard
|
||||||
|
def test_scatter_add_input_updated():
|
||||||
|
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
|
||||||
|
indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
|
||||||
|
updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
|
||||||
|
lock = True
|
||||||
|
net = TestScatterAddNet(lock, inputx, indices, updates)
|
||||||
|
net()
|
||||||
|
expected = np.array([[6., 8., 10.],
|
||||||
|
[12., 14., 16.]])
|
||||||
|
np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected)
|
||||||
|
|
||||||
@pytest.mark.level0
|
@pytest.mark.level0
|
||||||
@pytest.mark.platform_x86_gpu_training
|
@pytest.mark.platform_x86_gpu_training
|
||||||
@pytest.mark.env_onecard
|
@pytest.mark.env_onecard
|
||||||
|
@ -274,39 +291,16 @@ def test_scatter_add_input_less_than_1_dynamic_float32():
|
||||||
@pytest.mark.platform_x86_gpu_training
|
@pytest.mark.platform_x86_gpu_training
|
||||||
@pytest.mark.env_onecard
|
@pytest.mark.env_onecard
|
||||||
def test_scatter_add_dynamic_two_inputs():
|
def test_scatter_add_dynamic_two_inputs():
|
||||||
inputx_1 = Tensor(np.zeros((2, 3)).astype(np.float32))
|
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
|
||||||
indices_1 = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
|
indices_1 = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
|
||||||
updates_1 = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
|
updates_1 = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
|
||||||
inputx_2 = Tensor(np.ones((4, 2, 3, 4)).astype(np.float32))
|
indices_2 = Tensor(np.array([[0, 0], [1, 1], [1, 0]]).astype(np.int32))
|
||||||
indices_2 = Tensor(np.array([[0, 2], [3, 1]]).astype(np.int32))
|
updates_2 = Tensor(np.flip(np.arange(18).reshape((3, 2, 3)).astype(np.float32)))
|
||||||
updates_2 = Tensor(np.arange(96).reshape((2, 2, 2, 3, 4)).astype(np.float32))
|
output_1, output_2 = scatter_add_d2_net(inputx, indices_1, updates_1,
|
||||||
output_1, output_2 = scatter_add_d2_net(inputx_1, indices_1, updates_1,
|
indices_2, updates_2)
|
||||||
inputx_2, indices_2, updates_2)
|
|
||||||
expected_1 = np.array([[6., 8., 10.],
|
expected_1 = np.array([[6., 8., 10.],
|
||||||
[12., 14., 16.]])
|
[12., 14., 16.]])
|
||||||
expected_2 = np.array([[[[1., 2., 3., 4.],
|
expected_2 = np.array([[39., 38., 37.],
|
||||||
[5., 6., 7., 8.],
|
[36., 35., 34.]])
|
||||||
[9., 10., 11., 12.]],
|
|
||||||
[[13., 14., 15., 16.],
|
|
||||||
[17., 18., 19., 20.],
|
|
||||||
[21., 22., 23., 24.]]],
|
|
||||||
[[[73., 74., 75., 76.],
|
|
||||||
[77., 78., 79., 80.],
|
|
||||||
[81., 82., 83., 84.]],
|
|
||||||
[[85., 86., 87., 88.],
|
|
||||||
[89., 90., 91., 92.],
|
|
||||||
[93., 94., 95., 96.]]],
|
|
||||||
[[[25., 26., 27., 28.],
|
|
||||||
[29., 30., 31., 32.],
|
|
||||||
[33., 34., 35., 36.]],
|
|
||||||
[[37., 38., 39., 40.],
|
|
||||||
[41., 42., 43., 44.],
|
|
||||||
[45., 46., 47., 48.]]],
|
|
||||||
[[[49., 50., 51., 52.],
|
|
||||||
[53., 54., 55., 56.],
|
|
||||||
[57., 58., 59., 60.]],
|
|
||||||
[[61., 62., 63., 64.],
|
|
||||||
[65., 66., 67., 68.],
|
|
||||||
[69., 70., 71., 72.]]]])
|
|
||||||
np.testing.assert_array_almost_equal(output_1.asnumpy(), expected_1)
|
np.testing.assert_array_almost_equal(output_1.asnumpy(), expected_1)
|
||||||
np.testing.assert_array_almost_equal(output_2.asnumpy(), expected_2)
|
np.testing.assert_array_almost_equal(output_2.asnumpy(), expected_2)
|
||||||
|
|
|
@ -50,8 +50,9 @@ class TestScatterUpdateDynamicNet(nn.Cell):
|
||||||
self.updates = Parameter(updates, name="updates")
|
self.updates = Parameter(updates, name="updates")
|
||||||
|
|
||||||
def construct(self):
|
def construct(self):
|
||||||
out = self.test_dynamic(self.inputx)
|
indices = self.test_dynamic(self.indices)
|
||||||
out = self.scatter_update(out, self.indices, self.updates)
|
updates = self.test_dynamic(self.updates)
|
||||||
|
out = self.scatter_update(self.inputx, indices, updates)
|
||||||
return out
|
return out
|
||||||
|
|
||||||
def scatter_update_d_net(inputx, indices, updates):
|
def scatter_update_d_net(inputx, indices, updates):
|
||||||
|
@ -60,22 +61,24 @@ def scatter_update_d_net(inputx, indices, updates):
|
||||||
return net()
|
return net()
|
||||||
|
|
||||||
class TestScatterUpdateDynamicNet2(nn.Cell):
|
class TestScatterUpdateDynamicNet2(nn.Cell):
|
||||||
def __init__(self):
|
def __init__(self, inputx):
|
||||||
super(TestScatterUpdateDynamicNet2, self).__init__()
|
super(TestScatterUpdateDynamicNet2, self).__init__()
|
||||||
self.scatter_update = P.ScatterUpdate()
|
self.scatter_update = P.ScatterUpdate()
|
||||||
self.test_dynamic = inner.GpuConvertToDynamicShape()
|
self.test_dynamic = inner.GpuConvertToDynamicShape()
|
||||||
|
self.inputx = Parameter(inputx, name="inputx")
|
||||||
|
|
||||||
def construct(self, inputx, indices, updates):
|
def construct(self, indices, updates):
|
||||||
out = self.test_dynamic(inputx)
|
indices = self.test_dynamic(indices)
|
||||||
out = self.scatter_update(out, indices, updates)
|
updates = self.test_dynamic(updates)
|
||||||
|
out = self.scatter_update(self.inputx, indices, updates)
|
||||||
return out
|
return out
|
||||||
|
|
||||||
def scatter_update_d2_net(inputx_1, indices_1, updates_1, inputx_2,
|
def scatter_update_d2_net(inputx, indices_1, updates_1,
|
||||||
indices_2, updates_2):
|
indices_2, updates_2):
|
||||||
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
|
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
|
||||||
net = TestScatterUpdateDynamicNet2()
|
net = TestScatterUpdateDynamicNet2(inputx)
|
||||||
out1 = net(inputx_1, indices_1, updates_1)
|
out1 = net(indices_1, updates_1)
|
||||||
out2 = net(inputx_2, indices_2, updates_2)
|
out2 = net(indices_2, updates_2)
|
||||||
return (out1, out2)
|
return (out1, out2)
|
||||||
|
|
||||||
@pytest.mark.level0
|
@pytest.mark.level0
|
||||||
|
@ -90,6 +93,19 @@ def test_scatter_update_small_float32():
|
||||||
[3., 4., 5.]])
|
[3., 4., 5.]])
|
||||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||||
|
|
||||||
|
@pytest.mark.level0
|
||||||
|
@pytest.mark.platform_x86_gpu_training
|
||||||
|
@pytest.mark.env_onecard
|
||||||
|
def test_scatter_update_input_updated():
|
||||||
|
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
|
||||||
|
indices = Tensor(np.array([0, 1]).astype(np.int32))
|
||||||
|
updates = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))
|
||||||
|
net = TestScatterUpdateNet(inputx, indices, updates)
|
||||||
|
net()
|
||||||
|
expected = np.array([[0., 1., 2.],
|
||||||
|
[3., 4., 5.]])
|
||||||
|
np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected)
|
||||||
|
|
||||||
@pytest.mark.level0
|
@pytest.mark.level0
|
||||||
@pytest.mark.platform_x86_gpu_training
|
@pytest.mark.platform_x86_gpu_training
|
||||||
@pytest.mark.env_onecard
|
@pytest.mark.env_onecard
|
||||||
|
@ -328,20 +344,16 @@ def test_scatter_update_disordered_dynamic_int32():
|
||||||
@pytest.mark.platform_x86_gpu_training
|
@pytest.mark.platform_x86_gpu_training
|
||||||
@pytest.mark.env_onecard
|
@pytest.mark.env_onecard
|
||||||
def test_scatter_update_two_inputs():
|
def test_scatter_update_two_inputs():
|
||||||
inputx_1 = Tensor(np.zeros((2, 3)).astype(np.float32))
|
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
|
||||||
indices_1 = Tensor(np.array([0, 1]).astype(np.int32))
|
indices_1 = Tensor(np.array([0, 1]).astype(np.int32))
|
||||||
updates_1 = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))
|
updates_1 = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))
|
||||||
inputx_2 = Tensor(np.array([[0.214141, 0.415151, 0.51516],
|
indices_2 = Tensor(np.array([1]).astype(np.int32))
|
||||||
[0.876542, 0.451611, 0.55112],
|
updates_2 = Tensor(np.arange(34, 37).reshape((1, 3)).astype(np.float32))
|
||||||
[0.111244, 0.633333, 0.34444]]).astype(np.float32))
|
output_1, output_2 = scatter_update_d2_net(inputx, indices_1, updates_1,
|
||||||
indices_2 = Tensor(np.array([1, 0, 2]).astype(np.int32))
|
indices_2, updates_2)
|
||||||
updates_2 = Tensor(np.arange(34, 43).reshape((3, 3)).astype(np.float32))
|
|
||||||
output_1, output_2 = scatter_update_d2_net(inputx_1, indices_1, updates_1,
|
|
||||||
inputx_2, indices_2, updates_2)
|
|
||||||
expected_1 = np.array([[0., 1., 2.],
|
expected_1 = np.array([[0., 1., 2.],
|
||||||
[3., 4., 5.]])
|
[3., 4., 5.]], dtype=np.float32)
|
||||||
expected_2 = np.array([[37., 38., 39.],
|
expected_2 = np.array([[0., 1., 2.],
|
||||||
[34., 35., 36.],
|
[34., 35., 36.]], dtype=np.float32)
|
||||||
[40., 41., 42.]], dtype=np.float32)
|
|
||||||
np.testing.assert_array_almost_equal(output_1.asnumpy(), expected_1)
|
np.testing.assert_array_almost_equal(output_1.asnumpy(), expected_1)
|
||||||
np.testing.assert_array_almost_equal(output_2.asnumpy(), expected_2)
|
np.testing.assert_array_almost_equal(output_2.asnumpy(), expected_2)
|
||||||
|
|
Loading…
Reference in New Issue