diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_stack_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_stack_kernel.cc
index 1feee8ad3cd..429e5a783ee 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_stack_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_stack_kernel.cc
@@ -25,15 +25,18 @@ namespace kernel {
 using mindspore::device::TensorArrayMgr;
 using mindspore::device::TensorArrayPtr;
 TensorArrayStackCpuKernelMod::TensorArrayStackCpuKernelMod()
-    : handle_(0), value_size_(0), ele_size_(0), type_(nullptr) {
+    : handle_(0), value_size_(0), ele_size_(0), type_(nullptr), is_dynamic_(true) {
   ResetResource();
 }
 
 void TensorArrayStackCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   kernel_node_ = kernel_node;
+  kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);
   auto shape = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "element_shape");
   auto max_element = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "max_element");
+  is_dynamic_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "is_dynamic_shape");
+  auto size = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "size");
   for (auto i : shape) {
     shapes_.push_back(LongToSize(i));
   }
@@ -42,7 +45,11 @@ void TensorArrayStackCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
   for (auto i : shapes_) {
     ele_size_ *= i;
   }
-  value_size_ = ele_size_ * LongToSize(max_element);
+  if (is_dynamic_) {
+    value_size_ = ele_size_ * LongToSize(max_element);
+  } else {
+    value_size_ = ele_size_ * LongToSize(size);
+  }
   output_size_list_.push_back(value_size_);
   input_size_list_.push_back(sizeof(int64_t));
 }
@@ -61,6 +68,7 @@ void TensorArrayStackCpuKernelMod::ResetResource() noexcept {
   handle_ = 0;
   value_size_ = 0;
   ele_size_ = 0;
+  is_dynamic_ = true;
   shapes_.clear();
   input_size_list_.clear();
   output_size_list_.clear();
@@ -73,6 +81,14 @@ bool TensorArrayStackCpuKernelMod::Launch(const std::vector &inputs,
   auto out_value = GetDeviceAddress<unsigned char>(outputs, 0);
   MS_EXCEPTION_IF_NULL(out_value);
   MS_EXCEPTION_IF_NULL(handle_addr);
+
+  // Set out_value to zeros when TensorArray in static size.
+  if (!is_dynamic_) {
+    auto ret = memset_s(out_value, outputs[0]->size, 0, value_size_);
+    if (ret != EOK) {
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', memset failed, errorno(" << ret << ")";
+    }
+  }
   handle_ = handle_addr[0];
   TensorArrayPtr tensors_ = TensorArrayMgr::GetInstance().GetTensorArray(handle_);
   MS_EXCEPTION_IF_NULL(tensors_);
@@ -85,10 +101,12 @@ bool TensorArrayStackCpuKernelMod::Launch(const std::vector &inputs,
     MS_EXCEPTION_IF_NULL(src_addr);
     auto ret = memcpy_s(out_value + ele_size_ * i, out_ele_size, src_addr, ele_size_);
     if (ret != EOK) {
-      MS_LOG(EXCEPTION) << "Memcpy failed, errorno(" << ret << ")";
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', memcpy failed, errorno(" << ret << ")";
     }
   }
-  PostExecute();
+  if (is_dynamic_) {
+    PostExecute();
+  }
   return true;
 }
 }  // namespace kernel
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_stack_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_stack_kernel.h
index 432911869f9..d2de1e2dcdc 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_stack_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/rl/tensor_array_stack_kernel.h
@@ -45,6 +45,7 @@ class TensorArrayStackCpuKernelMod : public NativeCpuKernelMod {
   size_t ele_size_;
   std::vector<size_t> shapes_;
   TypePtr type_;
+  bool is_dynamic_;
 };
 
 MS_REG_CPU_KERNEL(TensorArrayStack, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_clear_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_clear_kernel.cc
index da965bf90d2..c406cc6621d 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_clear_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_clear_kernel.cc
@@ -42,9 +42,7 @@ bool TensorArrayClearKernelMod::Launch(const std::vector &inputs, co
   auto handle_addr = GetDeviceAddress<int64_t>(inputs, 0);
   MS_ERROR_IF_NULL(handle_addr);
   int64_t handle = 0;
-  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
-                             cudaMemcpyAsync(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost,
-                                             reinterpret_cast<cudaStream_t>(stream)),
+  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost),
                              "Get handle to host failed");
   TensorArrayPtr tensors_ = TensorArrayMgr::GetInstance().GetTensorArray(handle);
   MS_ERROR_IF_NULL(tensors_);
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_close_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_close_kernel.cc
index fdac08d343d..b5c4481bbd0 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_close_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_close_kernel.cc
@@ -42,9 +42,7 @@ bool TensorArrayCloseKernelMod::Launch(const std::vector &inputs, co
   auto handle_addr = GetDeviceAddress<int64_t>(inputs, 0);
   MS_ERROR_IF_NULL(handle_addr);
   int64_t handle = 0;
-  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
-                             cudaMemcpyAsync(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost,
-                                             reinterpret_cast<cudaStream_t>(stream)),
+  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost),
                              "Get handle to host failed");
   GPUTensorArrayPtr tensors_ =
     std::dynamic_pointer_cast<GPUTensorArray>(TensorArrayMgr::GetInstance().GetTensorArray(handle));
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_read_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_read_kernel.cc
index 707369db4db..533217d5ead 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_read_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_read_kernel.cc
@@ -57,9 +57,7 @@ bool TensorArrayReadKernelMod::Launch(const std::vector &inputs, con
                                              reinterpret_cast<cudaStream_t>(stream)),
                              "Get index to host failed");
   int64_t handle = 0;
-  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
-                             cudaMemcpyAsync(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost,
-                                             reinterpret_cast<cudaStream_t>(stream)),
+  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost),
                              "Get handle to host failed");
   TensorArrayPtr tensors_ = TensorArrayMgr::GetInstance().GetTensorArray(handle);
   MS_ERROR_IF_NULL(tensors_);
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_stack_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_stack_kernel.cc
index f3df617549f..954b9bd7d14 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_stack_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_stack_kernel.cc
@@ -27,7 +27,7 @@ namespace kernel {
 using mindspore::device::TensorArrayMgr;
 using mindspore::device::TensorArrayPtr;
 TensorArrayStackKernelMod::TensorArrayStackKernelMod()
-    : handle_(0), value_size_(0), ele_size_(0), stream_ptr_(nullptr), type_(nullptr) {
+    : handle_(0), value_size_(0), ele_size_(0), stream_ptr_(nullptr), type_(nullptr), is_dynamic_(true) {
   ResetResource();
 }
 
@@ -36,6 +36,8 @@ bool TensorArrayStackKernelMod::Init(const CNodePtr &kernel_node) {
   kernel_node_ = kernel_node;
   auto shape = GetAttr<std::vector<int64_t>>(kernel_node, "element_shape");
   auto max_element = GetAttr<int64_t>(kernel_node, "max_element");
+  is_dynamic_ = GetAttr<bool>(kernel_node, "is_dynamic_shape");
+  auto size = GetAttr<int64_t>(kernel_node, "size");
   for (auto i : shape) {
     shapes_.push_back(LongToSize(i));
   }
@@ -44,7 +46,14 @@ bool TensorArrayStackKernelMod::Init(const CNodePtr &kernel_node) {
   for (auto i : shapes_) {
     ele_size_ *= i;
   }
-  value_size_ = ele_size_ * LongToSize(max_element);
+  if (is_dynamic_) {
+    value_size_ = ele_size_ * LongToSize(max_element);
+  } else {
+    if (size <= 0) {
+      MS_LOG(EXCEPTION) << "Size should larger than 0 when is_dynamic_shape = false, but get " << size;
+    }
+    value_size_ = ele_size_ * LongToSize(size);
+  }
   InitSizeLists();
   return true;
 }
@@ -84,9 +93,14 @@ bool TensorArrayStackKernelMod::Launch(const std::vector &inputs, co
   auto out_value = GetDeviceAddress<unsigned char>(outputs, 0);
   MS_ERROR_IF_NULL(out_value);
   MS_ERROR_IF_NULL(handle_addr);
-  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
-                             cudaMemcpyAsync(&handle_, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost,
-                                             reinterpret_cast<cudaStream_t>(stream_ptr)),
+
+  // Set out_value to zeros when TensorArray in static size.
+  if (!is_dynamic_) {
+    CHECK_CUDA_RET_WITH_EXCEPT(
+      kernel_node_, cudaMemsetAsync(out_value, 0, outputs[0]->size, reinterpret_cast<cudaStream_t>(stream_ptr)),
+      "Cudamemset output value failed");
+  }
+  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle_, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost),
                              "Get handle to host failed");
   TensorArrayPtr tensors_ = TensorArrayMgr::GetInstance().GetTensorArray(handle_);
   MS_ERROR_IF_NULL(tensors_);
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_stack_kernel.h b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_stack_kernel.h
index 690801e71a2..b7f56726d8d 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_stack_kernel.h
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_stack_kernel.h
@@ -46,6 +46,7 @@ class TensorArrayStackKernelMod : public NativeGpuKernelMod {
   void *stream_ptr_;
   std::vector<size_t> shapes_;
   TypePtr type_;
+  bool is_dynamic_;
 };
 
 MS_REG_GPU_KERNEL(TensorArrayStack, TensorArrayStackKernelMod)
diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_write_kernel.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_write_kernel.cc
index 0226eae7616..0a24557bcbe 100644
--- a/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_write_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/gpu/kernel/rl/tensor_array_write_kernel.cc
@@ -60,9 +60,7 @@ bool TensorArrayWriteKernelMod::Launch(const std::vector &inputs, co
                                              reinterpret_cast<cudaStream_t>(stream)),
                              "Get indexd failed");
   int64_t handle = 0;
-  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
-                             cudaMemcpyAsync(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost,
-                                             reinterpret_cast<cudaStream_t>(stream)),
+  CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpy(&handle, handle_addr, sizeof(int64_t), cudaMemcpyDeviceToHost),
                              "Get handle to host failed");
   GPUTensorArrayPtr tensors_ =
     std::dynamic_pointer_cast<GPUTensorArray>(TensorArrayMgr::GetInstance().GetTensorArray(handle));
diff --git a/mindspore/core/abstract/prim_rl.cc b/mindspore/core/abstract/prim_rl.cc
index b9352675330..41b5827c7e6 100644
--- a/mindspore/core/abstract/prim_rl.cc
+++ b/mindspore/core/abstract/prim_rl.cc
@@ -42,19 +42,37 @@ AbstractBasePtr InferImplTensorArrayStack(const AnalysisEnginePtr &, const Primi
   if (attr_dtype == nullptr) {
     MS_LOG(EXCEPTION) << "No attribute [dtype] in " << op_name;
   }
+  auto attr_is_dynamic = primitive->GetAttr("is_dynamic_shape");
+  if (attr_is_dynamic == nullptr) {
+    MS_LOG(EXCEPTION) << "No attribute [is_dynamic_shape] in " << op_name;
+  }
+  auto attr_size = primitive->GetAttr("size");
+  if (attr_size == nullptr) {
+    MS_LOG(EXCEPTION) << "No attribute [size] in " << op_name;
+  }
+  auto is_dynamic = GetValue<bool>(attr_is_dynamic);
+  auto size = GetValue<int64_t>(attr_size);
   auto ele_shape = GetValue<std::vector<int64_t>>(attr_shape);
   auto type = GetValue<TypePtr>(attr_dtype);
   primitive->set_attr("max_element", MakeValue(kMaxElement));
-  auto max_shape_ = ele_shape;
-  auto min_shape_ = ele_shape;
-  auto out_shape_ = ele_shape;
-  (void)max_shape_.insert(max_shape_.begin(), kMaxElement);
-  (void)min_shape_.insert(min_shape_.begin(), 1);
-  (void)out_shape_.insert(out_shape_.begin(), -1);
-  ShapeVector out_shape = out_shape_;
-  ShapeVector min_shape = min_shape_;
-  ShapeVector max_shape = max_shape_;
-  auto output = std::make_shared<AbstractTensor>(type, std::make_shared<Shape>(out_shape, min_shape, max_shape));
+  std::shared_ptr<AbstractTensor> output;
+  if (is_dynamic) {
+    auto max_shape_ = ele_shape;
+    auto min_shape_ = ele_shape;
+    auto out_shape_ = ele_shape;
+    (void)max_shape_.insert(max_shape_.begin(), kMaxElement);
+    (void)min_shape_.insert(min_shape_.begin(), 1);
+    (void)out_shape_.insert(out_shape_.begin(), -1);
+    ShapeVector out_shape = out_shape_;
+    ShapeVector min_shape = min_shape_;
+    ShapeVector max_shape = max_shape_;
+    output = std::make_shared<AbstractTensor>(type, std::make_shared<Shape>(out_shape, min_shape, max_shape));
+  } else {
+    auto out_shape_ = ele_shape;
+    (void)out_shape_.insert(out_shape_.begin(), size);
+    ShapeVector out_shape = out_shape_;
+    output = std::make_shared<AbstractTensor>(type, std::make_shared<Shape>(out_shape));
+  }
   return output;
 }
 }  // namespace abstract
diff --git a/mindspore/python/mindspore/nn/reinforcement/tensor_array.py b/mindspore/python/mindspore/nn/reinforcement/tensor_array.py
index 3277449c1ba..08fb3b0f09b 100644
--- a/mindspore/python/mindspore/nn/reinforcement/tensor_array.py
+++ b/mindspore/python/mindspore/nn/reinforcement/tensor_array.py
@@ -66,7 +66,7 @@ class TensorArray(Cell):
         self.tensor_array_read = ta.TensorArrayRead(dtype, element_shape)
         self.tensor_array_close = ta.TensorArrayClose()
         self.tensor_array_clear = ta.TensorArrayClear()
-        self.tensor_array_stack = ta.TensorArrayStack(dtype, element_shape)
+        self.tensor_array_stack = ta.TensorArrayStack(dtype, element_shape, dynamic_size, size)
         self.tensor_array_size = ta.TensorArraySize()
 
     def write(self, index, value):
diff --git a/mindspore/python/mindspore/ops/operations/_tensor_array.py b/mindspore/python/mindspore/ops/operations/_tensor_array.py
index db01c259d2d..963524d2c7c 100644
--- a/mindspore/python/mindspore/ops/operations/_tensor_array.py
+++ b/mindspore/python/mindspore/ops/operations/_tensor_array.py
@@ -250,12 +250,13 @@ class TensorArrayStack(Primitive):
         [1 2]
     """
     @prim_attr_register
-    def __init__(self, dtype, element_shape):
+    def __init__(self, dtype, element_shape, dynamic_size, size):
        """Initialize TensorArrayStack"""
        self.init_prim_io_names(inputs=[''], outputs=['output'])
        self.add_prim_attr('dtype', dtype)
        self.add_prim_attr('element_shape', element_shape)
-        self.add_prim_attr('is_dynamic_shape', True)
+        self.add_prim_attr('is_dynamic_shape', dynamic_size)
+        self.add_prim_attr('size', size)
        self.add_prim_attr('side_effect_mem', True)


diff --git a/tests/st/ops/cpu/test_tensor_array.py b/tests/st/ops/cpu/test_tensor_array.py
index 175baeeebc4..ebf0cca44fc 100644
--- a/tests/st/ops/cpu/test_tensor_array.py
+++ b/tests/st/ops/cpu/test_tensor_array.py
@@ -21,9 +21,9 @@ import mindspore.nn as nn
 from mindspore import Tensor
 
 class TensorArrayNet(nn.Cell):
-    def __init__(self, dtype, element_shape):
+    def __init__(self, dtype, element_shape, is_dynamic_shape=True, size=0):
         super(TensorArrayNet, self).__init__()
-        self.ta = nn.TensorArray(dtype, element_shape)
+        self.ta = nn.TensorArray(dtype, element_shape, is_dynamic_shape, size)
 
     def construct(self, index, value):
         self.ta.write(index, value)
@@ -92,3 +92,22 @@ def test_tensorarray():
     assert np.allclose(v.asnumpy(), expect_s[1])
     assert np.allclose(s.asnumpy(), expect_s)
     ta.close()
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_static_tensorarray():
+    """
+    Feature: TensorArray cpu TEST.
+    Description: Test the static tensorarray.
+    Expectation: success.
+ """ + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + index = Tensor(0, mindspore.int64) + value = Tensor(5, mindspore.int64) + ta = TensorArrayNet(dtype=mindspore.int64, element_shape=(), is_dynamic_shape=False, size=12) + v, s = ta(index, value) + expect_v = 5 + expect_s = [5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + assert np.allclose(s.asnumpy(), expect_s) + assert np.allclose(v.asnumpy(), expect_v) diff --git a/tests/st/ops/gpu/test_tensor_array.py b/tests/st/ops/gpu/test_tensor_array.py index 1b23c64030e..c039f16b7f0 100644 --- a/tests/st/ops/gpu/test_tensor_array.py +++ b/tests/st/ops/gpu/test_tensor_array.py @@ -21,9 +21,9 @@ import mindspore.nn as nn from mindspore import Tensor class TensorArrayNet(nn.Cell): - def __init__(self, dtype, element_shape): + def __init__(self, dtype, element_shape, is_dynamic_shape=True, size=0): super(TensorArrayNet, self).__init__() - self.ta = nn.TensorArray(dtype, element_shape) + self.ta = nn.TensorArray(dtype, element_shape, is_dynamic_shape, size) def construct(self, index, value): for i in range(2): @@ -59,43 +59,62 @@ def test_tensorarray(): assert np.allclose(v.asnumpy(), expect_v) context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU") - ta = nn.TensorArray(mindspore.int64, ()) + tb = nn.TensorArray(mindspore.int64, ()) for i in range(5): - ta.write(i, 99) - v = ta.read(0) - s = ta.stack() + tb.write(i, 99) + v = tb.read(0) + s = tb.stack() expect_v = 99 expect_s = [99, 99, 99, 99, 99] assert np.allclose(s.asnumpy(), expect_s) assert np.allclose(v.asnumpy(), expect_v) - ta_size = ta.size() - assert np.allclose(ta_size.asnumpy(), 5) - ta.clear() - ta_size = ta.size() - assert np.allclose(ta_size.asnumpy(), 0) - ta.write(0, 88) - v = ta.read(0) - s = ta.stack() - ta.close() + tb_size = tb.size() + assert np.allclose(tb_size.asnumpy(), 5) + tb.clear() + tb_size = tb.size() + assert np.allclose(tb_size.asnumpy(), 0) + tb.write(0, 88) + v = tb.read(0) + s = tb.stack() + tb.close() expect_v = 88 expect_s = [88] assert np.allclose(s.asnumpy(), expect_s) assert np.allclose(v.asnumpy(), expect_v) - ta = nn.TensorArray(mindspore.float32, ()) - ta.write(5, 1.) - s = ta.stack() + tc = nn.TensorArray(mindspore.float32, ()) + tc.write(5, 1.) + s = tc.stack() expect_s = [0., 0., 0., 0., 0., 1.] assert np.allclose(s.asnumpy(), expect_s) - ta.write(2, 1.) - s = ta.stack() + tc.write(2, 1.) + s = tc.stack() expect_s = [0., 0., 1., 0., 0., 1.] assert np.allclose(s.asnumpy(), expect_s) - ta.close() - ta = nn.TensorArray(mindspore.bool_, ()) - ta.write(1, Tensor(True, mindspore.bool_)) - s = ta.stack() - v = ta.read(1) + tc.close() + td = nn.TensorArray(mindspore.bool_, ()) + td.write(1, Tensor(True, mindspore.bool_)) + s = td.stack() + v = td.read(1) expect_s = [False, True] assert np.allclose(v.asnumpy(), expect_s[1]) assert np.allclose(s.asnumpy(), expect_s) - ta.close() + td.close() + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_static_tensorarray(): + """ + Feature: TensorArray gpu TEST. + Description: Test the static tensorarray. + Expectation: success. 
+ """ + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + index = Tensor(0, mindspore.int64) + value = Tensor(5, mindspore.int64) + ta = TensorArrayNet(dtype=mindspore.int64, element_shape=(), is_dynamic_shape=False, size=12) + v, s = ta(index, value) + expect_v = 24 + expect_s = [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 0, 0] + assert np.allclose(s.asnumpy(), expect_s) + assert np.allclose(v.asnumpy(), expect_v)