Merge pull request !25943 from VectorSL/tensor-array-v2
i-robot 2021-11-25 03:57:45 +00:00 committed by Gitee
commit e96f33e887
29 changed files with 1908 additions and 3 deletions

View File

@@ -0,0 +1,57 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/rl/tensor_array_clear_kernel.h"
#include "backend/kernel_compiler/common_utils.h"
#include "runtime/device/gpu/gpu_tensor_array.h"
#include "runtime/device/tensor_array_manager.h"
namespace mindspore {
namespace kernel {
using mindspore::device::TensorArrayMgr;
using mindspore::device::gpu::GPUTensorArray;
using mindspore::device::gpu::GPUTensorArrayPtr;
TensorArrayClearKernel::TensorArrayClearKernel() {}
const std::vector<size_t> &TensorArrayClearKernel::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &TensorArrayClearKernel::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &TensorArrayClearKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
bool TensorArrayClearKernel::Init(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
InitSizeLists();
return true;
}
void TensorArrayClearKernel::InitSizeLists() {
input_size_list_.push_back(sizeof(int64_t));
output_size_list_.push_back(sizeof(int64_t));
}
bool TensorArrayClearKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, void *) {
auto handle_addr = GetDeviceAddress<int64_t>(inputs, 0);
GPUTensorArrayPtr tensors_ =
std::dynamic_pointer_cast<GPUTensorArray>(TensorArrayMgr::GetInstance().GetTensorArray(handle_addr));
MS_ERROR_IF_NULL(tensors_);
// Clear the TensorArray's valid size, but keep the memory.
tensors_->Clear();
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,51 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_CLEAR_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_CLEAR_KERNEL_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class TensorArrayClearKernel : public GpuKernel {
public:
TensorArrayClearKernel();
~TensorArrayClearKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel_node) override;
protected:
void InitSizeLists() override;
private:
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
MS_REG_GPU_KERNEL(TensorArrayClear, TensorArrayClearKernel)
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_CLEAR_KERNEL_H_

View File

@@ -0,0 +1,61 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/rl/tensor_array_close_kernel.h"
#include "backend/kernel_compiler/common_utils.h"
#include "runtime/device/gpu/gpu_tensor_array.h"
#include "runtime/device/tensor_array_manager.h"
namespace mindspore {
namespace kernel {
using mindspore::device::TensorArrayMgr;
using mindspore::device::gpu::GPUTensorArray;
using mindspore::device::gpu::GPUTensorArrayPtr;
TensorArrayCloseKernel::TensorArrayCloseKernel() {}
const std::vector<size_t> &TensorArrayCloseKernel::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &TensorArrayCloseKernel::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &TensorArrayCloseKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
bool TensorArrayCloseKernel::Init(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
InitSizeLists();
return true;
}
void TensorArrayCloseKernel::InitSizeLists() {
input_size_list_.push_back(sizeof(int64_t));
output_size_list_.push_back(sizeof(int64_t));
}
bool TensorArrayCloseKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, void *) {
auto handle_addr = GetDeviceAddress<int64_t>(inputs, 0);
GPUTensorArrayPtr tensors_ =
std::dynamic_pointer_cast<GPUTensorArray>(TensorArrayMgr::GetInstance().GetTensorArray(handle_addr));
MS_ERROR_IF_NULL(tensors_);
// Free the device memory held by the TensorArray.
tensors_->Free();
// Erase the TensorArray from the manager.
if (!TensorArrayMgr::GetInstance().EraseTensorArray(handle_addr)) {
MS_LOG(EXCEPTION) << "Erase TensorArray failed";
}
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,51 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_CLOSE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_CLOSE_KERNEL_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class TensorArrayCloseKernel : public GpuKernel {
public:
TensorArrayCloseKernel();
~TensorArrayCloseKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel_node) override;
protected:
void InitSizeLists() override;
private:
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
MS_REG_GPU_KERNEL(TensorArrayClose, TensorArrayCloseKernel)
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_CLOSE_KERNEL_H_

View File

@@ -0,0 +1,73 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <memory>
#include "backend/kernel_compiler/gpu/rl/tensor_array_create_kernel.h"
#include "backend/kernel_compiler/common_utils.h"
#include "runtime/device/gpu/gpu_tensor_array.h"
#include "runtime/device/tensor_array_manager.h"
namespace mindspore {
namespace kernel {
using mindspore::device::TensorArrayMgr;
using mindspore::device::gpu::GPUTensorArray;
using mindspore::device::gpu::GPUTensorArrayPtr;
TensorArrayCreateKernel::TensorArrayCreateKernel() : is_dynamic_(true), size_(0) {}
const std::vector<size_t> &TensorArrayCreateKernel::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &TensorArrayCreateKernel::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &TensorArrayCreateKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
bool TensorArrayCreateKernel::Init(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
auto shape = GetAttr<std::vector<int64_t>>(kernel_node, "element_shape");
for (auto i : shape) {
shapes_.push_back(LongToSize(i));
}
type_ = GetAttr<TypePtr>(kernel_node, "dtype");
size_ = GetAttr<int64_t>(kernel_node, "size");
is_dynamic_ = GetAttr<bool>(kernel_node, "dynamic_size");
name_ = GetAttr<std::string>(kernel_node, "name");
InitSizeLists();
return true;
}
void TensorArrayCreateKernel::InitSizeLists() { output_size_list_.push_back(sizeof(int64_t)); }
bool TensorArrayCreateKernel::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
// Create a TensorArray and generate a unique handle.
int64_t tensor_array_handle = TensorArrayMgr::GetInstance().GetHandleCount();
auto name = "GPUTensorArray_" + name_ + "_" + std::to_string(tensor_array_handle);
GPUTensorArrayPtr tensor_array = std::make_shared<GPUTensorArray>(name, type_, shapes_);
MS_EXCEPTION_IF_NULL(tensor_array);
tensor_array->SetMaxSize(size_, is_dynamic_);
auto out_addr = GetDeviceAddress<int64_t>(outputs, 0);
// Set handle to out_addr.
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(out_addr, &tensor_array_handle, sizeof(int64_t), cudaMemcpyHostToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr)),
"Create TensorArray failed");
MS_LOG(DEBUG) << "Create handle id " << tensor_array_handle;
// Put the TensorArray into a saved map : map<handle, tensorarray> in the TensorArray manager.
// Only the device addr is put as the key, to avoid a copy from device to host.
// The output handle address will be kept and won't be reused.
TensorArrayMgr::GetInstance().AddTensorArray(out_addr, tensor_array);
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,56 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_CREATE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_CREATE_KERNEL_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class TensorArrayCreateKernel : public GpuKernel {
public:
TensorArrayCreateKernel();
~TensorArrayCreateKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel_node) override;
protected:
void InitSizeLists() override;
private:
bool is_dynamic_;
int64_t size_;
std::vector<size_t> shapes_;
TypePtr type_;
std::string name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
MS_REG_GPU_KERNEL(TensorArray, TensorArrayCreateKernel)
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_CREATE_KERNEL_H_

View File

@@ -0,0 +1,79 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/rl/tensor_array_read_kernel.h"
#include "backend/kernel_compiler/common_utils.h"
#include "runtime/device/gpu/gpu_tensor_array.h"
#include "runtime/device/tensor_array_manager.h"
namespace mindspore {
namespace kernel {
using mindspore::device::TensorArrayMgr;
using mindspore::device::gpu::GPUTensorArray;
using mindspore::device::gpu::GPUTensorArrayPtr;
TensorArrayReadKernel::TensorArrayReadKernel() : value_size_(0), type_(nullptr) {}
const std::vector<size_t> &TensorArrayReadKernel::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &TensorArrayReadKernel::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &TensorArrayReadKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
bool TensorArrayReadKernel::Init(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
shapes_ = GetAttr<std::vector<int64_t>>(kernel_node, "element_shape");
type_ = GetAttr<TypePtr>(kernel_node, "dtype");
value_size_ = GetTypeByte(type_);
for (auto i : shapes_) {
value_size_ *= i;
}
InitSizeLists();
return true;
}
void TensorArrayReadKernel::InitSizeLists() {
input_size_list_.push_back(sizeof(int64_t));
input_size_list_.push_back(sizeof(int64_t));
output_size_list_.push_back(value_size_);
}
bool TensorArrayReadKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream) {
auto handle_addr = GetDeviceAddress<int64_t>(inputs, 0);
auto index = GetDeviceAddress<int64_t>(inputs, 1);
auto out_value = GetDeviceAddress<unsigned char>(outputs, 0);
MS_ERROR_IF_NULL(out_value);
int64_t index_host = 0;
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(&index_host, index, sizeof(int64_t), cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(stream)),
"Get index failed");
GPUTensorArrayPtr tensors_ =
std::dynamic_pointer_cast<GPUTensorArray>(TensorArrayMgr::GetInstance().GetTensorArray(handle_addr));
MS_ERROR_IF_NULL(tensors_);
if (!tensors_->CheckReadIndexLogical(index_host)) {
MS_LOG(EXCEPTION) << "Invalid index " << index_host << " for read.";
}
auto value_addr = tensors_->Read(index_host);
MS_LOG(DEBUG) << "Read value index:" << index_host;
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(out_value, value_addr->addr, value_size_, cudaMemcpyDeviceToDevice,
reinterpret_cast<cudaStream_t>(stream)),
"Get value failed");
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,56 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_READ_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_READ_KERNEL_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class TensorArrayReadKernel : public GpuKernel {
public:
TensorArrayReadKernel();
~TensorArrayReadKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel_node) override;
protected:
void InitSizeLists() override;
private:
size_t value_size_;
std::vector<int64_t> shapes_;
TypePtr type_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
MS_REG_GPU_KERNEL(TensorArrayRead, TensorArrayReadKernel)
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_READ_KERNEL_H_

View File

@@ -0,0 +1,63 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/rl/tensor_array_size_kernel.h"
#include "backend/kernel_compiler/common_utils.h"
#include "runtime/device/gpu/gpu_tensor_array.h"
#include "runtime/device/tensor_array_manager.h"
namespace mindspore {
namespace kernel {
using mindspore::device::TensorArrayMgr;
using mindspore::device::gpu::GPUTensorArray;
using mindspore::device::gpu::GPUTensorArrayPtr;
TensorArraySizeKernel::TensorArraySizeKernel() {}
const std::vector<size_t> &TensorArraySizeKernel::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &TensorArraySizeKernel::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &TensorArraySizeKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
bool TensorArraySizeKernel::Init(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
InitSizeLists();
return true;
}
void TensorArraySizeKernel::InitSizeLists() {
input_size_list_.push_back(sizeof(int64_t));
output_size_list_.push_back(sizeof(int64_t));
}
bool TensorArraySizeKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
auto handle_addr = GetDeviceAddress<int64_t>(inputs, 0);
auto out_addr = GetDeviceAddress<int64_t>(outputs, 0);
GPUTensorArrayPtr tensors_ =
std::dynamic_pointer_cast<GPUTensorArray>(TensorArrayMgr::GetInstance().GetTensorArray(handle_addr));
MS_ERROR_IF_NULL(tensors_);
int64_t valid_size = SizeToLong(tensors_->GetValidSize());
MS_LOG(DEBUG) << "Launch TensorArraySize, valid size is " << valid_size;
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(out_addr, &valid_size, sizeof(int64_t), cudaMemcpyHostToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr)),
"Get valid size failed");
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,51 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_SIZE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_SIZE_KERNEL_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class TensorArraySizeKernel : public GpuKernel {
public:
TensorArraySizeKernel();
~TensorArraySizeKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel_node) override;
protected:
void InitSizeLists() override;
private:
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
MS_REG_GPU_KERNEL(TensorArraySize, TensorArraySizeKernel)
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_SIZE_KERNEL_H_

View File

@@ -0,0 +1,107 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/rl/tensor_array_stack_kernel.h"
#include <algorithm>
#include "backend/kernel_compiler/common_utils.h"
#include "runtime/device/gpu/gpu_tensor_array.h"
#include "runtime/device/tensor_array_manager.h"
namespace mindspore {
namespace kernel {
using mindspore::device::TensorArrayMgr;
using mindspore::device::gpu::GPUTensorArray;
using mindspore::device::gpu::GPUTensorArrayPtr;
TensorArrayStackKernel::TensorArrayStackKernel()
: handle_(nullptr), value_size_(0), ele_size_(0), stream_ptr_(nullptr) {
ResetResource();
}
const std::vector<size_t> &TensorArrayStackKernel::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &TensorArrayStackKernel::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &TensorArrayStackKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
bool TensorArrayStackKernel::Init(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_node_ = kernel_node;
auto shape = GetAttr<std::vector<int64_t>>(kernel_node, "element_shape");
auto max_element = GetAttr<int64_t>(kernel_node, "max_element");
for (auto i : shape) {
shapes_.push_back(LongToSize(i));
}
type_ = GetAttr<TypePtr>(kernel_node, "dtype");
ele_size_ = GetTypeByte(type_);
for (auto i : shapes_) {
ele_size_ *= i;
}
value_size_ = ele_size_ * LongToSize(max_element);
InitSizeLists();
return true;
}
void TensorArrayStackKernel::PostExecute() {
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr_)),
"TensorArrayStack cudaStreamSynchronized failed");
GPUTensorArrayPtr tensors_ =
std::dynamic_pointer_cast<GPUTensorArray>(TensorArrayMgr::GetInstance().GetTensorArray(handle_));
size_t tensor_size = tensors_->GetValidSize();
auto shape = shapes_;
shape.insert(shape.begin(), tensor_size);
MS_LOG(DEBUG) << "After postexecute, the real shape of TensorArrayStack is " << shape;
AnfAlgo::SetOutputInferTypeAndShape({type_->type_id()}, {shape}, kernel_node_.lock().get());
}
void TensorArrayStackKernel::ResetResource() noexcept {
handle_ = nullptr;
value_size_ = 0;
ele_size_ = 0;
stream_ptr_ = nullptr;
shapes_.clear();
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
}
void TensorArrayStackKernel::InitSizeLists() {
output_size_list_.push_back(value_size_);
input_size_list_.push_back(sizeof(int64_t));
}
bool TensorArrayStackKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
stream_ptr_ = stream_ptr;
handle_ = GetDeviceAddress<int64_t>(inputs, 0);
auto out_value = GetDeviceAddress<unsigned char>(outputs, 0);
MS_ERROR_IF_NULL(out_value);
GPUTensorArrayPtr tensors_ =
std::dynamic_pointer_cast<GPUTensorArray>(TensorArrayMgr::GetInstance().GetTensorArray(handle_));
if (tensors_->GetValidSize() > tensors_->GetRealSize()) {
MS_LOG(EXCEPTION) << "Invalid TensorArray size, maybe should Clear() TensorArray before next usage.";
}
for (size_t i = 0; i < tensors_->GetValidSize(); i++) {
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(out_value + ele_size_ * i, tensors_->GetTensorAddr(i), ele_size_,
cudaMemcpyDeviceToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)),
"Stack value failed");
}
return true;
}
} // namespace kernel
} // namespace mindspore
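The stack kernel packs the valid TensorArray entries back to back into one contiguous output buffer, one device-to-device copy per element, and PostExecute later synchronizes the stream and fixes the real output shape from the valid size. Below is a minimal host-side sketch of the packing loop, with std::memcpy standing in for cudaMemcpyAsync and made-up toy buffers in place of the device addresses:

#include <cstddef>
#include <cstdio>
#include <cstring>
#include <vector>

int main() {
  const size_t ele_size = 4 * sizeof(float);  // element_shape = (4,), dtype = float32
  // Stands in for the device buffers kept in tensors_.
  std::vector<std::vector<float>> tensors = {{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}};
  const size_t valid_size = 2;  // only the first two entries are logically valid
  std::vector<float> out(valid_size * 4);
  for (size_t i = 0; i < valid_size; i++) {
    // One contiguous chunk per valid element, matching the kernel's
    // "out_value + ele_size_ * i" destination arithmetic.
    std::memcpy(out.data() + i * 4, tensors[i].data(), ele_size);
  }
  for (float v : out) std::printf("%g ", v);  // prints: 0 1 2 3 4 5 6 7
  std::printf("\n");
  return 0;
}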

View File

@@ -0,0 +1,61 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_STACK_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_STACK_KERNEL_H_
#include <string>
#include <vector>
#include <atomic>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class TensorArrayStackKernel : public GpuKernel {
public:
TensorArrayStackKernel();
~TensorArrayStackKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel_node) override;
void PostExecute() override;
void ResetResource() noexcept override;
protected:
void InitSizeLists() override;
private:
int64_t *handle_;
int64_t value_size_;
int64_t ele_size_;
void *stream_ptr_;
std::vector<size_t> shapes_;
TypePtr type_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
MS_REG_GPU_KERNEL(TensorArrayStack, TensorArrayStackKernel)
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_STACK_KERNEL_H_

View File

@@ -0,0 +1,96 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <memory>
#include "backend/kernel_compiler/gpu/rl/tensor_array_write_kernel.h"
#include "backend/kernel_compiler/common_utils.h"
#include "runtime/device/gpu/gpu_tensor_array.h"
#include "runtime/device/tensor_array_manager.h"
namespace mindspore {
namespace kernel {
constexpr size_t kSecondInputIndex = 2;
using mindspore::device::TensorArrayMgr;
using mindspore::device::gpu::GPUTensorArray;
using mindspore::device::gpu::GPUTensorArrayPtr;
TensorArrayWriteKernel::TensorArrayWriteKernel() : value_size_(0) {}
const std::vector<size_t> &TensorArrayWriteKernel::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &TensorArrayWriteKernel::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &TensorArrayWriteKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
bool TensorArrayWriteKernel::Init(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
type_ = AnfAlgo::GetInputDeviceDataType(kernel_node, kSecondInputIndex);
shapes_ = AnfAlgo::GetInputDeviceShape(kernel_node, kSecondInputIndex);
value_size_ = GetTypeByte(TypeIdToType(type_));
for (auto i : shapes_) {
value_size_ *= i;
}
InitSizeLists();
return true;
}
void TensorArrayWriteKernel::InitSizeLists() {
input_size_list_.push_back(sizeof(int64_t));
input_size_list_.push_back(sizeof(int64_t));
output_size_list_.push_back(sizeof(int64_t));
}
bool TensorArrayWriteKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs,
const std::vector<AddressPtr> &, void *stream) {
auto handle_addr = GetDeviceAddress<int64_t>(inputs, 0);
auto index = GetDeviceAddress<int64_t>(inputs, 1);
auto value = GetDeviceAddress<unsigned char>(inputs, 2);
int64_t index_host = 0;
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(&index_host, index, sizeof(int64_t), cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(stream)),
"Get indexd failed");
GPUTensorArrayPtr tensors_ =
std::dynamic_pointer_cast<GPUTensorArray>(TensorArrayMgr::GetInstance().GetTensorArray(handle_addr));
MS_EXCEPTION_IF_NULL(tensors_);
if (!tensors_->CheckValue(type_, shapes_)) {
MS_LOG(EXCEPTION) << "Invalid input data for tensor array write op.";
}
// Manage the value : create/reuse a device memory, and copy the input value to it.
AddressPtr dev_addr = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(dev_addr);
if (tensors_->GetRealSize() > LongToSize(index_host)) {
dev_addr->addr = tensors_->Read(index_host)->addr;
} else {
dev_addr->addr = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(value_size_);
MS_LOG(DEBUG) << "Create tensor " << dev_addr->addr << ", size " << value_size_;
}
MS_EXCEPTION_IF_NULL(dev_addr->addr);
dev_addr->size = value_size_;
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(dev_addr->addr, value, value_size_, cudaMemcpyDeviceToDevice,
reinterpret_cast<cudaStream_t>(stream)),
"Copy value failed");
if (tensors_->Write(index_host, dev_addr)) {
MS_LOG(DEBUG) << "Write to TensorArray succeeded, index " << index_host;
} else {
MS_LOG(EXCEPTION) << "Failed to write to TensorArray.";
}
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,55 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_WRITE_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_WRITE_KERNEL_H_
#include <string>
#include <vector>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class TensorArrayWriteKernel : public GpuKernel {
public:
TensorArrayWriteKernel();
~TensorArrayWriteKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel_node) override;
protected:
void InitSizeLists() override;
private:
size_t value_size_;
std::vector<size_t> shapes_;
TypeId type_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
MS_REG_GPU_KERNEL(TensorArrayWrite, TensorArrayWriteKernel)
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_RL_TENSOR_ARRAY_WRITE_KERNEL_H_

View File

@@ -1,7 +1,7 @@
file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "common/*.cc"
"kernel_info.cc" "executor/dynamic_kernel.cc" "executor/executor_callback.cc" "kernel_runtime.cc"
"memory_manager.cc" "kernel_runtime_manager.cc" "convert_tensor_utils.cc" "memory_scheduler.cc"
"memory_offload_strategy.cc" "bucket.cc" "launch_kernel.cc" "launch_mul.cc"
"memory_offload_strategy.cc" "bucket.cc" "launch_kernel.cc" "launch_mul.cc" "tensor_array.cc"
)
if("${ENABLE_HIDDEN}" STREQUAL "OFF")

View File

@@ -0,0 +1,113 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "runtime/device/gpu/gpu_tensor_array.h"
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <memory>
#include "runtime/device/gpu/gpu_common.h"
#include "runtime/device/gpu/gpu_memory_allocator.h"
namespace mindspore {
namespace device {
namespace gpu {
bool GPUTensorArray::CheckValue(const TypeId &dtype, const std::vector<size_t> &shape) {
MS_LOG(DEBUG) << "Check the data shape and type for " << name_;
if (dtype != dtype_->type_id()) {
MS_LOG(ERROR) << "Invalid data type " << TypeIdLabel(dtype) << " for " << name_ << ", the origin type is "
<< TypeIdLabel(dtype_->type_id());
return false;
}
if (shape != shapes_) {
MS_LOG(ERROR) << "Invalid data shape " << shape << " for " << name_ << ", the origin shape is " << shapes_;
return false;
}
return true;
}
bool GPUTensorArray::CheckReadIndexLogical(const int64_t index) {
if (LongToSize(index) >= valid_size_) {
MS_LOG(ERROR) << "Index " << index << " out of range " << valid_size_ << ", " << name_;
return false;
}
return true;
}
// Add a tensor to the TensorArray and increase its size.
// Case 1: is_dynamic_ == false and index >= max_size_, error.
// Case 2: index > valid_size_, allocate or reuse buffers for the gap, fill them with zeros,
// and set valid_size_ to index + 1.
// Case 3: index == tensors_.size(), increase both the real tensors_ size and the valid size,
// and append the new dev_value to tensors_.
// Case 4: index < tensors_.size() and index <= valid_size_, reuse the memory in tensors_[index];
// valid_size_ grows only when index == valid_size_.
bool GPUTensorArray::Write(const int64_t index, const mindspore::kernel::AddressPtr &dev_value) {
MS_LOG(DEBUG) << "Write dev_value to " << name_;
if (!is_dynamic_ && (index >= max_size_)) {
MS_LOG(ERROR) << name_ << " is not dynamic in size, the max_size is " << max_size_ << ", but got index " << index;
return false;
}
if (LongToSize(index) > valid_size_) {
// Create or reuse (index - valid_size_) buffers and fill them with zeros:
// 1 newly created buffers : index > real_size ? index - real_size : 0
// 2 reused old buffers : index > real_size ? real_size - valid_size : index - valid_size
// 3 buffers filled with zeros : index - valid_size
size_t create_size = (LongToSize(index) > tensors_.size()) ? (LongToSize(index) - tensors_.size()) : 0;
for (size_t i = 0; i < create_size; i++) {
kernel::AddressPtr create_dev = std::make_shared<kernel::Address>();
create_dev->addr = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(dev_value->size);
create_dev->size = dev_value->size;
tensors_.push_back(create_dev);
}
tensors_.push_back(dev_value);
// FillZeros(valid_size_, index);
for (size_t i = valid_size_; i < LongToSize(index); i++) {
CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(cudaMemsetAsync(tensors_[i]->addr, 0, tensors_[i]->size),
"failed to set cuda memory with zeros.")
}
valid_size_ = LongToSize(index) + 1;
} else if (LongToSize(index) == tensors_.size()) {
MS_LOG(DEBUG) << "Write to index " << index << ", increase tensors' size to " << (tensors_.size() + 1);
tensors_.push_back(dev_value);
valid_size_++;
} else {
MS_LOG(DEBUG) << "Reuse tensors in position " << index << ", tensors size is " << tensors_.size();
if (LongToSize(index) == valid_size_) valid_size_++;
}
return true;
}
// Function Read() can get the tensors in the scope of tensors_.
mindspore::kernel::AddressPtr GPUTensorArray::Read(const int64_t index) {
if (LongToSize(index) >= tensors_.size()) {
MS_LOG(EXCEPTION) << "Index " << index << " out of range " << tensors_.size() << ", " << name_;
}
MS_LOG(DEBUG) << "Read tensor index = " << index << ", addr = " << tensors_[LongToSize(index)]->addr;
return tensors_[LongToSize(index)];
}
// Free() will free the memory in TensorArray.
void GPUTensorArray::Free() {
MS_LOG(DEBUG) << "Free device memory for " << name_;
for (const auto &addr : tensors_) {
if (addr != nullptr) {
device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(static_cast<void *>(addr->addr));
}
}
}
} // namespace gpu
} // namespace device
} // namespace mindspore
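The bookkeeping in the sparse-write branch of Write() (case 2 above) is easiest to follow with concrete numbers. The standalone sketch below mirrors the create/zero-fill/valid-size arithmetic with plain host vectors and made-up values; it traces the index > real-size path:

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  // Suppose tensors_.size() == 3 and valid_size_ == 2 when Write(index = 5) arrives.
  std::vector<int> tensors(3, 7);  // three existing buffers (7 marks stale data)
  size_t valid_size = 2;
  const size_t index = 5;

  // 1. Allocate buffers for the gap between the real size and the target index.
  size_t create_size = (index > tensors.size()) ? index - tensors.size() : 0;
  assert(create_size == 2);  // fresh buffers for indices 3 and 4
  for (size_t i = 0; i < create_size; i++) tensors.push_back(0);
  tensors.push_back(42);     // the written value lands at index 5

  // 2. Zero everything between the old valid size and the written index:
  //    index 2 is reused stale memory, indices 3 and 4 are the fresh buffers.
  for (size_t i = valid_size; i < index; i++) tensors[i] = 0;

  // 3. The array is now logically valid up to and including `index`.
  valid_size = index + 1;
  assert(tensors.size() == 6 && valid_size == 6 && tensors[5] == 42);
  return 0;
}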

View File

@@ -0,0 +1,75 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_TENSOR_ARRAY_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_TENSOR_ARRAY_H_
#include <vector>
#include <string>
#include <memory>
#include "runtime/device/gpu/gpu_memory_allocator.h"
#include "runtime/device/tensor_array.h"
namespace mindspore {
namespace device {
namespace gpu {
class GPUTensorArray : public TensorArray {
public:
GPUTensorArray(const string &name, const TypePtr &dtype, const std::vector<size_t> &shapes)
: TensorArray(name, dtype, shapes) {}
~GPUTensorArray() override = default;
// Check the dtype and shape of the input data. Used in Write().
bool CheckValue(const TypeId &dtype, const std::vector<size_t> &shape);
// Check the index in valid range. Used in Read().
bool CheckReadIndexLogical(const int64_t index);
// Add tensor to the TensorArray and increase the size.
bool Write(const int64_t index, const mindspore::kernel::AddressPtr &dev_value) override;
// Function Read() can get the tensors in the scope of tensors_.
mindspore::kernel::AddressPtr Read(const int64_t index) override;
// Free() will free the memory in the TensorArray.
void Free() override;
// Clear() will only set the valid size of the TensorArray to zero. The memory is still
// kept, so it can be reused next time.
void Clear() override { valid_size_ = 0; }
size_t GetValidSize() const override { return valid_size_; }
size_t GetRealSize() const override { return tensors_.size(); }
void *GetTensorAddr(const size_t &index) const { return tensors_[index]->addr; }
void SetMaxSize(const int64_t size, const bool is_dynamic) override {
is_dynamic_ = is_dynamic;
if (!is_dynamic) {
max_size_ = size;
}
}
private:
int64_t max_size_{0};
bool is_dynamic_{true};
};
using GPUTensorArrayPtr = std::shared_ptr<GPUTensorArray>;
} // namespace gpu
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_TENSOR_ARRAY_H_

View File

@@ -0,0 +1,32 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "runtime/device/tensor_array.h"
namespace mindspore {
namespace device {
void TensorArray::Clear() { valid_size_ = 0; }
size_t TensorArray::GetRealSize() const { return valid_size_; }
void TensorArray::SetMaxSize(const int64_t size, const bool is_dynamic) {
MS_LOG(DEBUG) << name_ << " uses the default SetMaxSize, which keeps it empty";
}
} // namespace device
} // namespace mindspore

View File

@@ -0,0 +1,73 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_TENSOR_ARRAY_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_TENSOR_ARRAY_H_
#include <vector>
#include <string>
#include <memory>
#include "backend/session/kernel_graph.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/kernel.h"
namespace mindspore {
namespace device {
class TensorArray {
public:
// Base TensorArray. Constructed by name, dtype and shapes.
TensorArray(const string &name, const TypePtr &dtype, const std::vector<size_t> &shapes)
: name_(name), dtype_(dtype), shapes_(shapes), valid_size_(0) {}
virtual ~TensorArray() = default;
// Function Write() is used to insert or append dev_value to the position of index.
virtual bool Write(const int64_t index, const mindspore::kernel::AddressPtr &dev_value) = 0;
// Function Read() can get the tensors in the scope of tensors_.
virtual mindspore::kernel::AddressPtr Read(const int64_t index) = 0;
// Free() will free the memory in TensorArray.
virtual void Free() = 0;
// Clear() will only set the valid size of the TensorArray to zero. The memory is still
// kept, so it can be reused next time.
virtual void Clear();
// A vector of tensor addresses is kept in a TensorArray. For memory reuse, the addresses are
// kept after Clear(); at that point the valid size is zero but the real size is still
// tensors_.size(). Use GetValidSize() to get the logical TensorArray size, and GetRealSize()
// to get the physical TensorArray size.
virtual size_t GetValidSize() const = 0;
virtual size_t GetRealSize() const;
// When is_dynamic == false, this function sets the max size.
// Otherwise it is not used and the default implementation applies.
virtual void SetMaxSize(const int64_t size, const bool is_dynamic);
protected:
std::string name_;
TypePtr dtype_;
std::vector<size_t> shapes_;
size_t valid_size_;
// The vector tensors_ stores the device addresses added by Write().
std::vector<mindspore::kernel::AddressPtr> tensors_;
};
using TensorArrayPtr = std::shared_ptr<TensorArray>;
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_TENSOR_ARRAY_H_
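The GetValidSize()/GetRealSize() split is what makes Clear() cheap: it rewinds the logical size while the buffers stay allocated for reuse. A minimal host-memory toy with the same interface shape (independent of the MindSpore classes, covering only the append/reuse paths of a real Write()) shows the effect:

#include <cassert>
#include <cstddef>
#include <vector>

class ToyTensorArray {
 public:
  void Write(size_t index, int value) {
    if (index == buffers_.size()) {
      buffers_.push_back(value);  // grow the real size
    } else {
      buffers_[index] = value;    // reuse an existing buffer
    }
    if (index >= valid_size_) valid_size_ = index + 1;
  }
  int Read(size_t index) const { return buffers_[index]; }
  // Clear() only rewinds the logical size; the buffers stay allocated.
  void Clear() { valid_size_ = 0; }
  size_t GetValidSize() const { return valid_size_; }     // logical size
  size_t GetRealSize() const { return buffers_.size(); }  // physical size

 private:
  std::vector<int> buffers_;
  size_t valid_size_ = 0;
};

int main() {
  ToyTensorArray ta;
  ta.Write(0, 1);
  ta.Write(1, 2);
  ta.Clear();  // logical reset, memory kept
  assert(ta.GetValidSize() == 0 && ta.GetRealSize() == 2);
  ta.Write(0, 3);  // reuses the buffer from the first round
  assert(ta.Read(0) == 3 && ta.GetValidSize() == 1 && ta.GetRealSize() == 2);
  return 0;
}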

View File

@@ -0,0 +1,83 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_TENSOR_ARRAY_MANAGER_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_TENSOR_ARRAY_MANAGER_H_
#include <vector>
#include <string>
#include <atomic>
#include <utility>
#include <map>
#include "backend/session/kernel_graph.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "runtime/device/tensor_array.h"
namespace mindspore {
namespace device {
class TensorArrayMgr {
public:
// TensorArrayMgr is used to manage the TensorArrays.
TensorArrayMgr() {}
~TensorArrayMgr() = default;
static TensorArrayMgr &GetInstance() noexcept {
static TensorArrayMgr instance;
return instance;
}
TensorArrayMgr(const TensorArrayMgr &) = delete;
TensorArrayMgr(TensorArrayMgr &&) = delete;
void AddTensorArray(const int64_t *handle, const TensorArrayPtr &ta) {
MS_LOG(DEBUG) << "Add a TensorArray to map, handle addr is " << handle;
tensors_map_.emplace(std::make_pair(handle, ta));
// Increase handle count when added a TensorArray.
tensor_array_handle_count += 1;
}
TensorArrayPtr GetTensorArray(const int64_t *handle) {
if (!tensors_map_.count(handle)) {
MS_LOG(EXCEPTION) << "Error handle [" << handle << "] to get tensorarray";
} else {
MS_LOG(DEBUG) << "Get TensorArray succeed, handle is " << handle;
return tensors_map_[handle];
}
}
bool EraseTensorArray(const int64_t *handle) {
if (tensors_map_.count(handle)) {
MS_LOG(DEBUG) << "Erase tensorarray from map, handle number is " << handle;
tensors_map_.erase(handle);
return true;
} else {
MS_LOG(ERROR) << "Erase failed, no such handle " << handle;
return false;
}
}
int64_t GetHandleCount() const { return tensor_array_handle_count; }
private:
// Store the TensorArrays in a map, as pair(handle_addr, TensorArrayPtr).
std::map<const int64_t *, TensorArrayPtr> tensors_map_;
// Used as a unique handle number for each TensorArray.
std::atomic<int64_t> tensor_array_handle_count{0};
};
} // namespace device
} // namespace mindspore
#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_TENSOR_ARRAY_MANAGER_H_
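Because the manager keys each TensorArray by the device address that stores its handle, the kernels can look an array up without copying the handle back to the host. The pattern itself is easy to isolate; the sketch below is a standalone toy registry with hypothetical Entry/Registry names, not the MindSpore class:

#include <atomic>
#include <cassert>
#include <cstdint>
#include <map>
#include <memory>

struct Entry {
  int payload;
};

class Registry {
 public:
  static Registry &GetInstance() noexcept {
    static Registry instance;  // thread-safe initialization since C++11
    return instance;
  }
  Registry(const Registry &) = delete;
  Registry &operator=(const Registry &) = delete;
  int64_t NextHandle() { return handle_count_++; }
  void Add(const int64_t *key, std::shared_ptr<Entry> e) { map_.emplace(key, std::move(e)); }
  std::shared_ptr<Entry> Get(const int64_t *key) { return map_.count(key) ? map_[key] : nullptr; }
  bool Erase(const int64_t *key) { return map_.erase(key) > 0; }

 private:
  Registry() = default;
  std::map<const int64_t *, std::shared_ptr<Entry>> map_;  // pair(handle addr, entry)
  std::atomic<int64_t> handle_count_{0};
};

int main() {
  // In the kernels, `slot` would be the device memory the create kernel writes the handle to.
  int64_t slot = Registry::GetInstance().NextHandle();
  Registry::GetInstance().Add(&slot, std::make_shared<Entry>(Entry{42}));
  assert(Registry::GetInstance().Get(&slot)->payload == 42);  // read/size/clear path
  assert(Registry::GetInstance().Erase(&slot));               // close path
  return 0;
}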

View File

@@ -291,6 +291,8 @@ AbstractBasePtr InferImplTensorCopySlices(const AnalysisEnginePtr &, const Primi
const AbstractBasePtrList &args_spec_list);
AbstractBasePtr InferImplReal(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
const AbstractBasePtrList &args_spec_list);
AbstractBasePtr InferImplTensorArrayStack(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
const AbstractBasePtrList &args_spec_list);
template <typename T>
AbstractBasePtr InferTupleOrListOrDictLen(const std::string &op_name, const AbstractBasePtrList &args_spec_list) {
// Inputs: a tuple or list or dict.

View File

@@ -0,0 +1,61 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <string>
#include <sstream>
#include "ir/dtype.h"
#include "utils/ms_utils.h"
#include "base/core_ops.h"
#include "abstract/param_validator.h"
#include "abstract/infer_functions.h"
#include "abstract/utils.h"
#include "utils/ms_context.h"
#include "utils/symbolic.h"
#include "utils/shape_utils.h"
namespace mindspore {
namespace abstract {
constexpr int64_t kMaxElement = 10000;
AbstractBasePtr InferImplTensorArrayStack(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
const AbstractBasePtrList &args_spec_list) {
// Infer TensorArrayStack
const std::string op_name = primitive->name();
auto attr_shape = primitive->GetAttr("element_shape");
if (attr_shape == nullptr) {
MS_LOG(EXCEPTION) << "No attribute [element_shape] in " << op_name;
}
auto attr_dtype = primitive->GetAttr("dtype");
if (attr_dtype == nullptr) {
MS_LOG(EXCEPTION) << "No attribute [dtype] in " << op_name;
}
auto ele_shape = GetValue<std::vector<int64_t>>(attr_shape);
auto type = GetValue<TypePtr>(attr_dtype);
primitive->set_attr("max_element", MakeValue(kMaxElement));
ShapeVector out_shape = ele_shape;
ShapeVector min_shape = ele_shape;
ShapeVector max_shape = ele_shape;
max_shape.insert(max_shape.begin(), kMaxElement);
min_shape.insert(min_shape.begin(), 1);
out_shape.insert(out_shape.begin(), -1);
auto output = std::make_shared<AbstractTensor>(type, std::make_shared<Shape>(out_shape, min_shape, max_shape));
return output;
}
} // namespace abstract
} // namespace mindspore
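For element_shape = (2, 3), this inference yields the output shape (-1, 2, 3) with min shape (1, 2, 3) and max shape (10000, 2, 3); the leading -1 marks the dynamic stack depth. A standalone check of that construction, with plain vectors standing in for ShapeVector:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const int64_t kMaxElement = 10000;
  std::vector<int64_t> ele_shape = {2, 3};

  std::vector<int64_t> out_shape = ele_shape;
  std::vector<int64_t> min_shape = ele_shape;
  std::vector<int64_t> max_shape = ele_shape;
  out_shape.insert(out_shape.begin(), -1);           // dynamic stack depth
  min_shape.insert(min_shape.begin(), 1);            // at least one stacked element
  max_shape.insert(max_shape.begin(), kMaxElement);  // bounded by the max_element attr

  assert((out_shape == std::vector<int64_t>{-1, 2, 3}));
  assert((min_shape == std::vector<int64_t>{1, 2, 3}));
  assert((max_shape == std::vector<int64_t>{10000, 2, 3}));
  return 0;
}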

View File

@@ -236,6 +236,8 @@ PrimitiveEvalImplMap &GetPrimitiveToEvalImplMap() {
{prim::kPrimMemCpyAsync, R{InferImplMemCpyAsync, nullptr, true}},
{prim::kPrimFusedPushWeight, R{nullptr, nullptr, true}},
{prim::kPrimFusedPullWeight, R{nullptr, nullptr, true}},
// RL Ops
{prim::kPrimTensorArrayStack, R{InferImplTensorArrayStack, nullptr, true}},
};
return prim_eval_implement_map;
}

View File

@@ -761,6 +761,9 @@ inline const PrimitivePtr kPrimDynamicBroadcastGradientArgs =
// Random
inline const PrimitivePtr kPrimStandardNormal = std::make_shared<Primitive>("StandardNormal");
// RL Ops
inline const PrimitivePtr kPrimTensorArrayStack = std::make_shared<Primitive>("TensorArrayStack");
class DoSignaturePrimitive : public Primitive {
public:
explicit DoSignaturePrimitive(const std::string &name, const ValuePtr &function)

View File

@ -17,7 +17,8 @@ Neural Networks Cells.
Pre-defined building blocks or computing units to construct neural networks.
"""
from . import layer, loss, optim, metrics, wrap, grad, probability, sparse, dynamic_lr
from . import layer, loss, optim, metrics, wrap, grad, probability, sparse, dynamic_lr,\
reinforcement
from .learning_rate_schedule import *
from .dynamic_lr import *
from .cell import Cell, GraphKernel, GraphCell
@@ -28,7 +29,7 @@ from .metrics import *
from .wrap import *
from .grad import Jvp, Vjp
from .sparse import *
from .reinforcement import *
__all__ = ["Cell", "GraphKernel", "GraphCell"]
__all__.extend(layer.__all__)
@@ -40,5 +41,6 @@ __all__.extend(grad.__all__)
__all__.extend(sparse.__all__)
__all__.extend(learning_rate_schedule.__all__)
__all__.extend(dynamic_lr.__all__)
__all__.extend(reinforcement.__all__)
__all__.sort()

View File

@@ -0,0 +1,22 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
TensorArray.
"""
from .tensor_array import (TensorArray)
__all__ = [
"TensorArray",
]

View File

@@ -0,0 +1,142 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
TensorArray
"""
from mindspore.nn.cell import Cell
from mindspore.ops.operations import _tensor_array as ta
from mindspore._checkparam import Rel, Validator
from mindspore.common import dtype as mstype
class TensorArray(Cell):
r"""TensorArray: a dynamic array to store tensors.
.. warning::
This is an experimental prototype that is subject to change and/or deletion.
Args:
dtype (mindspore.dtype): the data type in the TensorArray.
element_shape (List[int]): the shape of each tensor in a TensorArray.
dynamic_size (bool): if true, the size of TensorArray can be increased. Default: True.
size (int): if dynamic_size=False, `size` means the max_size of the TensorArray.
name (string): the name of this TensorArray. Default: "TA".
Supported Platforms:
``GPU``
Examples:
>>> import mindspore
>>> import mindspore.nn as nn
>>> ta = nn.TensorArray(mindspore.int64, ())
>>> ta.write(0, 1)
>>> ta.write(1, 2)
>>> ans = ta.read(1)
>>> print(ans)
2
>>> s = ta.stack()
>>> print(s)
[1 2]
>>> ta.clear()
>>> ta.write(0, 3)
>>> ans = ta.read(0)
>>> print(ans)
3
>>> ta.close()
"""
def __init__(self, dtype, element_shape, dynamic_size=True, size=0, name="TA"):
"""Initialize TensorArray"""
super(TensorArray, self).__init__()
Validator.check_subclass("dtype", dtype, mstype.number_type, self.cls_name)
Validator.check_int(size, 0, Rel.GE, "size", self.cls_name)
self.handle_ = ta.TensorArray(dtype, element_shape, dynamic_size, size, name)()
self.tensor_array_write = ta.TensorArrayWrite()
self.tensor_array_read = ta.TensorArrayRead(dtype, element_shape)
self.tensor_array_close = ta.TensorArrayClose()
self.tensor_array_clear = ta.TensorArrayClear()
self.tensor_array_stack = ta.TensorArrayStack(dtype, element_shape)
self.tensor_array_size = ta.TensorArraySize()
def write(self, index, value):
"""
Write value (Tensor) into the TensorArray at position index.
Args:
index ([int, mindspore.int64]): The position to write.
value (Tensor): The value to add into the TensorArray.
Returns:
            Bool, always True.
"""
self.tensor_array_write(self.handle_, index, value)
return True
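    # A minimal usage sketch (hypothetical values; assumes ``from mindspore
    # import Tensor`` and a TensorArray created as in the class example above):
    #     ta.write(0, Tensor(5, mindspore.int64))  # slot 0 now holds 5
    #     ta.write(1, 6)                           # plain Python ints work too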
def read(self, index):
"""
        Read a tensor from the TensorArray at the given position `index`.
Args:
index ([int, mindspore.int64]): The given index to get the tensor.
Returns:
            Tensor, the value at position `index`.
"""
value = self.tensor_array_read(self.handle_, index)
return value
def close(self):
"""
Close the created TensorArray.
.. warning::
            Once the TensorArray is closed, every function belonging to it becomes unavailable and
            all of its resources are removed. If the TensorArray will be used again, e.g. in the
            next loop iteration, use `clear` instead.
Returns:
            Bool, always True.
"""
self.tensor_array_close(self.handle_)
return True
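    # Note: once ``close`` runs, no further calls on this TensorArray are
    # valid; to reuse the array across steps or loop iterations, call
    # ``clear`` instead, which empties it but keeps the instance usable.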
def clear(self):
"""
        Reset the created TensorArray: the stored data is cleared and the size is reset to zero,
        while the instance of this TensorArray is kept and can be reused.
Returns:
            Bool, always True.
"""
self.tensor_array_clear(self.handle_)
return True
def stack(self):
"""
        Stack all the values in the TensorArray into a single Tensor.
        Returns:
            Tensor, all the values stacked into one tensor.
"""
ans = self.tensor_array_stack(self.handle_)
return ans
def size(self):
"""
        Return the logical size of the TensorArray.
Returns:
Tensor, the size of TensorArray.
"""
size = self.tensor_array_size(self.handle_)
return size

View File

@ -0,0 +1,285 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Operators for TensorArray."""
import mindspore as ms
from ..._checkparam import Validator as validator
from ..._checkparam import Rel
from ...common import dtype as mstype
from ..primitive import prim_attr_register, PrimitiveWithInfer, Primitive
class TensorArray(PrimitiveWithInfer):
r"""
    TensorArrayCreate is used to create a TensorArray and returns a unique handle.
Args:
        dtype (mindspore.dtype): The data type of the elements in the TensorArray.
        element_shape (List[int]): The shape of each element in the TensorArray.
        dynamic_size (bool): If True, the size of the TensorArray can be increased. Default: True.
        size (int): The size of the TensorArray when `dynamic_size=False`. Default: 0.
        name (str): The name of this TensorArray. Default: "TA".
Inputs:
None.
Outputs:
        - **output** (Tensor[mindspore.int64]) - a unique handle bound to the TensorArray.
Supported Platforms:
``GPU``
Examples:
>>> import mindspore
>>> import mindspore.ops as ops
>>> create_op = ops.TensorArray(mindspore.int32, ())
>>> handle = create_op()
>>> print(handle)
0
"""
@prim_attr_register
def __init__(self, dtype, element_shape, dynamic_size=True, size=0, name="TA"):
validator.check_type_name("dtype", dtype, mstype.number_type, self.name)
validator.check_int(size, 0, Rel.GE, "size", self.name)
self.add_prim_attr('dtype', dtype)
self.add_prim_attr('element_shape', element_shape)
self.add_prim_attr('dynamic_size', dynamic_size)
self.add_prim_attr('size', size)
self.add_prim_attr('side_effect_mem', True)
self.add_prim_attr('name', name)
def infer_shape(self):
return ()
def infer_dtype(self):
return mstype.int64
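    # The returned handle is a scalar int64 tensor identifying this TensorArray;
    # every subsequent TensorArray operator takes it as its first input.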
class TensorArrayWrite(PrimitiveWithInfer):
r"""
    TensorArrayWrite is used to write a tensor into a created TensorArray.
Inputs:
        - **handle** (Tensor[int64]) - The handle bound to the TensorArray.
        - **index** (Tensor[int64]) - The position to write.
        - **value** (Tensor) - The value to write into the TensorArray.
Outputs:
None.
Supported Platforms:
``GPU``
Examples:
>>> import mindspore
>>> import mindspore.ops as ops
>>> create_op = ops.TensorArray(mindspore.int32, ())
>>> handle = create_op()
>>> write_op = ops.TensorArrayWrite()
    >>> write_op(handle, 0, 1)
"""
@prim_attr_register
def __init__(self):
self.add_prim_attr('side_effect_mem', True)
def infer_shape(self, handle_shape, index_shape, value_shape):
return ()
def infer_dtype(self, handle_type, index_type, value_type):
validator.check_type_name("handle", handle_type, (ms.int64), self.name)
validator.check_type_name("index", index_type, (int, ms.int64), self.name)
validator.check_type_name("value", value_type, mstype.number_type, self.name)
return mstype.int64
class TensorArrayRead(PrimitiveWithInfer):
r"""
    TensorArrayRead is used to read a tensor from a created TensorArray at the given index.
Args:
        dtype (mindspore.dtype): The data type of the elements in the TensorArray.
        element_shape (List[int]): The shape of each element in the TensorArray.
    Inputs:
        - **handle** (Tensor[mindspore.int64]) - The handle bound to the TensorArray.
        - **index** (Tensor[int64]) - The position to read.
    Outputs:
        - **output** (Tensor) - The value at position `index`.
Supported Platforms:
``GPU``
Examples:
>>> import mindspore
>>> import mindspore.ops as ops
>>> create_op = ops.TensorArray(mindspore.int32, ())
>>> handle = create_op()
>>> write_op = ops.TensorArrayWrite()
    >>> write_op(handle, 0, 1)
>>> read_op = ops.TensorArrayRead(mindspore.int32, ())
>>> ans = read_op(handle, 0)
>>> print(ans)
1
"""
@prim_attr_register
def __init__(self, dtype, element_shape):
validator.check_type_name("dtype", dtype, mstype.number_type, self.name)
self.add_prim_attr('dtype', dtype)
self.add_prim_attr('element_shape', element_shape)
self.add_prim_attr('side_effect_mem', True)
self.dtype = dtype
self.shape = element_shape
def infer_shape(self, handle_shape, index_shape):
return self.shape
def infer_dtype(self, handle_type, index_type):
validator.check_type_name("handle", handle_type, (ms.int64), self.name)
validator.check_type_name("index", index_type, (int, ms.int64), self.name)
return self.dtype
class TensorArrayClose(PrimitiveWithInfer):
r"""
    TensorArrayClose is used to close the created TensorArray. All resources in the TensorArray will be deleted.
    Inputs:
        - **handle** (Tensor[mindspore.int64]) - The handle bound to the TensorArray.
Outputs:
None.
Supported Platforms:
``GPU``
Examples:
>>> import mindspore
>>> import mindspore.ops as ops
>>> create_op = ops.TensorArray(mindspore.int32, ())
>>> handle = create_op()
>>> close_op = ops.TensorArrayClose()
>>> close_op(handle)
"""
@prim_attr_register
def __init__(self):
self.add_prim_attr('side_effect_mem', True)
def infer_shape(self, handle_shape):
return ()
def infer_dtype(self, handle_type):
validator.check_type_name("handle", handle_type, (ms.int64), self.name)
return mstype.int64
class TensorArrayClear(PrimitiveWithInfer):
r"""
    TensorArrayClear is used to reset the created TensorArray. The instance of the TensorArray remains available.
    Inputs:
        - **handle** (Tensor[mindspore.int64]) - The handle bound to the TensorArray.
Outputs:
None.
Supported Platforms:
``GPU``
Examples:
>>> import mindspore
>>> import mindspore.ops as ops
>>> create_op = ops.TensorArray(mindspore.int32, ())
>>> handle = create_op()
>>> clear_op = ops.TensorArrayClear()
>>> clear_op(handle)
"""
@prim_attr_register
def __init__(self):
self.add_prim_attr('side_effect_mem', True)
def infer_shape(self, handle_shape):
return ()
def infer_dtype(self, handle_type):
validator.check_type_name("handle", handle_type, (ms.int64), self.name)
return mstype.int64
class TensorArrayStack(Primitive):
r"""
    TensorArrayStack is used to stack the tensors in a created TensorArray into one tensor.
Args:
        dtype (mindspore.dtype): The data type of the elements in the TensorArray.
        element_shape (List[int]): The shape of each element in the TensorArray.
    Inputs:
        - **handle** (Tensor[mindspore.int64]) - The handle bound to the TensorArray.
    Outputs:
        - **output** (Tensor) - The stacked values from the TensorArray.
Supported Platforms:
``GPU``
Examples:
>>> import mindspore
>>> import mindspore.ops as ops
>>> create_op = ops.TensorArray(mindspore.int32, ())
>>> handle = create_op()
>>> write_op = ops.TensorArrayWrite()
    >>> write_op(handle, 0, 1)
    >>> write_op(handle, 1, 2)
>>> stack_op = ops.TensorArrayStack(mindspore.int32, ())
>>> ans = stack_op(handle)
>>> print(ans)
[1 2]
"""
@prim_attr_register
def __init__(self, dtype, element_shape):
"""Initialize TensorArrayStack"""
self.init_prim_io_names(inputs=[''], outputs=['output'])
self.add_prim_attr('dtype', dtype)
self.add_prim_attr('element_shape', element_shape)
self.add_prim_attr('is_dynamic_shape', True)
self.add_prim_attr('side_effect_mem', True)
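        # The stacked output's first dimension is the TensorArray's runtime size,
        # which cannot be inferred statically; hence the ``is_dynamic_shape``
        # attribute, and hence this class derives from Primitive rather than
        # PrimitiveWithInfer (there is no static infer_shape to provide).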
class TensorArraySize(PrimitiveWithInfer):
r"""
    TensorArraySize is used to get the logical size of the created TensorArray.
    Inputs:
        - **handle** (Tensor[mindspore.int64]) - The handle bound to the TensorArray.
    Outputs:
        - **output** (Tensor[mindspore.int64]) - The logical size of the TensorArray.
Supported Platforms:
``GPU``
Examples:
>>> import mindspore
>>> import mindspore.ops as ops
>>> create_op = ops.TensorArray(mindspore.int32, ())
>>> handle = create_op()
>>> size_op = ops.TensorArraySize()
>>> size = size_op(handle)
"""
@prim_attr_register
def __init__(self):
self.add_prim_attr('side_effect_mem', True)
def infer_shape(self, handle_shape):
return ()
def infer_dtype(self, handle_type):
validator.check_type_name("handle", handle_type, (ms.int64), self.name)
return mstype.int64

View File

@ -0,0 +1,93 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
class TensorArrayNet(nn.Cell):
def __init__(self, dtype, element_shape):
super(TensorArrayNet, self).__init__()
self.ta = nn.TensorArray(dtype, element_shape)
def construct(self, index, value):
for i in range(2):
for _ in range(10):
self.ta.write(index, value)
index += 1
value += 1
if i == 0:
self.ta.clear()
index = 0
v = self.ta.read(index-1)
s = self.ta.stack()
self.ta.close()
return v, s
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_tensorarray():
"""
Feature: TensorArray gpu TEST.
Description: Test the function write, read, stack, clear, close in both graph and pynative mode.
Expectation: success.
"""
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
index = Tensor(0, mindspore.int64)
value = Tensor(5, mindspore.int64)
ta = TensorArrayNet(dtype=mindspore.int64, element_shape=())
v, s = ta(index, value)
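    # Derivation of the expected values: the first inner loop writes 5..14 at
    # indices 0..9; clear() then resets the array and the index (but not the
    # running value), so the second loop writes 15..24 at indices 0..9.
    # Hence read(9) == 24 and stack() == [15, ..., 24].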
expect_v = 24
expect_s = [15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
assert np.allclose(s.asnumpy(), expect_s)
assert np.allclose(v.asnumpy(), expect_v)
context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
ta = nn.TensorArray(mindspore.int64, ())
for i in range(5):
ta.write(i, 99)
v = ta.read(0)
s = ta.stack()
expect_v = 99
expect_s = [99, 99, 99, 99, 99]
assert np.allclose(s.asnumpy(), expect_s)
assert np.allclose(v.asnumpy(), expect_v)
ta_size = ta.size()
assert np.allclose(ta_size.asnumpy(), 5)
ta.clear()
ta_size = ta.size()
assert np.allclose(ta_size.asnumpy(), 0)
ta.write(0, 88)
v = ta.read(0)
s = ta.stack()
ta.close()
expect_v = 88
expect_s = [88]
assert np.allclose(s.asnumpy(), expect_s)
assert np.allclose(v.asnumpy(), expect_v)
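    # A dynamic TensorArray grows on out-of-range writes: writing at index 5
    # enlarges it to six elements, and slots that were never written stack as zeros.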
ta = nn.TensorArray(mindspore.float32, ())
ta.write(5, 1.)
s = ta.stack()
expect_s = [0., 0., 0., 0., 0., 1.]
assert np.allclose(s.asnumpy(), expect_s)
ta.write(2, 1.)
s = ta.stack()
expect_s = [0., 0., 1., 0., 0., 1.]
assert np.allclose(s.asnumpy(), expect_s)
ta.close()