!28261 unified runtime supports the environ type in control flow

Merge pull request !28261 from limingqi107/new_actor_runtime
i-robot 2021-12-28 08:18:20 +00:00 committed by Gitee
commit 15c663eb97
24 changed files with 1330 additions and 1 deletion

View File

@ -3,6 +3,7 @@ file(GLOB_RECURSE KERNEL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"kash/*.cc"
"common_utils.cc"
"oplib/*.cc"
"environ_manager.cc"
)
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
@ -35,6 +36,7 @@ if(ENABLE_CPU)
"cpu/pyfunc/*.cc"
"cpu/rl/*.cc"
"cpu/custom/*.cc"
"cpu/environ/*.cc"
)
if(NOT ENABLE_MPI)

View File

@ -0,0 +1,54 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/environ/environ_cpu_create.h"
#include "backend/kernel_compiler/environ_manager.h"
#include "backend/kernel_compiler/common_utils.h"
namespace mindspore {
namespace kernel {
const std::vector<size_t> &EnvironCreateCPUKernel::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &EnvironCreateCPUKernel::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &EnvironCreateCPUKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
void EnvironCreateCPUKernel::InitKernel(const CNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
// Check the output handle.
auto handle_type = AnfAlgo::GetOutputDeviceDataType(node, 0);
auto handle_shapes = AnfAlgo::GetOutputDeviceShape(node, 0);
if (!EnvironMgr::GetInstance().IsScalarTensor(handle_type, handle_shapes)) {
MS_LOG(EXCEPTION) << "The output handle checks invalid, kernel: " << node->fullname_with_scope();
}
handle_size_ = sizeof(int64_t);
output_size_list_.push_back(handle_size_);
}
bool EnvironCreateCPUKernel::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs) {
// Generate a unique handle.
int64_t env_handle = EnvironMgr::GetInstance().Create();
auto output = GetDeviceAddress<int64_t>(outputs, 0);
output[0] = env_handle;
MS_LOG(DEBUG) << "Create env handle: " << output[0];
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,50 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ENVIRON_ENVIRON_CPU_CREATE_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ENVIRON_ENVIRON_CPU_CREATE_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class EnvironCreateCPUKernel : public CPUKernel {
public:
EnvironCreateCPUKernel() : handle_size_(0) {}
~EnvironCreateCPUKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
void InitKernel(const CNodePtr &node) override;
private:
size_t handle_size_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
MS_REG_CPU_KERNEL(EnvironCreate, KernelAttr().AddOutputAttr(kNumberTypeInt64), EnvironCreateCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ENVIRON_ENVIRON_CPU_CREATE_H_

View File

@ -0,0 +1,101 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/environ/environ_cpu_get.h"
#include "backend/kernel_compiler/environ_manager.h"
#include "backend/kernel_compiler/common_utils.h"
namespace mindspore {
namespace kernel {
const std::vector<size_t> &EnvironGetCPUKernel::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &EnvironGetCPUKernel::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &EnvironGetCPUKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
void EnvironGetCPUKernel::InitKernel(const CNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
if (!EnvironMgr::GetInstance().CheckEnvInput(node)) {
MS_LOG(EXCEPTION) << "The input checks invalid, kernel: " << node->fullname_with_scope();
}
value_type_attr_ = TypeId(AnfAlgo::GetNodeAttr<int>(node, kEnvValueTypeAttr));
handle_size_ = sizeof(int64_t);
key_size_ = sizeof(int64_t);
auto value_type = AnfAlgo::GetOutputDeviceDataType(node, 0);
auto value_shapes = AnfAlgo::GetOutputDeviceShape(node, 0);
auto default_value_type = AnfAlgo::GetInputDeviceDataType(node, 2);
auto default_value_shapes = AnfAlgo::GetInputDeviceShape(node, 2);
if ((value_type != default_value_type) || (value_shapes != default_value_shapes)) {
MS_LOG(EXCEPTION) << "The env value checks invalid, kernel: " << node->fullname_with_scope();
}
value_size_ = GetTypeByte(TypeIdToType(value_type));
for (auto &i : value_shapes) {
value_size_ *= i;
}
input_size_list_.push_back(handle_size_);
input_size_list_.push_back(key_size_);
input_size_list_.push_back(value_size_);
output_size_list_.push_back(value_size_);
}
bool EnvironGetCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs) {
auto input_handle = GetDeviceAddress<int64_t>(inputs, 0);
auto input_key = GetDeviceAddress<int64_t>(inputs, 1);
auto input_default_value = GetDeviceAddress<void>(inputs, 2);
auto output_value = GetDeviceAddress<int64_t>(outputs, 0);
// Get host handle and host key.
int64_t host_handle = input_handle[0];
int64_t host_key = input_key[0];
// Get env and value by handle and key.
const auto &env = EnvironMgr::GetInstance().Get(host_handle);
MS_EXCEPTION_IF_NULL(env);
const auto &env_value = env->Get(host_key);
// Default value.
auto value = input_default_value;
auto value_size = inputs[2]->size;
auto value_type = value_type_attr_;
if (env_value != nullptr) {
value = env_value->addr_;
value_size = env_value->size_;
value_type = env_value->value_type_;
} else {
auto node = cnode_ptr_.lock();
const std::string &prim_name = (node == nullptr) ? "" : AnfAlgo::GetCNodeName(node);
MS_LOG(INFO) << "Use the default input value for kernel: " << prim_name << ", env handle: " << host_handle
<< ", env key: " << host_key;
}
// Check the env value size and type. The value size may be aligned, so it must be no less than value_size_.
if ((value_size < value_size_) || (value_type != value_type_attr_)) {
MS_LOG(ERROR) << "The env value checks invalid, value_size: " << value_size << ", value_size_: " << value_size_
<< ", value_type: " << value_type << ", value_type_attr_: " << value_type_attr_;
return false;
}
auto ret = memcpy_s(output_value, value_size_, value, value_size_);
if (ret != 0) {
MS_LOG(EXCEPTION) << "Output memcpy error: " << ret;
}
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,124 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ENVIRON_ENVIRON_CPU_GET_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ENVIRON_ENVIRON_CPU_GET_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class EnvironGetCPUKernel : public CPUKernel {
public:
EnvironGetCPUKernel() : value_type_attr_(kObjectTypeTensorType), handle_size_(0), key_size_(0), value_size_(0) {}
~EnvironGetCPUKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
void InitKernel(const CNodePtr &node) override;
private:
// The type of the env value tensor to get.
TypeId value_type_attr_;
size_t handle_size_;
size_t key_size_;
size_t value_size_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
MS_REG_CPU_KERNEL(EnvironGet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64),
EnvironGetCPUKernel);
MS_REG_CPU_KERNEL(EnvironGet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
EnvironGetCPUKernel);
MS_REG_CPU_KERNEL(EnvironGet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt16)
.AddOutputAttr(kNumberTypeInt16),
EnvironGetCPUKernel);
MS_REG_CPU_KERNEL(EnvironGet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt32)
.AddOutputAttr(kNumberTypeUInt32),
EnvironGetCPUKernel);
MS_REG_CPU_KERNEL(EnvironGet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt16)
.AddOutputAttr(kNumberTypeUInt16),
EnvironGetCPUKernel);
MS_REG_CPU_KERNEL(EnvironGet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt8)
.AddOutputAttr(kNumberTypeUInt8),
EnvironGetCPUKernel);
MS_REG_CPU_KERNEL(EnvironGet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt64)
.AddOutputAttr(kNumberTypeUInt64),
EnvironGetCPUKernel);
MS_REG_CPU_KERNEL(EnvironGet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
EnvironGetCPUKernel);
MS_REG_CPU_KERNEL(EnvironGet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat16)
.AddOutputAttr(kNumberTypeFloat16),
EnvironGetCPUKernel);
MS_REG_CPU_KERNEL(EnvironGet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeBool)
.AddOutputAttr(kNumberTypeBool),
EnvironGetCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ENVIRON_ENVIRON_CPU_GET_H_

View File

@ -0,0 +1,91 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/environ/environ_cpu_set.h"
#include "backend/kernel_compiler/environ_manager.h"
#include "backend/kernel_compiler/common_utils.h"
#include "runtime/hardware/cpu/cpu_memory_pool.h"
namespace mindspore {
namespace kernel {
const std::vector<size_t> &EnvironSetCPUKernel::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &EnvironSetCPUKernel::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &EnvironSetCPUKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
void EnvironSetCPUKernel::InitKernel(const CNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
if (!EnvironMgr::GetInstance().CheckEnvInput(node)) {
MS_LOG(EXCEPTION) << "The input checks invalid, kernel: " << node->fullname_with_scope();
}
// Check the output handle.
auto handle_type = AnfAlgo::GetOutputDeviceDataType(node, 0);
auto handle_shapes = AnfAlgo::GetOutputDeviceShape(node, 0);
if (!EnvironMgr::GetInstance().IsScalarTensor(handle_type, handle_shapes)) {
MS_LOG(EXCEPTION) << "The output handle checks invalid, kernel: " << node->fullname_with_scope();
}
value_type_attr_ = TypeId(AnfAlgo::GetNodeAttr<int>(node, kEnvValueTypeAttr));
handle_size_ = sizeof(int64_t);
key_size_ = sizeof(int64_t);
auto value_type = AnfAlgo::GetInputDeviceDataType(node, 2);
auto value_shapes = AnfAlgo::GetInputDeviceShape(node, 2);
value_size_ = GetTypeByte(TypeIdToType(value_type));
for (auto &i : value_shapes) {
value_size_ *= i;
}
input_size_list_.push_back(handle_size_);
input_size_list_.push_back(key_size_);
input_size_list_.push_back(value_size_);
output_size_list_.push_back(handle_size_);
}
bool EnvironSetCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs) {
auto input_handle = GetDeviceAddress<int64_t>(inputs, 0);
auto input_key = GetDeviceAddress<int64_t>(inputs, 1);
auto input_value = GetDeviceAddress<void>(inputs, 2);
auto output_handle = GetDeviceAddress<int64_t>(outputs, 0);
if (input_handle != output_handle) {
MS_LOG(EXCEPTION) << "The EnvSet is ref kernel and the output handle is not equal of input handle.";
}
// Get host handle and host key.
int64_t host_handle = input_handle[0];
int64_t host_key = input_key[0];
// Allocate the value memory; it is freed at the end of the step.
auto value_ptr = device::cpu::CPUMemoryPool::GetInstance().AllocTensorMem(value_size_);
MS_EXCEPTION_IF_NULL(value_ptr);
auto ret = memcpy_s(value_ptr, value_size_, input_value, value_size_);
if (ret != 0) {
MS_LOG(EXCEPTION) << "Input value memcpy error: " << ret;
}
// Set env member.
const auto &env = EnvironMgr::GetInstance().Get(host_handle);
MS_EXCEPTION_IF_NULL(env);
auto env_value = std::make_shared<EnvironValue>(value_ptr, value_size_, value_type_attr_, kCPUDevice);
env->Set(host_key, env_value);
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,125 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ENVIRON_ENVIRON_CPU_SET_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ENVIRON_ENVIRON_CPU_SET_H_
#include <vector>
#include <string>
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class EnvironSetCPUKernel : public CPUKernel {
public:
EnvironSetCPUKernel() : value_type_attr_(kObjectTypeTensorType), handle_size_(0), key_size_(0), value_size_(0) {}
~EnvironSetCPUKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
void InitKernel(const CNodePtr &node) override;
private:
// The type of the env value tensor to set.
TypeId value_type_attr_;
size_t handle_size_;
size_t key_size_;
size_t value_size_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
MS_REG_CPU_KERNEL(EnvironSet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64),
EnvironSetCPUKernel);
MS_REG_CPU_KERNEL(EnvironSet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt64),
EnvironSetCPUKernel);
MS_REG_CPU_KERNEL(EnvironSet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt16)
.AddOutputAttr(kNumberTypeInt64),
EnvironSetCPUKernel);
MS_REG_CPU_KERNEL(EnvironSet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt32)
.AddOutputAttr(kNumberTypeInt64),
EnvironSetCPUKernel);
MS_REG_CPU_KERNEL(EnvironSet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt16)
.AddOutputAttr(kNumberTypeInt64),
EnvironSetCPUKernel);
MS_REG_CPU_KERNEL(EnvironSet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt8)
.AddOutputAttr(kNumberTypeInt64),
EnvironSetCPUKernel);
MS_REG_CPU_KERNEL(EnvironSet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt64)
.AddOutputAttr(kNumberTypeInt64),
EnvironSetCPUKernel);
MS_REG_CPU_KERNEL(EnvironSet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeInt64),
EnvironSetCPUKernel);
MS_REG_CPU_KERNEL(EnvironSet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat16)
.AddOutputAttr(kNumberTypeInt64),
EnvironSetCPUKernel);
MS_REG_CPU_KERNEL(EnvironSet,
KernelAttr()
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeBool)
.AddOutputAttr(kNumberTypeInt64),
EnvironSetCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ENVIRON_ENVIRON_CPU_SET_H_

View File

@ -0,0 +1,91 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_ENVIRON_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_ENVIRON_H_
#include <vector>
#include <string>
#include <memory>
#include <map>
#include "backend/kernel_compiler/kernel.h"
#include "ir/dtype/type_id.h"
#include "utils/ms_context.h"
#include "runtime/hardware/device_context_manager.h"
namespace mindspore {
namespace kernel {
constexpr auto kEnvValueTypeAttr = "value_type";
struct EnvironValue {
EnvironValue() : addr_(nullptr), size_(0), value_type_(kObjectTypeTensorType), device_name_(""), device_id_(0) {}
EnvironValue(void *address_addr, size_t address_size, TypeId value_type, const std::string &device_name)
: addr_(address_addr), size_(address_size), value_type_(value_type), device_name_(device_name) {
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
device_id_ = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
}
void *addr_;
size_t size_;
TypeId value_type_;
// The device name and id are used to find the device context that frees addr_.
std::string device_name_;
uint32_t device_id_;
};
using EnvironValuePtr = std::shared_ptr<EnvironValue>;
// Environ represents an environment as a key-value map.
class Environ {
public:
explicit Environ(int64_t handle) : handle_(handle) {}
virtual ~Environ() = default;
void Set(int64_t key, const EnvironValuePtr &value) { values_[key] = value; }
EnvironValuePtr Get(int64_t key) {
if (values_.count(key) > 0) {
return values_[key];
}
return nullptr;
}
void Clear() {
// Iterate over the values to free each value's device memory.
for (auto &value : values_) {
MS_EXCEPTION_IF_NULL(value.second);
const auto &device_context = device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext(
{value.second->device_name_, value.second->device_id_});
MS_EXCEPTION_IF_NULL(device_context);
device_context->FreeMemory(value.second->addr_);
}
values_.clear();
}
private:
// The handle is unique for each env.
int64_t handle_;
// Store the tensors in a map, as <key, tensor>.
std::map<int64_t, EnvironValuePtr> values_;
};
using EnvironPtr = std::shared_ptr<Environ>;
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_ENVIRON_H_

View File

@ -0,0 +1,115 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/environ_manager.h"
#include "utils/ms_utils.h"
#include "utils/log_adapter.h"
namespace mindspore {
namespace kernel {
constexpr auto kScalarTensorShapeDim = 1;
constexpr auto kScalarTensorShapeSize = 1;
int64_t EnvironMgr::Create() {
  // Use a scoped lock so the mutex is released even if an exception is thrown.
  std::lock_guard<std::shared_mutex> lock(mutex_);
  if (env_handles_count_ >= INT64_MAX) {
    MS_LOG(EXCEPTION) << "The number of handles is out of range: " << env_handles_count_;
  }
  int64_t ret_handle = ++env_handles_count_;
  auto env = std::make_shared<Environ>(ret_handle);
  MS_EXCEPTION_IF_NULL(env);
  envs_[ret_handle] = env;
  return ret_handle;
}
EnvironPtr EnvironMgr::Get(int64_t handle) {
  // Use a scoped shared lock so the mutex is released on every return path.
  std::shared_lock<std::shared_mutex> lock(mutex_);
  const auto &iter = envs_.find(handle);
  if (iter != envs_.end()) {
    return iter->second;
  }
  return nullptr;
}
void EnvironMgr::Clear() {
for (auto &env : envs_) {
MS_EXCEPTION_IF_NULL(env.second);
env.second->Clear();
}
envs_.clear();
}
bool EnvironMgr::CheckEnvInput(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
// Check the value type attr.
auto value_type_attr = TypeId(AnfAlgo::GetNodeAttr<int>(kernel_node, kEnvValueTypeAttr));
if ((value_type_attr != kObjectTypeTensorType) && (value_type_attr != kObjectTypeEnvType)) {
MS_LOG(ERROR) << "The value type is not supported: " << value_type_attr
<< ", kernel: " << kernel_node->fullname_with_scope();
return false;
}
// Check the input handle.
auto handle_type = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
auto handle_shapes = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (!IsScalarTensor(handle_type, handle_shapes)) {
MS_LOG(ERROR) << "The input handle checks invalid, kernel: " << kernel_node->fullname_with_scope();
return false;
}
// Check the input key.
auto key_type = AnfAlgo::GetInputDeviceDataType(kernel_node, 1);
auto key_shapes = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
if (!IsScalarTensor(key_type, key_shapes)) {
MS_LOG(ERROR) << "The input key checks invalid, kernel: " << kernel_node->fullname_with_scope();
return false;
}
// Check the input value.
auto value_type = AnfAlgo::GetInputDeviceDataType(kernel_node, 2);
auto value_shapes = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
if ((value_type_attr == kObjectTypeEnvType) && (!IsScalarTensor(value_type, value_shapes))) {
MS_LOG(ERROR) << "The input value checks invalid, kernel: " << kernel_node->fullname_with_scope();
return false;
}
return true;
}
bool EnvironMgr::IsScalarTensor(TypeId type, std::vector<size_t> shape) {
if (type == kObjectTypeTensorType) {
MS_LOG(ERROR) << "The type is invalid: " << type;
return false;
}
if (shape.size() != kScalarTensorShapeDim) {
MS_LOG(ERROR) << "The shape size is invalid: " << shape.size();
return false;
}
if (shape[0] != kScalarTensorShapeSize) {
MS_LOG(ERROR) << "The shape is invalid: " << shape[0];
return false;
}
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,63 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_ENVIRON_MANAGER_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_ENVIRON_MANAGER_H_
#include <utility>
#include <map>
#include <memory>
#include <vector>
#include <shared_mutex>
#include "backend/kernel_compiler/environ.h"
namespace mindspore {
namespace kernel {
class EnvironMgr {
public:
static EnvironMgr &GetInstance() noexcept {
static EnvironMgr instance;
return instance;
}
// Create the env object and return the unique env handle.
int64_t Create();
EnvironPtr Get(int64_t handle);
void Clear();
// Check whether the inputs of EnvironGet kernel or EnvironSet kernel are valid.
bool CheckEnvInput(const CNodePtr &kernel_node);
// Check whether the tensor is a scalar. Currently the environ handle and key only support scalar tensors.
bool IsScalarTensor(TypeId type, std::vector<size_t> shape);
private:
EnvironMgr() = default;
~EnvironMgr() = default;
DISABLE_COPY_AND_ASSIGN(EnvironMgr);
// Store the envs in map, as <handle, env>.
std::map<int64_t, EnvironPtr> envs_;
int64_t env_handles_count_{0};
std::shared_mutex mutex_;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_ENVIRON_MANAGER_H_
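
Read together, EnvironValue, Environ, and EnvironMgr form a small keyed store: EnvironCreate asks the manager for a fresh handle, EnvironSet wraps already-allocated device memory in an EnvironValue and stores it under an int64 key, EnvironGet looks the value up again (a null result means the default input should be used), and LoopCountActor clears everything at step end. The following is a minimal sketch of that lifecycle using only the APIs shown above; the key, the device name string, and the pre-allocated pointer are illustrative, not part of this change.

// Illustrative sketch of the environ lifecycle; not part of the commit.
#include "backend/kernel_compiler/environ_manager.h"
namespace mindspore {
namespace kernel {
void EnvironLifecycleSketch(void *device_value_ptr, size_t value_size) {
  // EnvironCreate kernel: obtain a unique handle for a new environ.
  int64_t handle = EnvironMgr::GetInstance().Create();
  // EnvironSet kernel: wrap the pre-allocated device memory and store it under a key.
  const int64_t key = 1;  // hypothetical key
  auto value = std::make_shared<EnvironValue>(device_value_ptr, value_size, kObjectTypeTensorType, "CPU");
  const auto &env = EnvironMgr::GetInstance().Get(handle);
  if (env == nullptr) {
    return;
  }
  env->Set(key, value);
  // EnvironGet kernel: a nullptr result means the caller falls back to the default input value.
  const auto &stored = env->Get(key);
  if (stored != nullptr) {
    // stored->addr_, stored->size_ and stored->value_type_ describe the stored value.
  }
  // Step end (LoopCountActor::SendOutput): free every stored value through its device context.
  EnvironMgr::GetInstance().Clear();
}
}  // namespace kernel
}  // namespace mindspore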

View File

@ -0,0 +1,62 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/environ/environ_gpu_create.h"
#include "backend/kernel_compiler/environ_manager.h"
#include "backend/kernel_compiler/common_utils.h"
namespace mindspore {
namespace kernel {
const std::vector<size_t> &EnvironCreateGpuKernel::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &EnvironCreateGpuKernel::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &EnvironCreateGpuKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
bool EnvironCreateGpuKernel::Init(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
// Check the output handle.
auto handle_type = AnfAlgo::GetOutputDeviceDataType(kernel_node, 0);
auto handle_shapes = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
if (!EnvironMgr::GetInstance().IsScalarTensor(handle_type, handle_shapes)) {
MS_LOG(ERROR) << "The output handle checks invalid, kernel: " << kernel_node->fullname_with_scope();
return false;
}
handle_size_ = sizeof(int64_t);
InitSizeLists();
return true;
}
void EnvironCreateGpuKernel::InitSizeLists() { output_size_list_.push_back(handle_size_); }
bool EnvironCreateGpuKernel::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
auto output = GetDeviceAddress<int64_t>(outputs, 0);
// Generate a unique handle.
int64_t env_handle = EnvironMgr::GetInstance().Create();
MS_LOG(DEBUG) << "Create env handle: " << env_handle;
// Copy handle to output.
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(output, &env_handle, handle_size_, cudaMemcpyHostToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr)),
"Copy env handle failed.");
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,53 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ENVIRON_ENVIRON_GPU_CREATE_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ENVIRON_ENVIRON_GPU_CREATE_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class EnvironCreateGpuKernel : public GpuKernel {
public:
EnvironCreateGpuKernel() : handle_size_(0) {}
~EnvironCreateGpuKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel_node) override;
protected:
void InitSizeLists() override;
private:
size_t handle_size_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
MS_REG_GPU_KERNEL(EnvironCreate, EnvironCreateGpuKernel)
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ENVIRON_ENVIRON_GPU_CREATE_H_

View File

@ -0,0 +1,119 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/environ/environ_gpu_get.h"
#include "backend/kernel_compiler/environ_manager.h"
#include "backend/kernel_compiler/common_utils.h"
namespace mindspore {
namespace kernel {
const std::vector<size_t> &EnvironGetGpuKernel::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &EnvironGetGpuKernel::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &EnvironGetGpuKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
bool EnvironGetGpuKernel::Init(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
if (!EnvironMgr::GetInstance().CheckEnvInput(kernel_node)) {
MS_LOG(ERROR) << "The input checks invalid, kernel: " << kernel_node->fullname_with_scope();
return false;
}
value_type_attr_ = TypeId(AnfAlgo::GetNodeAttr<int>(kernel_node, kEnvValueTypeAttr));
handle_size_ = sizeof(int64_t);
key_size_ = sizeof(int64_t);
auto value_type = AnfAlgo::GetOutputDeviceDataType(kernel_node, 0);
auto value_shapes = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
auto default_value_type = AnfAlgo::GetInputDeviceDataType(kernel_node, 2);
auto default_value_shapes = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
if ((value_type != default_value_type) || (value_shapes != default_value_shapes)) {
MS_LOG(ERROR) << "The env value checks invalid, kernel: " << kernel_node->fullname_with_scope();
return false;
}
value_size_ = GetTypeByte(TypeIdToType(value_type));
for (auto &i : value_shapes) {
value_size_ *= i;
}
InitSizeLists();
return true;
}
void EnvironGetGpuKernel::InitSizeLists() {
input_size_list_.push_back(handle_size_);
input_size_list_.push_back(key_size_);
input_size_list_.push_back(value_size_);
output_size_list_.push_back(value_size_);
}
bool EnvironGetGpuKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
auto input_handle = GetDeviceAddress<int64_t>(inputs, 0);
auto input_key = GetDeviceAddress<int64_t>(inputs, 1);
auto input_default_value = GetDeviceAddress<void>(inputs, 2);
auto output_value = GetDeviceAddress<int64_t>(outputs, 0);
// Get host handle and host key.
int64_t host_handle = 0;
int64_t host_key = 0;
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(&host_handle, input_handle, handle_size_, cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(stream_ptr)),
"Get handle failed.");
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(&host_key, input_key, key_size_, cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(stream_ptr)),
"Get key failed.");
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr)),
"Sync stream failed.");
// Get env and value by handle and key.
const auto &env = EnvironMgr::GetInstance().Get(host_handle);
MS_EXCEPTION_IF_NULL(env);
const auto &env_value = env->Get(host_key);
// Default value.
auto value = input_default_value;
auto value_size = inputs[2]->size;
auto value_type = value_type_attr_;
if (env_value != nullptr) {
value = env_value->addr_;
value_size = env_value->size_;
value_type = env_value->value_type_;
} else {
auto kernel_node = kernel_node_.lock();
const std::string &prim_name = (kernel_node == nullptr) ? "" : AnfAlgo::GetCNodeName(kernel_node);
MS_LOG(INFO) << "Use the default input value for kernel: " << prim_name << ", env handle: " << host_handle
<< ", env key: " << host_key;
}
// Check the env value size and type. The value size may be aligned, so it must be no less than value_size_.
if ((value_size < value_size_) || (value_type != value_type_attr_)) {
MS_LOG(ERROR) << "The env value checks invalid, value_size: " << value_size << ", value_size_: " << value_size_
<< ", value_type: " << value_type << ", value_type_attr_: " << value_type_attr_;
return false;
}
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(output_value, value, value_size_, cudaMemcpyDeviceToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr)),
"Copy value failed.");
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,58 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ENVIRON_ENVIRON_GPU_GET_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ENVIRON_ENVIRON_GPU_GET_H_
#include <vector>
#include <string>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class EnvironGetGpuKernel : public GpuKernel {
public:
EnvironGetGpuKernel() : value_type_attr_(kObjectTypeTensorType), handle_size_(0), key_size_(0), value_size_(0) {}
~EnvironGetGpuKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel_node) override;
protected:
void InitSizeLists() override;
private:
// The type of the env value tensor to get.
TypeId value_type_attr_;
size_t handle_size_;
size_t key_size_;
size_t value_size_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
MS_REG_GPU_KERNEL(EnvironGet, EnvironGetGpuKernel)
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ENVIRON_ENVIRON_GPU_GET_H_

View File

@ -0,0 +1,108 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/environ/environ_gpu_set.h"
#include "backend/kernel_compiler/environ_manager.h"
#include "backend/kernel_compiler/common_utils.h"
#include "runtime/device/gpu/gpu_memory_allocator.h"
namespace mindspore {
namespace kernel {
const std::vector<size_t> &EnvironSetGpuKernel::GetInputSizeList() const { return input_size_list_; }
const std::vector<size_t> &EnvironSetGpuKernel::GetOutputSizeList() const { return output_size_list_; }
const std::vector<size_t> &EnvironSetGpuKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
bool EnvironSetGpuKernel::Init(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
if (!EnvironMgr::GetInstance().CheckEnvInput(kernel_node)) {
MS_LOG(ERROR) << "The input checks invalid, kernel: " << kernel_node->fullname_with_scope();
return false;
}
// Check the output handle.
auto handle_type = AnfAlgo::GetOutputDeviceDataType(kernel_node, 0);
auto handle_shapes = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
if (!EnvironMgr::GetInstance().IsScalarTensor(handle_type, handle_shapes)) {
MS_LOG(ERROR) << "The output handle checks invalid, kernel: " << kernel_node->fullname_with_scope();
return false;
}
value_type_attr_ = TypeId(AnfAlgo::GetNodeAttr<int>(kernel_node, kEnvValueTypeAttr));
handle_size_ = sizeof(int64_t);
key_size_ = sizeof(int64_t);
auto value_type = AnfAlgo::GetInputDeviceDataType(kernel_node, 2);
auto value_shapes = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
value_size_ = GetTypeByte(TypeIdToType(value_type));
for (auto &i : value_shapes) {
value_size_ *= i;
}
InitSizeLists();
return true;
}
void EnvironSetGpuKernel::InitSizeLists() {
input_size_list_.push_back(handle_size_);
input_size_list_.push_back(key_size_);
input_size_list_.push_back(value_size_);
output_size_list_.push_back(handle_size_);
}
bool EnvironSetGpuKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
auto input_handle = GetDeviceAddress<int64_t>(inputs, 0);
auto input_key = GetDeviceAddress<int64_t>(inputs, 1);
auto input_value = GetDeviceAddress<void>(inputs, 2);
auto output_handle = GetDeviceAddress<int64_t>(outputs, 0);
if (input_handle != output_handle) {
MS_LOG(EXCEPTION) << "The EnvSet is ref kernel and the output handle is not equal of input handle.";
}
// Get host handle and host key.
int64_t host_handle = 0;
int64_t host_key = 0;
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(&host_handle, input_handle, handle_size_, cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(stream_ptr)),
"Get handle failed.");
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(&host_key, input_key, key_size_, cudaMemcpyDeviceToHost,
reinterpret_cast<cudaStream_t>(stream_ptr)),
"Get key failed.");
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream_ptr)),
"Sync stream failed.");
// Allocate the value memory; it is freed at the end of the step.
auto value_ptr = device::gpu::GPUMemoryAllocator::GetInstance().AllocTensorMem(value_size_);
MS_EXCEPTION_IF_NULL(value_ptr);
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(value_ptr, input_value, value_size_, cudaMemcpyDeviceToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr)),
"Copy value failed.");
// Set env member.
const auto &env = EnvironMgr::GetInstance().Get(host_handle);
MS_EXCEPTION_IF_NULL(env);
auto env_value = std::make_shared<EnvironValue>(value_ptr, value_size_, value_type_attr_, kGPUDevice);
env->Set(host_key, env_value);
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,59 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ENVIRON_ENVIRON_GPU_SET_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ENVIRON_ENVIRON_GPU_SET_H_
#include <vector>
#include <string>
#include <memory>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class EnvironSetGpuKernel : public GpuKernel {
public:
EnvironSetGpuKernel() : value_type_attr_(kObjectTypeTensorType), handle_size_(0), key_size_(0), value_size_(0) {}
~EnvironSetGpuKernel() = default;
const std::vector<size_t> &GetInputSizeList() const override;
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
bool Init(const CNodePtr &kernel_node) override;
protected:
void InitSizeLists() override;
private:
// The type of the env value tensor to set.
TypeId value_type_attr_;
size_t handle_size_;
size_t key_size_;
size_t value_size_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
MS_REG_GPU_KERNEL(EnvironSet, EnvironSetGpuKernel)
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ENVIRON_ENVIRON_GPU_SET_H_

View File

@ -23,6 +23,7 @@
#include "runtime/framework/actor/control_flow/entrance_actor.h"
#include "mindrt/include/async/async.h"
#include "utils/log_adapter.h"
#include "backend/kernel_compiler/environ_manager.h"
namespace mindspore {
namespace runtime {
@ -76,6 +77,9 @@ void LoopCountActor::SendOutput(OpContext<DeviceTensor> *const context) {
ActorDispatcher::Send(entrance_aid, &EntranceActor::ClearDataOnStepEnd, from_aid, context);
}
// Clear the global data generated during kernel running.
kernel::EnvironMgr::GetInstance().Clear();
// The LoopCountActor exits.
if (current_count_ == loop_count_) {
current_count_ = 0;

View File

@ -565,6 +565,19 @@ bool AscendDeviceContext::AllocateContinuousMemory(const std::vector<DeviceAddre
return mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list);
}
void *AscendDeviceContext::AllocateMemory(size_t size) const {
MS_EXCEPTION_IF_NULL(runtime_instance_);
MS_EXCEPTION_IF_NULL(mem_manager_);
runtime_instance_->SetContext();
return mem_manager_->MallocMemFromMemPool(size, false);
}
void AscendDeviceContext::FreeMemory(void *const ptr) const {
MS_EXCEPTION_IF_NULL(ptr);
MS_EXCEPTION_IF_NULL(mem_manager_);
mem_manager_->FreeMemFromMemPool(ptr);
}
bool AscendDeviceContext::ExecuteGraph(const KernelGraphPtr &graph) const {
MS_EXCEPTION_IF_NULL(graph);
const uint64_t kUSecondInSecond = 1000000;

View File

@ -80,6 +80,9 @@ class AscendDeviceContext : public DeviceContext {
// Relevant function to allocate and free device memory.
bool AllocateMemory(DeviceAddress *const &address, size_t size) const override;
void FreeMemory(DeviceAddress *const &address) const override;
// Relevant function to allocate and free device memory of raw ptr.
void *AllocateMemory(size_t size) const override;
void FreeMemory(void *const ptr) const override;
// Allocate continuous device memory end to end into 'addr_list'.
// Communication operators may need continuous memory for input and output

View File

@ -114,6 +114,17 @@ void CPUDeviceContext::FreeMemory(DeviceAddress *const &address) const {
address->ptr_ = nullptr;
}
void *CPUDeviceContext::AllocateMemory(size_t size) const {
MS_EXCEPTION_IF_NULL(mem_manager_);
return mem_manager_->MallocMemFromMemPool(size, false);
}
void CPUDeviceContext::FreeMemory(void *const ptr) const {
MS_EXCEPTION_IF_NULL(ptr);
MS_EXCEPTION_IF_NULL(mem_manager_);
mem_manager_->FreeMemFromMemPool(ptr);
}
DeviceAddressPtr CPUDeviceContext::CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format,
TypeId type_id) const {
return std::make_shared<CPUDeviceAddress>(device_ptr, device_size, format, type_id, device_context_key_.device_name_,

View File

@ -39,6 +39,9 @@ class CPUDeviceContext : public DeviceContext {
bool AllocateMemory(DeviceAddress *const &address, size_t size) const override;
void FreeMemory(DeviceAddress *const &address) const override;
// Relevant function to allocate and free device memory of raw ptr.
void *AllocateMemory(size_t size) const override;
void FreeMemory(void *const ptr) const override;
DeviceAddressPtr CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format,
TypeId type_id) const override;

View File

@ -69,9 +69,12 @@ class DeviceContext {
return default_partition_segments;
}
// Relevant function to allocate and free device memory.
// Relevant function to allocate and free device memory of DeviceAddress.
virtual bool AllocateMemory(DeviceAddress *const &address, size_t size) const = 0;
virtual void FreeMemory(DeviceAddress *const &address) const = 0;
// Relevant function to allocate and free device memory of raw ptr.
virtual void *AllocateMemory(size_t size) const = 0;
virtual void FreeMemory(void *const ptr) const = 0;
// Allocate continuous device memory end to end into 'addr_list'.
// Communication operators may need continuous memory for input and output
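
The two raw-pointer overloads declared above let code that only tracks a void * (such as EnvironValue) allocate and release device memory without going through a DeviceAddress; Environ::Clear() resolves the owning device context by name and id and then frees each stored address. A minimal sketch of that call pattern follows; the helper names and null checks are illustrative, not part of this change.

// Illustrative helpers around the new raw-pointer overloads; not part of the commit.
#include <string>
#include "runtime/hardware/device_context_manager.h"
void *AllocRawDeviceMemory(const std::string &device_name, uint32_t device_id, size_t size) {
  const auto &device_context =
    mindspore::device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext({device_name, device_id});
  // AllocateMemory(size_t) is the raw-pointer overload added in this commit.
  return (device_context == nullptr) ? nullptr : device_context->AllocateMemory(size);
}
void FreeRawDeviceMemory(const std::string &device_name, uint32_t device_id, void *ptr) {
  const auto &device_context =
    mindspore::device::DeviceContextManager::GetInstance().GetOrCreateDeviceContext({device_name, device_id});
  if (device_context != nullptr && ptr != nullptr) {
    device_context->FreeMemory(ptr);  // raw-pointer overload added in this commit
  }
}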

View File

@ -205,6 +205,20 @@ bool GPUDeviceContext::AllocateContinuousMemory(const std::vector<DeviceAddressP
return mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list);
}
void *GPUDeviceContext::AllocateMemory(size_t size) const {
MS_EXCEPTION_IF_NULL(mem_manager_);
if (!BindDeviceToCurrentThread()) {
return nullptr;
}
return mem_manager_->MallocMemFromMemPool(size, false);
}
void GPUDeviceContext::FreeMemory(void *const ptr) const {
MS_EXCEPTION_IF_NULL(mem_manager_);
MS_EXCEPTION_IF_NULL(ptr);
mem_manager_->FreeMemFromMemPool(ptr);
}
DeviceAddressPtr GPUDeviceContext::CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format,
TypeId type_id) const {
return std::make_shared<GPUDeviceAddress>(device_ptr, device_size, format, type_id, device_context_key_.device_name_,

View File

@ -45,6 +45,9 @@ class GPUDeviceContext : public DeviceContext {
void FreeMemory(DeviceAddress *const &address) const override;
bool AllocateContinuousMemory(const std::vector<DeviceAddressPtr> &addr_list, size_t total_size,
const std::vector<size_t> &size_list) const override;
// Relevant function to allocate and free device memory of raw ptr.
void *AllocateMemory(size_t size) const override;
void FreeMemory(void *const ptr) const override;
DeviceAddressPtr CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format,
TypeId type_id) const override;