commit b1c4038a5b

@@ -322,7 +322,7 @@ bool HcclKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector
   op_info.outputPtr = outputs[0]->addr;
   op_info.dataType = static_cast<HcclDataType>(data_type);
   op_info.opType = static_cast<HcclReduceOp>(op_type_);
-  op_info.root = IntToUint(root_id_);
+  op_info.root = root_id_;
   op_info.count = hccl_count_;

   auto callback = [this](HcclResult status) {
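
Note: if root_id_ is already an unsigned 32-bit member (its declaration is not part of this hunk, so this is an assumption), the IntToUint helper adds nothing and a plain assignment is enough. A minimal standalone sketch of that situation, with made-up names:

    #include <cstdint>
    #include <iostream>

    // Hypothetical stand-in for the HCCL op descriptor; the field name is illustrative only.
    struct OpInfo {
      uint32_t root = 0;
    };

    int main() {
      const uint32_t root_id = 3;  // already unsigned, so no int-to-uint conversion helper is needed
      OpInfo op_info;
      op_info.root = root_id;      // direct assignment: same type, no narrowing
      std::cout << op_info.root << std::endl;
      return 0;
    }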

@@ -19,6 +19,7 @@
 #include <memory>
 #include <vector>

 #include "plugin/device/ascend/kernel/hccl/hccl_kernel.h"

 namespace mindspore {

@@ -158,7 +158,7 @@ bool HcomUtil::GetHcomCount(const AnfNodePtr &anf_node, const vector<HcclDataTyp
     }
     size_t actual_input_size = input_size;
     if (common::AnfAlgo::HasNodeAttr(kAttrFusion, cnode) &&
-        common::AnfAlgo::GetNodeAttr<int64_t>(anf_node, kAttrFusion)) {
+        common::AnfAlgo::GetNodeAttr<int64_t>(anf_node, kAttrFusion) != 0) {
       actual_input_size = (input_size + align_size - 1 + filled_size) / align_size * align_size;
     }
     block_size = static_cast<uint64_t>(actual_input_size / LongToSize(rank_size));
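
Note: kAttrFusion is read as an int64_t, so using the attribute value directly as an operand of && relies on an implicit integer-to-bool conversion; comparing against 0 states the intent explicitly. A minimal sketch of the pattern, with a made-up getter standing in for GetNodeAttr<int64_t>:

    #include <cstdint>
    #include <iostream>

    // Hypothetical attribute getter standing in for common::AnfAlgo::GetNodeAttr<int64_t>.
    int64_t GetFusionAttr() { return 1; }

    int main() {
      const bool has_attr = true;  // stands in for HasNodeAttr(kAttrFusion, cnode)
      // The explicit "!= 0" reads as "the fusion attribute is set and non-zero"
      // instead of relying on an implicit int64_t -> bool conversion.
      if (has_attr && GetFusionAttr() != 0) {
        std::cout << "fusion path taken" << std::endl;
      }
      return 0;
    }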

@@ -219,7 +219,7 @@ bool HcomUtil::GetHcomRootId(const AnfNodePtr &anf_node, uint32_t *root_id) {
   auto primitive = common::AnfAlgo::GetCNodePrimitive(anf_node);
   MS_EXCEPTION_IF_NULL(primitive);
   if (primitive->GetAttr(kAttrRootRank) != nullptr) {
-    *root_id = (uint32_t)GetValue<int64_t>(primitive->GetAttr(kAttrRootRank));
+    *root_id = static_cast<uint32_t>(GetValue<int64_t>(primitive->GetAttr(kAttrRootRank)));
   } else {
     MS_LOG(ERROR) << "HcomUtil::Get HCOM_ATTR_ROOT_INDEX fail, not support!";
     return false;
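
Note: the C-style cast is replaced with static_cast, which documents the deliberate int64_t-to-uint32_t narrowing and prevents the cast from silently degrading into a reinterpret_cast or const_cast. A minimal sketch with illustrative names:

    #include <cstdint>
    #include <iostream>

    int main() {
      const int64_t root_rank_attr = 7;  // stands in for GetValue<int64_t>(attr)
      // static_cast makes the narrowing conversion explicit; a C-style cast would
      // also accept unrelated cast kinds without any diagnostic.
      const uint32_t root_id = static_cast<uint32_t>(root_rank_attr);
      std::cout << root_id << std::endl;
      return 0;
    }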

@@ -58,7 +58,7 @@ static map<HcclDataType, uint32_t> kConstOpHcomDataTypeSizeMap = {

 class HcomUtil {
  public:
-  static bool GetKernelInputShape(const AnfNodePtr &anf_node, vector<ShapeVector> *hccl_kernel_shape_list);
+  static bool GetKernelInputShape(const AnfNodePtr &anf_node, vector<ShapeVector> *hccl_kernel_intput_shape_list);
   static bool GetKernelOutputShape(const AnfNodePtr &anf_node, vector<ShapeVector> *hccl_kernel_shape_list);
   static ::HcclDataType ConvertHcclType(TypeId type_id);
   static bool GetHcomDataType(const AnfNodePtr &anf_node, vector<HcclDataType> *data_type_list);

@@ -144,14 +144,13 @@ std::vector<int64_t> GetInputShape(const CNodePtr &cnode, size_t index) {
   x_shape_value->set_device_address(address_x, false);
   x_shape_value->data_sync();

-  auto x_value = reinterpret_cast<int64_t *>(x_shape_value->data_c());
+  auto x_value = static_cast<int64_t *>(x_shape_value->data_c());
   MS_EXCEPTION_IF_NULL(x_value);
   std::vector<int64_t> input_shape = {x_value, x_value + x_num};
   return input_shape;
 }

-size_t SetOutputValue(const CNodePtr &cnode, const std::vector<std::vector<int64_t>> &grad_reduce_idx, size_t index,
-                      size_t input_num) {
+size_t SetOutputValue(const CNodePtr &cnode, const std::vector<std::vector<int64_t>> &grad_reduce_idx, size_t index) {
   std::vector<int64_t> output;
   size_t out_size = grad_reduce_idx[index].size();
   for (size_t k = 0; k < out_size; ++k) {
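
Note: assuming data_c() returns void* (which is what the static_cast implies), static_cast<int64_t *> is the correct and sufficient conversion; reinterpret_cast is not needed for void*. The same hunk also drops the trailing input_num parameter, which the visible body does not reference and which the call sites below stop passing. A small standalone sketch of both points, with invented names:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Hypothetical buffer whose data_c() mimics a void* accessor such as a tensor's data_c().
    struct Buffer {
      std::vector<int64_t> storage{4, 5, 6};
      void *data_c() { return storage.data(); }
    };

    // The unused trailing parameter has been removed; the remaining arguments are all the
    // function needs.
    size_t SumValues(Buffer *buf, size_t count) {
      // static_cast is sufficient (and preferred) for converting from void*.
      auto *values = static_cast<int64_t *>(buf->data_c());
      size_t total = 0;
      for (size_t i = 0; i < count; ++i) {
        total += static_cast<size_t>(values[i]);
      }
      return total;
    }

    int main() {
      Buffer buf;
      std::cout << SumValues(&buf, buf.storage.size()) << std::endl;  // prints 15
      return 0;
    }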

@@ -188,7 +187,7 @@ size_t SetOutputValue(const CNodePtr &cnode, const std::vector<std::vector<int64
 }
 }  // namespace

-void DynamicBroadcastGradientArgsKernelMod::Execute() {
+void DynamicBroadcastGradientArgsKernelMod::Execute() const {
   MS_LOG(INFO) << "Execute DynamicBroadcastGradientArgsKernel Start";
   auto node = anf_node_.lock();
   MS_EXCEPTION_IF_NULL(node);
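
Note: Execute() only reads members such as anf_node_ here, so it can be const-qualified; the matching declaration changes appear in the header hunks below, and the same treatment is applied to TensorShapeKernelMod::Execute(). A minimal sketch of the pattern with made-up class and member names:

    #include <iostream>
    #include <string>
    #include <utility>

    // Illustrative class only; not the MindSpore kernel interface.
    class ShapeReporter {
     public:
      explicit ShapeReporter(std::string name) : name_(std::move(name)) {}

      // const: the method only inspects members, so it can be called on const objects
      // and the compiler rejects any accidental mutation inside it.
      void Execute() const { std::cout << "executing " << name_ << std::endl; }

     private:
      std::string name_;
    };

    int main() {
      const ShapeReporter reporter("TensorShape");
      reporter.Execute();  // legal because Execute() is const-qualified
      return 0;
    }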

@@ -205,8 +204,8 @@ void DynamicBroadcastGradientArgsKernelMod::Execute() {
   input_shapes[1] = GetInputShape(cnode, 1);
   auto grad_reduce_idx = CalculateOutput(input_shapes);

-  auto r0_size = SetOutputValue(cnode, grad_reduce_idx, 0, input_shapes[0].size());
-  auto r1_size = SetOutputValue(cnode, grad_reduce_idx, 1, input_shapes[1].size());
+  auto r0_size = SetOutputValue(cnode, grad_reduce_idx, 0);
+  auto r1_size = SetOutputValue(cnode, grad_reduce_idx, 1);

   ShapeVector r0_shp{SizeToLong(r0_size)};
   ShapeVector r1_shp{SizeToLong(r1_size)};

@@ -15,9 +15,10 @@
  */
 #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_DYNAMIC_BROADCAST_GRADIENT_ARGS_KERNEL_H_
 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_DYNAMIC_BROADCAST_GRADIENT_ARGS_KERNEL_H_
-#include <vector>
 #include <memory>
+#include <string>
+#include <vector>

 #include "plugin/device/ascend/kernel/host/host_kernel_mod.h"

 namespace mindspore {

@@ -30,7 +31,7 @@ class DynamicBroadcastGradientArgsKernelMod : public HostKernelMod {
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override;

  private:
-  void Execute();
+  void Execute() const;
 };
 MS_HOST_REG_KERNEL(DynamicBroadcastGradientArgs, DynamicBroadcastGradientArgsKernelMod);
 }  // namespace kernel

@@ -23,7 +23,7 @@

 namespace mindspore {
 namespace kernel {
-void TensorShapeKernelMod::Execute() {
+void TensorShapeKernelMod::Execute() const {
   MS_LOG(INFO) << "Execute TensorShapeKernel Start";
   auto node = anf_node_.lock();
   MS_EXCEPTION_IF_NULL(node);

@@ -64,50 +64,16 @@ void TensorShapeKernelMod::Execute() {
     if (!ret) {
       MS_LOG(EXCEPTION) << "Sync stream error!";
     }
-    output_addr->SyncHostToDevice(output_shape, LongToSize(output_tensor_for_sync->data().nbytes()),
-                                  output_tensor_for_sync->data_type(), output_tensor_for_sync->data_c(),
-                                  output_tensor_for_sync->device_info().host_format_);
+    if (!output_addr->SyncHostToDevice(output_shape, LongToSize(output_tensor_for_sync->data().nbytes()),
+                                       output_tensor_for_sync->data_type(), output_tensor_for_sync->data_c(),
+                                       output_tensor_for_sync->device_info().host_format_)) {
+      MS_LOG(EXCEPTION) << "TensorShapeKernel SyncHostToDevice failed.";
+    }
   }

   MS_LOG(INFO) << "Execute TensorShapeKernel End";
 }

-void TensorShapeKernelMod::Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
-  MS_LOG(INFO) << "Execute TensorShapeKernel Start";
-  auto node = anf_node_.lock();
-  MS_EXCEPTION_IF_NULL(node);
-  auto cnode = node->cast<CNodePtr>();
-  MS_EXCEPTION_IF_NULL(cnode);
-  auto input_num = common::AnfAlgo::GetInputTensorNum(cnode);
-  if (input_num != 1) {
-    MS_LOG(EXCEPTION) << "Op [" << cnode->DebugString() << "] has invalid input num, should be 1, but got " << input_num
-                      << trace::DumpSourceLines(cnode);
-  }
-
-  auto prev_output_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(cnode, 0);
-  std::vector<int64_t> output_shape = {SizeToLong(prev_output_shape.size())};
-
-  auto output_type = TypeId::kNumberTypeInt64;
-
-  auto output_tensor_for_sync = std::make_shared<tensor::Tensor>(output_type, output_shape);
-  MS_EXCEPTION_IF_NULL(output_tensor_for_sync);
-  auto data_ptr = static_cast<int64_t *>(output_tensor_for_sync->data_c());
-  for (size_t i = 0; i < prev_output_shape.size(); ++i) {
-    MS_LOG(INFO) << "DEBUG prev_output_shape[" << i << "]:" << prev_output_shape[i];
-    *(data_ptr + i) = prev_output_shape[i];
-  }
-
-  if (outputs.empty()) {
-    MS_LOG(EXCEPTION) << "Output address of DynamicShape is empty";
-  }
-  auto status = rtMemcpyAsync(outputs[0]->addr, outputs[0]->size, output_tensor_for_sync->data_c(),
-                              LongToSize(output_tensor_for_sync->data().nbytes()), RT_MEMCPY_HOST_TO_DEVICE, stream_);
-  if (status != RT_ERROR_NONE) {
-    MS_LOG(EXCEPTION) << "Execute TensorShapeKernel rtMemcpyAsync failed!";
-  }
-  MS_LOG(INFO) << "Execute TensorShapeKernel End";
-}
-
 bool TensorShapeKernelMod::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
                                   const std::vector<AddressPtr> &, void *stream_ptr) {
   auto node = anf_node_.lock();
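
Note: SyncHostToDevice evidently returns a success flag, and the new code stops discarding it, turning a silent copy failure into an exception; the now-redundant Execute(inputs, outputs) overload is deleted outright. A minimal sketch of the "check the boolean result and fail loudly" pattern, using invented names rather than the real device-address API:

    #include <cstdint>
    #include <iostream>
    #include <stdexcept>
    #include <vector>

    // Hypothetical stand-in for a host-to-device copy that reports success as a bool.
    bool SyncHostToDevice(const std::vector<int64_t> &host, std::vector<int64_t> *device) {
      if (device == nullptr) {
        return false;  // simulated failure path
      }
      *device = host;
      return true;
    }

    int main() {
      const std::vector<int64_t> host_shape{2, 3, 4};
      std::vector<int64_t> device_shape;
      // The return value is checked instead of discarded, so a failed copy cannot be
      // mistaken for success by later code that consumes device_shape.
      if (!SyncHostToDevice(host_shape, &device_shape)) {
        throw std::runtime_error("SyncHostToDevice failed");
      }
      std::cout << "copied " << device_shape.size() << " dims" << std::endl;
      return 0;
    }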

@@ -30,8 +30,7 @@ class TensorShapeKernelMod : public HostKernelMod {
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override;

  private:
-  void Execute();
-  void Execute(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
+  void Execute() const;
 };
 MS_HOST_REG_KERNEL(DynamicShape, TensorShapeKernelMod);
 MS_HOST_REG_KERNEL(TensorShape, TensorShapeKernelMod);

@@ -17,7 +17,6 @@
 #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_META_DATA_H_
 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_META_DATA_H_

 #include <string>
 #include <vector>
 #include <memory>
 #include "kernel/kernel_build_info.h"