!31584 [feat][assistant][I48O63] add checknumerics

Merge pull request !31584 from 郑鹏飞/checknumerics
2022-05-30 13:00:29 +00:00 · 2022-05-30 13:00:29 +00:00 · 68b3a906dc
parent f2df6b621c ffda33b732
commit 68b3a906dc
10 changed files with 333 additions and 1 deletions
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/check_numerics_cpu_kernel.cc
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/check_numerics_cpu_kernel.cc
@ -0,0 +1,85 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "plugin/device/cpu/kernel/check_numerics_cpu_kernel.h"
+#include <cmath>
+#include "abstract/utils.h"
+#include "plugin/device/cpu/hal/device/cpu_device_address.h"
+
+namespace mindspore {
+namespace kernel {
+namespace {
+// Expected sizes of the input and output address vectors; Launch() validates against these counts.
+constexpr size_t kCheckNumericsOutputsNum{1};
+constexpr size_t kCheckNumericsInputsNum{1};
+}  // namespace
+
+// Caches the node name and the device data type of input 0.
+// Logs at EXCEPTION level when that data type is not one of the keys of dtype_map_.
+void CheckNumericsCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
+  MS_EXCEPTION_IF_NULL(kernel_node);
+  kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
+  input_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
+  const bool is_supported = dtype_map_.count(input_dtype_) != 0;
+  if (is_supported) {
+    return;
+  }
+  MS_LOG(EXCEPTION) << "For '" << kernel_name_
+                    << "', the dtype of 'x' should be float16, float32 or float64, but got: " << input_dtype_;
+}
+
+// Validates the number of input/output addresses and dispatches to the typed implementation that matches
+// input_dtype_ (set by InitKernel).
+//   inputs:  addresses of the input tensors; exactly kCheckNumericsInputsNum entries are expected.
+//   (unnamed second parameter): workspace addresses, not used by this kernel.
+//   outputs: addresses of the output tensors; exactly kCheckNumericsOutputsNum entries are expected.
+// Returns true when the typed implementation completes. An unsupported data type is reported through
+// MS_LOG(EXCEPTION) instead of being silently accepted.
+bool CheckNumericsCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs,
+                                       const std::vector<kernel::AddressPtr> &,
+                                       const std::vector<kernel::AddressPtr> &outputs) {
+  CHECK_KERNEL_INPUTS_NUM(inputs.size(), kCheckNumericsInputsNum, kernel_name_);
+  CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kCheckNumericsOutputsNum, kernel_name_);
+  switch (input_dtype_) {
+    case kNumberTypeFloat16:
+      LaunchKernelFloat<float16>(inputs, outputs);
+      break;
+    case kNumberTypeFloat32:
+      LaunchKernelFloat<float>(inputs, outputs);
+      break;
+    case kNumberTypeFloat64:
+      LaunchKernelFloat<double>(inputs, outputs);
+      break;
+    default:
+      // Only reachable when Launch runs without a successful InitKernel. Fail loudly rather than
+      // return true with the output buffer left unwritten.
+      MS_LOG(EXCEPTION) << "For '" << kernel_name_
+                        << "', the dtype of 'x' should be float16, float32 or float64, but got: " << input_dtype_;
+  }
+  return true;
+}
+
+template <typename T>
+void CheckNumericsCpuKernelMod::CheckNanOrInf(T value) {
+  if (std::isnan(value)) {
+    MS_LOG(EXCEPTION) << ": Tensor had NaN values";
+  } else if (std::isinf(value)) {
+    MS_LOG(EXCEPTION) << ": Tensor had Inf values";
+  }
+}
+
+// Typed implementation of Launch for element type T.
+// Walks the elements of inputs[0] in order; each element is checked for NaN/Inf and then copied to the same
+// position of outputs[0]. The element count is derived from the byte size of inputs[0].
+//   inputs:  inputs[0] is read as an array of T.
+//   outputs: outputs[0] is written as an array of T and must be at least as large as inputs[0].
+// An undersized output buffer, or a NaN/Inf element, is reported through MS_LOG(EXCEPTION).
+template <typename T>
+void CheckNumericsCpuKernelMod::LaunchKernelFloat(const std::vector<AddressPtr> &inputs,
+                                                  const std::vector<kernel::AddressPtr> &outputs) {
+  // The copy below writes one output element per input element, so refuse an output buffer that is too small.
+  if (outputs[0]->size < inputs[0]->size) {
+    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the output size " << outputs[0]->size
+                      << " is smaller than the input size " << inputs[0]->size << ".";
+  }
+  const T *input = reinterpret_cast<const T *>(inputs[0]->addr);
+  T *output = reinterpret_cast<T *>(outputs[0]->addr);
+  const size_t elem_num = inputs[0]->size / sizeof(T);
+
+  for (size_t i = 0; i < elem_num; ++i) {
+    if constexpr (std::is_same_v<T, float16>) {
+      // float16 is widened to float before the std::isnan / std::isinf checks.
+      CheckNanOrInf(static_cast<float>(input[i]));
+    } else {
+      CheckNanOrInf(input[i]);
+    }
+    output[i] = input[i];
+  }
+}
+
+// Registers CheckNumericsCpuKernelMod with the NativeCpuKernelMod factory; the key is presumably the operator
+// name CheckNumerics (confirm against the MS_KERNEL_FACTORY_REG definition).
+MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, CheckNumerics, CheckNumericsCpuKernelMod);
+}  // namespace kernel
+}  // namespace mindspore
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/check_numerics_cpu_kernel.h
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/check_numerics_cpu_kernel.h
@ -0,0 +1,63 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CHECK_NUMERICS_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CHECK_NUMERICS_CPU_KERNEL_H_
+
+#include <map>
+#include <vector>
+#include <memory>
+#include <string>
+#include <complex>
+#include "plugin/device/cpu/kernel/cpu_kernel.h"
+#include "plugin/factory/ms_factory.h"
+
+namespace mindspore {
+namespace kernel {
+class CheckNumericsCpuKernelMod : public DeprecatedNativeCpuKernelMod {
+ public:
+  CheckNumericsCpuKernelMod() = default;
+  ~CheckNumericsCpuKernelMod() override = default;
+
+  void InitKernel(const CNodePtr &kernelNode) override;
+
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+              const std::vector<AddressPtr> &outputs) override;
+
+ protected:
+  std::vector<KernelAttr> GetOpSupport() override {
+    static std::vector<KernelAttr> support_list = {
+      KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
+      KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+      KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64)};
+    return support_list;
+  }
+
+ private:
+  template <typename T>
+  void LaunchKernelFloat(const std::vector<AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs);
+
+  template <typename T>
+  void CheckNanOrInf(T value);
+
+  std::map<TypeId, size_t> dtype_map_ = {
+    {kNumberTypeFloat16, sizeof(float16)}, {kNumberTypeFloat32, sizeof(float)}, {kNumberTypeFloat64, sizeof(double)}};
+  TypeId input_dtype_{kTypeUnknown};
+};
+}  // namespace kernel
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CHECK_NUMERICS_CPU_KERNEL_H_
--- a/mindspore/core/ops/check_numerics.cc
+++ b/mindspore/core/ops/check_numerics.cc
@ -0,0 +1,58 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ops/check_numerics.h"
+#include <string>
+#include <algorithm>
+#include <memory>
+#include <set>
+#include <vector>
+#include "ops/op_utils.h"
+#include "mindapi/src/helper.h"
+#include "utils/check_convert_utils.h"
+#include "abstract/ops/primitive_infer_map.h"
+
+namespace mindspore {
+namespace ops {
+namespace {
+// Shape inference: the output shape is a copy of the shape of the first input. `primitive` is not used.
+abstract::ShapePtr CheckNumericsInferShape(const PrimitivePtr &primitive,
+                                           const std::vector<AbstractBasePtr> &input_args) {
+  const auto input_shape_ptr = input_args[0]->BuildShape();
+  auto shape_map = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_shape_ptr);
+  return std::make_shared<abstract::Shape>(shape_map[kShape]);
+}
+
+// Type inference: checks that input 0 is a tensor whose element type is float16, float32 or float64, and
+// returns the type built from that input as the output type.
+TypePtr CheckNumericsInferType(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
+  const auto op_name = primitive->name();
+  (void)CheckAndConvertUtils::CheckArgs<abstract::AbstractTensor>(op_name, input_args, 0);
+  const std::set<TypePtr> supported_types = {kFloat16, kFloat32, kFloat64};
+  auto input_type = input_args[0]->BuildType();
+  (void)CheckAndConvertUtils::CheckTensorTypeValid("x", input_type, supported_types, op_name);
+  return input_type;
+}
+}  // namespace
+// Entry point of CheckNumerics inference: validates the argument list, then builds the output abstract from the
+// inferred shape and type. The analysis engine argument is not used.
+AbstractBasePtr CheckNumericsInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
+                                   const std::vector<AbstractBasePtr> &input_args) {
+  MS_EXCEPTION_IF_NULL(primitive);
+  constexpr int64_t kMinInputNum = 1;
+  CheckAndConvertUtils::CheckInputArgs(input_args, kGreaterEqual, kMinInputNum, primitive->name());
+  // Type inference runs first, as in the original ordering: it validates input 0 before the shape is read.
+  const auto out_type = CheckNumericsInferType(primitive, input_args);
+  const auto out_shape = CheckNumericsInferShape(primitive, input_args);
+  return abstract::MakeAbstract(out_shape, out_type);
+}
+// Supplies the implementation that goes with MIND_API_BASE_MEMBER(CheckNumerics) in check_numerics.h.
+MIND_API_BASE_IMPL(CheckNumerics, PrimitiveC, BaseOperator);
+// Associates prim::kPrimCheckNumerics with CheckNumericsInfer as its inference routine.
+// NOTE(review): the trailing `nullptr, true` arguments are presumably the value-inference routine and a
+// whitelist flag; confirm against the REGISTER_PRIMITIVE_EVAL_IMPL definition.
+REGISTER_PRIMITIVE_EVAL_IMPL(CheckNumerics, prim::kPrimCheckNumerics, CheckNumericsInfer, nullptr, true);
+}  // namespace ops
+}  // namespace mindspore
--- a/mindspore/core/ops/check_numerics.h
+++ b/mindspore/core/ops/check_numerics.h
@ -0,0 +1,42 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CORE_OPS_CHECKNUMERICS_H_
+#define MINDSPORE_CORE_OPS_CHECKNUMERICS_H_
+#include <map>
+#include <vector>
+#include <string>
+#include <memory>
+#include "ops/base_operator.h"
+#include "mindapi/base/types.h"
+
+namespace mindspore {
+namespace ops {
+// Operator name handed to the BaseOperator constructor by CheckNumerics.
+constexpr auto kNameCheckNumerics = "CheckNumerics";
+
+// Operator definition for CheckNumerics. The constructor names the single input "x" and the single output "y".
+class CheckNumerics : public BaseOperator {
+ public:
+  MIND_API_BASE_MEMBER(CheckNumerics);
+  CheckNumerics() : BaseOperator(kNameCheckNumerics) {
+    InitIOName({"x"}, {"y"});
+  }
+};
+
+// Infers the output abstract (shape and type) of CheckNumerics from `input_args`; implemented in
+// check_numerics.cc. The analysis engine argument is unnamed because the implementation does not use it.
+abstract::AbstractBasePtr CheckNumericsInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
+                                             const std::vector<abstract::AbstractBasePtr> &input_args);
+// Shared-pointer alias for the CheckNumerics operator class.
+using PrimCheckNumericsPtr = std::shared_ptr<CheckNumerics>;
+}  // namespace ops
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CORE_OPS_CHECKNUMERICS_H_
--- a/mindspore/core/ops/core_ops.h
+++ b/mindspore/core/ops/core_ops.h
@ -109,6 +109,7 @@ constexpr auto kSegmentSum = "SegmentSum";
 constexpr auto kSegmentMin = "SegmentMin";
 constexpr auto kDynamicShape = "DynamicShape";
 constexpr auto kTensorShape = "TensorShape";
+constexpr auto kCheckNumerics = "CheckNumerics";
 constexpr auto kStack = "Stack";
 constexpr auto kUnstack = "Unstack";
 constexpr auto kTupleGetItem = "TupleGetItem";
@ -330,6 +331,7 @@ GVAR_DEF(PrimitivePtr, kPrimStridedSlice, std::make_shared<Primitive>(kStridedSl
 GVAR_DEF(PrimitivePtr, kPrimStridedSliceGrad, std::make_shared<Primitive>(kStridedSliceGrad));
 GVAR_DEF(PrimitivePtr, kPrimTensorShape, std::make_shared<Primitive>(kTensorShape));
 GVAR_DEF(PrimitivePtr, kPrimDynamicShape, std::make_shared<Primitive>(kDynamicShape));
+GVAR_DEF(PrimitivePtr, kPrimCheckNumerics, std::make_shared<Primitive>(kCheckNumerics));
 GVAR_DEF(PrimitivePtr, kPrimEmbeddingLookup, std::make_shared<Primitive>("EmbeddingLookup"));
 GVAR_DEF(PrimitivePtr, kPrimEmbeddingLookupCommGrad, std::make_shared<Primitive>("EmbeddingLookupCommGrad"));
 GVAR_DEF(PrimitivePtr, kPrimSize, std::make_shared<Primitive>("Size"));
--- a/mindspore/python/mindspore/ops/_grad_experimental/grad_array_ops.py
+++ b/mindspore/python/mindspore/ops/_grad_experimental/grad_array_ops.py
@ -26,6 +26,7 @@ from ..operations.array_ops import MatrixDiagV3
 from ..operations.array_ops import MatrixDiagPartV3
 from ..operations.array_ops import MatrixSetDiagV3
 from ..operations.array_ops import Triu
+from ..operations.array_ops import CheckNumerics
 from ..operations.array_ops import SegmentMax
 from ..operations.array_ops import SegmentMin
 from ..operations.array_ops import SegmentSum
@ -227,6 +228,17 @@ def get_bprop_triu(self):
    return bprop


+@bprop_getters.register(CheckNumerics)
+def get_bprop_check_numerics(self):
+    """Generate bprop for CheckNumerics"""
+    check_numerics = CheckNumerics()
+
+    def bprop(x_input, out, dout):
+        return (check_numerics(dout),)
+
+    return bprop
+
+
@bprop_getters.register(P.SplitV)
 def get_bprop_split_v(self):
    """Generate bprop for SplitV"""
--- a/mindspore/python/mindspore/ops/_op_impl/aicpu/init.py
+++ b/mindspore/python/mindspore/ops/_op_impl/aicpu/init.py
@ -156,6 +156,7 @@ from .environ_set import _environ_set_aicpu
 from .environ_get import _environ_get_aicpu
 from .environ_destroy_all import _environ_destroy_all_aicpu
 from .cross import _cross_aicpu
+from .check_numerics import _check_numerics_aicpu
 from .cummax import _cummax_aicpu
 from .round import _round_aicpu
 from .truncated_normal import _truncated_normal_aicpu
--- a/mindspore/python/mindspore/ops/_op_impl/aicpu/check_numerics.py
+++ b/mindspore/python/mindspore/ops/_op_impl/aicpu/check_numerics.py
@ -0,0 +1,33 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""CheckNumerics op"""
+from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType
+
+check_numerics_op_info = AiCPURegOp("CheckNumerics") \
+    .fusion_type("OPAQUE") \
+    .attr("message", "str") \
+    .input(0, "x", "required") \
+    .output(0, "y", "required") \
+    .dtype_format(DataType.F16_Default, DataType.F16_Default) \
+    .dtype_format(DataType.F32_Default, DataType.F32_Default) \
+    .dtype_format(DataType.F64_Default, DataType.F64_Default) \
+    .get_op_info()
+
+
+@op_info_register(check_numerics_op_info)
+def _check_numerics_aicpu():
+    """CheckNumerics AiCPU register"""
+    return
--- a/mindspore/python/mindspore/ops/operations/array_ops.py
+++ b/mindspore/python/mindspore/ops/operations/array_ops.py
@ -10,7 +10,6 @@
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
-
 # limitations under the License.
 # ============================================================================

@ -250,6 +249,38 @@ class SameTypeShape(PrimitiveWithInfer):
        return x


+class CheckNumerics(Primitive):
+    """
+    Checks a tensor for NaN and Inf values.
+
+    Inputs:
+        - **x** (Tensor) - Input Tensor of any dimension. The data type is float16, float32 or float64.
+
+    Outputs:
+        Tensor, has the same shape and data type as `x` if `x` has no nan or inf values.
+
+    Raises:
+        TypeError: If `x` data type is not float16, float32, float64.
+        RuntimeError: If `x` has nan or inf values.
+
+    Supported Platforms:
+        ``Ascend`` ``CPU``
+
+    Examples:
+        >>> x = Tensor(np.array([[1, 3], [2, 4]], dtype=np.float32))
+        >>> checknumerics = ops.CheckNumerics()
+        >>> output = checknumerics(x)
+        >>> print(output)
+        [[1. 3.]
+         [2. 4.]]
+    """
+
+    @prim_attr_register
+    def __init__(self):
+        """init CheckNumerics"""
+        self.init_prim_io_names(inputs=['x'], outputs=['y'])
+
+
 class Cast(PrimitiveWithInfer):
    """
    Returns a tensor with the new specified data type.
--- a/tests/ut/python/ops/test_ops.py
+++ b/tests/ut/python/ops/test_ops.py
@ -37,6 +37,7 @@ from mindspore.ops.operations.math_ops import ReduceStd
 from mindspore.ops.operations.math_ops import Trace
 from mindspore.ops.operations import nn_ops as nps
 from mindspore.ops.operations.array_ops import Tril
+from mindspore.ops.operations.array_ops import CheckNumerics
 from mindspore.ops.operations.array_ops import SegmentMax
 from mindspore.ops.operations.array_ops import SegmentMin
 from mindspore.ops.operations.array_ops import SegmentSum
@ -2768,6 +2769,10 @@ test_case_array_ops = [
        'block': P.Shape(),
        'desc_inputs': [[3, 3, 2, 2]],
        'skip': ['backward']}),
+    ('CheckNumerics', {
+        'block': CheckNumerics(),
+        'desc_inputs': [[1, 2, 3, 4]],
+        'skip': ['backward']}),
    ('Reshape', {
        'block': P.Reshape(),
        'desc_const': [(64,)],