!48557 [JIT Fallback] Change PyExecute output default type from Float32 to Float64 (Workaround), and Launch() uses prebuilt output created by Infer.

Merge pull request !48557 from 张清华/opt_jit_fallback
This commit is contained in:
i-robot 2023-02-09 08:50:19 +00:00 committed by Gitee
commit 883382e86c
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
13 changed files with 168 additions and 36 deletions

View File

@ -178,9 +178,9 @@ abstract::AbstractBasePtr MakeNewAbstract(const AnfNodePtr &input, const tensor:
new_abs->set_value(depended_value);
// Set user data for PyExecute infer.
if (input->has_user_data<kernel::PyExecuteOutputData>()) {
const auto &output_data = input->user_data<kernel::PyExecuteOutputData>();
new_abs->set_user_data<kernel::PyExecuteOutputData>(output_data);
if (input->has_user_data<kernel::PyExecuteOutputUserData>()) {
const auto &output_data = input->user_data<kernel::PyExecuteOutputUserData>();
new_abs->set_user_data<kernel::PyExecuteOutputUserData>(output_data);
}
} else if (abs->isa<abstract::AbstractScalar>()) {
auto type = depended_value->Dtype()->type_id();
@ -272,7 +272,7 @@ void InferShape(const CNodePtr &cnode, std::map<uint32_t, tensor::TensorPtr> *de
}
auto updated_abs = MakeNewAbstract(real_input, depended_value, real_input_index);
if (updated_abs->has_user_data<kernel::PyExecuteOutputData>()) {
if (updated_abs->has_user_data<kernel::PyExecuteOutputUserData>()) {
has_py_execute_data = true;
}
(void)args_spec_list.emplace_back(updated_abs);

View File

@ -810,11 +810,13 @@ AbstractBasePtrList RectifyAbstractFromDynamicInput(const PrimitivePtr &prim,
for (auto item : dynamic_inputs_index) {
if (item == kNotDynamicFlag) {
if (input_index >= input_abstract.size()) {
// Not to check for PyExecute.
if ((prim->Hash() == prim::kPrimPyExecute->Hash() && prim->name() == prim::kPrimPyExecute->name())) {
MS_LOG(WARNING) << "For primitive \'PyExecute\', index " << input_index
<< " is out of range in input abstract " << input_abstract.size();
continue;
}
MS_LOG(EXCEPTION) << "Index " << input_index << " is out of range in input abstract " << input_abstract.size();
MS_LOG(EXCEPTION) << "For primitive \'" << prim->name() << "\', index " << input_index
<< " is out of range in input abstract " << input_abstract.size();
}
(void)rectifyed_abs_list.emplace_back(input_abstract[input_index++]);
} else {
@ -825,12 +827,13 @@ AbstractBasePtrList RectifyAbstractFromDynamicInput(const PrimitivePtr &prim,
AbstractBasePtrList dynamic_inputs_abs;
for (auto index = item; index > 0; --index) {
if (input_index >= input_abstract.size()) {
// Not to check for PyExecute.
if ((prim->Hash() == prim::kPrimPyExecute->Hash() && prim->name() == prim::kPrimPyExecute->name())) {
MS_LOG(WARNING) << "For primitive \'PyExecute\', index " << input_index
<< " is out of range in input abstract " << input_abstract.size();
continue;
}
MS_LOG(EXCEPTION) << "Index " << input_index << " is out of range in input abstract "
<< input_abstract.size();
MS_LOG(EXCEPTION) << "For primitive \'" << prim->name() << "\', index " << input_index
<< " is out of range in input abstract " << input_abstract.size();
}
(void)dynamic_inputs_abs.emplace_back(input_abstract[input_index++]);
}

View File

@ -2,11 +2,13 @@ if(CMAKE_SYSTEM_NAME MATCHES "Windows")
file(GLOB_RECURSE _COMMON_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"duplex_pipe_win.cc"
"thread_pool.cc"
"fallback.cc"
)
else()
file(GLOB_RECURSE _COMMON_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"duplex_pipe.cc"
"thread_pool.cc"
"fallback.cc"
)
endif()

View File

@ -0,0 +1,40 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "include/common/fallback.h"
#include <queue>
#include "include/common/utils/python_adapter.h"
#include "utils/log_adapter.h"
namespace mindspore {
static std::queue<py::object> py_execute_output_queue;
// Whether a prebuilt PyExecute output (pushed during infer) is pending.
bool HasPyExecuteOutput() { return py_execute_output_queue.size() > 0; }
// Pop and return the oldest prebuilt PyExecute output in FIFO order.
// The queue is filled by PushPyExecuteOutput() at infer time; callers are
// expected to check HasPyExecuteOutput() first.
// NOTE(review): the backing queue is a file-local static with no locking —
// presumably only touched from a single (GIL-holding) thread; confirm.
py::object PopPyExecuteOutput() {
  if (py_execute_output_queue.empty()) {
    // Guard against UB: std::queue::front()/pop() on an empty queue.
    MS_LOG(EXCEPTION) << "The PyExecute output queue is empty.";
  }
  auto output = py_execute_output_queue.front();
  MS_LOG(DEBUG) << "output: " << output;
  py_execute_output_queue.pop();
  return output;
}
// Enqueue a PyExecute result produced at infer time so Launch() can reuse it.
void PushPyExecuteOutput(const py::object &output) {
  MS_LOG(DEBUG) << "output: " << output;
  py_execute_output_queue.emplace(output);
}
} // namespace mindspore

View File

@ -0,0 +1,31 @@
/**
* Copyright 2023 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_INCLUDE_COMMON_FALLBACK_H_
#define MINDSPORE_CCSRC_INCLUDE_COMMON_FALLBACK_H_
#include "include/common/visible.h"
#include "pybind11/pybind11.h"
namespace py = pybind11;
namespace mindspore {
// Returns true if a PyExecute output produced at infer time is queued.
COMMON_EXPORT bool HasPyExecuteOutput();
// Pops and returns the oldest queued PyExecute output (FIFO order).
COMMON_EXPORT py::object PopPyExecuteOutput();
// Queues a PyExecute output for later retrieval by the kernel launch.
COMMON_EXPORT void PushPyExecuteOutput(const py::object &output);
}  // namespace mindspore
#endif // MINDSPORE_CCSRC_INCLUDE_COMMON_FALLBACK_H_

View File

@ -288,7 +288,7 @@ AnfNodePtr GetRealOutput(const AnfNodePtr &node) {
bool ContainPyExecuteOutputData(const AnfNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
if (node->has_user_data<kernel::PyExecuteOutputData>()) {
if (node->has_user_data<kernel::PyExecuteOutputUserData>()) {
return true;
}
auto abs = node->abstract();
@ -313,9 +313,9 @@ py::object GetVectorRefOutputDataWithPyExecuteObject(const AnfNodePtr &node, con
MS_EXCEPTION_IF_NULL(real_node);
auto abs = real_node->abstract();
if (!abs->isa<abstract::AbstractSequence>() || !real_node->isa<CNode>()) {
if (real_node->has_user_data<kernel::PyExecuteOutputData>()) {
if (real_node->has_user_data<kernel::PyExecuteOutputUserData>()) {
// None case will consider later.
const auto &output_data = real_node->user_data<kernel::PyExecuteOutputData>();
const auto &output_data = real_node->user_data<kernel::PyExecuteOutputUserData>();
return output_data->obj;
}
return BaseRefToPyData(value, abs);
@ -342,12 +342,13 @@ py::object GetPyExecuteOutput(const AnfNodePtr &output, const BaseRef &value) {
if (support_fallback_runtime) {
const auto &real_output = GetRealOutput(output);
MS_LOG(INFO) << "Real output: " << real_output << ", " << real_output->DebugString()
<< ", has \'PyExecuteOutputData\': " << real_output->has_user_data<kernel::PyExecuteOutputData>();
if (real_output->has_user_data<kernel::PyExecuteOutputData>()) {
<< ", has \'PyExecuteOutputUserData\': "
<< real_output->has_user_data<kernel::PyExecuteOutputUserData>();
if (real_output->has_user_data<kernel::PyExecuteOutputUserData>()) {
py::gil_scoped_acquire gil_acquire;
const auto &output_data = real_output->user_data<kernel::PyExecuteOutputData>();
const auto &output_data = real_output->user_data<kernel::PyExecuteOutputUserData>();
py::object res_obj = output_data->obj;
MS_LOG(INFO) << "Has \'PyExecuteOutputData\', just return it. res_obj: " << res_obj;
MS_LOG(INFO) << "Has \'PyExecuteOutputUserData\', just return it. res_obj: " << res_obj;
if (!py::isinstance<py::none>(res_obj)) {
return res_obj;
}

View File

@ -2024,7 +2024,7 @@ EvalResultPtr PyExecuteEvaluator::EvalPrim(const AnalysisEnginePtr &, const Abst
// Call python script string.
MS_LOG(DEBUG) << "Call script: " << script << ", args: " << args_abs_list;
TypePtr type = kFloat32;
TypePtr type = kFloat64;
if (current_interpret_node->has_user_data("__py_execute_tensor_type__")) {
type = current_interpret_node->user_data<Type>("__py_execute_tensor_type__");
MS_LOG(DEBUG) << "type: " << type->ToString();

View File

@ -23,6 +23,7 @@
#include "Eigen/Core"
#include "abstract/utils.h"
#include "plugin/device/cpu/hal/device/cpu_common.h"
#include "include/common/fallback.h"
#include "include/common/utils/python_adapter.h"
#include "include/common/utils/python_fallback_running.h"
#include "plugin/factory/ms_factory.h"
@ -56,13 +57,13 @@ void PyExecuteCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
for (size_t i = 1; i < kernel_node->size(); ++i) {
const auto &input = kernel_node->inputs()[i];
// Check if PyExecuteOutputData exists.
// Check if PyExecuteOutputUserData exists.
py::object obj = py::none();
if (input->has_user_data<PyExecuteOutputData>()) {
if (input->has_user_data<PyExecuteOutputUserData>()) {
py::gil_scoped_acquire gil_acquire;
const auto &output_data = input->user_data<PyExecuteOutputData>();
const auto &output_data = input->user_data<PyExecuteOutputUserData>();
obj = output_data->obj;
MS_LOG(DEBUG) << "Has \'PyExecuteOutputData\', obj: " << obj;
MS_LOG(DEBUG) << "Has \'PyExecuteOutputUserData\', obj: " << obj;
}
// Record the inputs' information by their abstract types.
@ -90,10 +91,10 @@ void PyExecuteCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
}
void PyExecuteCpuKernelMod::AttachPyOutputData(const py::object &py_res) {
const auto &py_output = std::make_shared<PyExecuteOutputData>();
const auto &py_output = std::make_shared<PyExecuteOutputUserData>();
py_output->obj = py_res;
// Set Python data for kernel node.
kernel_node_->set_user_data<PyExecuteOutputData>(py_output);
kernel_node_->set_user_data<PyExecuteOutputUserData>(py_output);
// Set Python data for front node.
const auto &kernel_graph = std::dynamic_pointer_cast<session::KernelGraph>(kernel_node_->func_graph());
@ -104,7 +105,7 @@ void PyExecuteCpuKernelMod::AttachPyOutputData(const py::object &py_res) {
if (iter != graph_output_map.cend()) {
const auto &front_node = iter->second.first;
MS_LOG(INFO) << "Found front output for " << kernel_node_ << ", " << kernel_node_->DebugString();
front_node->set_user_data<PyExecuteOutputData>(py_output);
front_node->set_user_data<PyExecuteOutputUserData>(py_output);
} else {
MS_LOG(DEBUG) << "Not found, kernel node is not output, " << kernel_node_ << ", " << kernel_node_->DebugString();
if (!IS_OUTPUT_ON(mindspore::kDebug)) {
@ -297,16 +298,29 @@ void TensorToRawMemory(const tensor::TensorPtr &tensor, const AddressPtr &addres
bool PyExecuteCpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs) {
MS_LOG(DEBUG) << "Launch PyExecute(), inputs.size: " << inputs.size() << ", outputs: " << outputs.size();
if (Py_IsInitialized() != true) {
if (Py_IsInitialized() == 0) {
MS_LOG(ERROR) << "Py_IsInitialized failed.";
return false;
}
if (outputs.size() != 1) {
MS_LOG(EXCEPTION) << "The output num is 1, but got " << outputs.size();
}
py::gil_scoped_acquire gil_acquire;
// Check if output exists created by 'CppInferShapeAndType'.
if (HasPyExecuteOutput()) {
const auto &output = PopPyExecuteOutput();
const auto &output_type = py::str(output.get_type());
MS_LOG(DEBUG) << "Python *prebuilt* output type: " << output_type << ", output: " << output;
if (py::isinstance<tensor::Tensor>(output)) {
TensorToRawMemory(output.cast<tensor::TensorPtr>(), outputs[0]);
}
AttachPyOutputData(output);
return true;
}
MS_LOG(ERROR) << "Prebuilt output result not exists.";
// Build the script.
py::gil_scoped_acquire gil_acquire;
const auto &input0_info = inputs_info_[0];
const auto &input0_abstract = input0_info.abstract;
const auto &input0_abstract_scalar = dyn_cast<abstract::AbstractScalar>(input0_abstract);

View File

@ -35,9 +35,9 @@ struct PyExecuteInputInfo {
std::vector<int64_t> shape;
};
struct PyExecuteOutputData {
struct PyExecuteOutputUserData {
py::object obj;
constexpr static char key[] = "PyExecuteOutputData";
constexpr static char key[] = "PyExecuteOutputUserData";
};
class PyExecuteCpuKernelMod : public DeprecatedNativeCpuKernelMod {

View File

@ -26,6 +26,7 @@
#include "pybind11/pybind11.h"
#include "pybind_api/pybind_patch.h"
#include "include/common/fallback.h"
#include "mindspore/core/ops/py_execute.h"
#include "mindspore/ccsrc/include/common/utils/convert_utils_py.h"
#include "mindspore/ccsrc/include/common/utils/python_adapter.h"
@ -92,20 +93,25 @@ class PyExecuteInitializer {
const auto &tuple_abs = values_tuple_abs->cast<abstract::AbstractSequencePtr>();
const auto &value_abs = (*tuple_abs)[i];
if (value->isa<tensor::Tensor>()) {
if (value_abs->has_user_data<kernel::PyExecuteOutputData>()) {
const auto &output_data = value_abs->user_data<kernel::PyExecuteOutputData>();
if (value_abs->has_user_data<kernel::PyExecuteOutputUserData>()) {
const auto &output_data = value_abs->user_data<kernel::PyExecuteOutputUserData>();
auto obj = output_data->obj;
MS_LOG(DEBUG) << "input[" << i << "], obj: " << obj;
local_dict[py::str(key_str->value())] = obj;
} else {
const auto &py_tensor = ValueToPyData(value);
MS_LOG(DEBUG) << "input[" << i << "], py_tensor: " << py_tensor;
local_dict[py::str(key_str->value())] = py_tensor;
}
continue;
} else if (value->isa<StringImm>()) {
const auto &str_imm = value->cast<StringImmPtr>();
local_dict[py::str(key_str->value())] = py::str(str_imm->value());
const auto &py_str = py::str(str_imm->value());
MS_LOG(DEBUG) << "input[" << i << "], py_str: " << py_str;
local_dict[py::str(key_str->value())] = py_str;
continue;
}
MS_LOG(DEBUG) << "input[" << i << "], value: " << value;
local_dict[py::str(key_str->value())] = value;
}
const auto &global_dict = CallPythonGetGlobalParams();
@ -118,6 +124,7 @@ class PyExecuteInitializer {
mindspore::ScopedFallbackRunning fallback_running;
const auto &output = parse::data_converter::CallPythonScript(py_script, params);
MS_LOG(DEBUG) << "Python output type: " << py::str(output.get_type()) << ", output: " << output;
PushPyExecuteOutput(output);
if (py::isinstance<tensor::Tensor>(output)) {
const auto &tensor = output.cast<tensor::TensorPtr>();
const auto &infer_shape = std::make_shared<abstract::Shape>(tensor->shape());

View File

@ -38,6 +38,7 @@ AbstractBasePtr PyExecuteInfer::InferPy(const PrimitivePtr &primitive,
MS_LOG(EXCEPTION) << "infer_handler_ should not be null.";
}
const auto &abs = infer_handler_(input_args);
MS_LOG(DEBUG) << "output abstract: " << abs;
return abs;
}

View File

@ -781,9 +781,9 @@ def eval_script(exp_str, params):
raise ValueError(f"eval_script(), params tuple length is wrong, params: {params}")
# Eval function parses the expression argument and evaluates it as a python expression.
logger.debug(f"exp_str: '{exp_str}', params: '{params}'")
global_params = params[0]
local_params = params[1]
logger.debug(f"exp_str: '{exp_str}', params: '{params}'")
try:
local_params = _convert_python_data(local_params)
res = eval(exp_str, global_params, local_params)

View File

@ -533,8 +533,7 @@ def test_call_no_self_other_object_method_runtime():
assert np.all(result == z)
@pytest.mark.skip(reason="Not supported by now")
@pytest.mark.level1
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@ -556,8 +555,7 @@ def test_getattr_tensor_with_wrong_attr():
assert "object has no attribute" in str(err.value)
@pytest.mark.skip(reason="Not supported by now")
@pytest.mark.level1
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@ -623,3 +621,38 @@ def test_getattr_dict_with_wrong_attr():
with pytest.raises(AttributeError) as err:
foo({"1": 1, "2": 2}) # Not throw error any more, should move to ST.
assert "object has no attribute" in str(err.value)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_parser_fallback_nested_class_outer():
    """
    Feature: JIT fallback.
    Description: Graph syntax supports calling a method of a plain custom class
        instance reached through a jit_class-decorated wrapper.
    Expectation: The network returns number * (x + y), i.e. 2 * (2 + 4) == 12.
    """
    class Inner:
        def __init__(self):
            self.number = ms.Tensor(2, dtype=ms.int32)

        def act(self, x, y):
            return self.number * (x + y)

    @ms.jit_class
    class InnerNet:
        def __init__(self):
            self.inner = Inner()

    class NestedNet(ms.nn.Cell):
        @ms.jit
        def construct(self, x, y):
            out = InnerNet().inner.act(x, y)
            return out

    x = 2
    y = 4
    net = NestedNet()
    assert net(x, y) == 12