!48557 [JIT Fallback] Change PyExecute output default type from Float32 to Float64 (Workaround), and Launch() uses prebuilt output created by Infer.
Merge pull request !48557 from 张清华/opt_jit_fallback
This commit is contained in:
commit
883382e86c
|
@ -178,9 +178,9 @@ abstract::AbstractBasePtr MakeNewAbstract(const AnfNodePtr &input, const tensor:
|
|||
new_abs->set_value(depended_value);
|
||||
|
||||
// Set user data for PyExecute infer.
|
||||
if (input->has_user_data<kernel::PyExecuteOutputData>()) {
|
||||
const auto &output_data = input->user_data<kernel::PyExecuteOutputData>();
|
||||
new_abs->set_user_data<kernel::PyExecuteOutputData>(output_data);
|
||||
if (input->has_user_data<kernel::PyExecuteOutputUserData>()) {
|
||||
const auto &output_data = input->user_data<kernel::PyExecuteOutputUserData>();
|
||||
new_abs->set_user_data<kernel::PyExecuteOutputUserData>(output_data);
|
||||
}
|
||||
} else if (abs->isa<abstract::AbstractScalar>()) {
|
||||
auto type = depended_value->Dtype()->type_id();
|
||||
|
@ -272,7 +272,7 @@ void InferShape(const CNodePtr &cnode, std::map<uint32_t, tensor::TensorPtr> *de
|
|||
}
|
||||
|
||||
auto updated_abs = MakeNewAbstract(real_input, depended_value, real_input_index);
|
||||
if (updated_abs->has_user_data<kernel::PyExecuteOutputData>()) {
|
||||
if (updated_abs->has_user_data<kernel::PyExecuteOutputUserData>()) {
|
||||
has_py_execute_data = true;
|
||||
}
|
||||
(void)args_spec_list.emplace_back(updated_abs);
|
||||
|
|
|
@ -810,11 +810,13 @@ AbstractBasePtrList RectifyAbstractFromDynamicInput(const PrimitivePtr &prim,
|
|||
for (auto item : dynamic_inputs_index) {
|
||||
if (item == kNotDynamicFlag) {
|
||||
if (input_index >= input_abstract.size()) {
|
||||
// Not to check for PyExecute.
|
||||
if ((prim->Hash() == prim::kPrimPyExecute->Hash() && prim->name() == prim::kPrimPyExecute->name())) {
|
||||
MS_LOG(WARNING) << "For primitive \'PyExecute\', index " << input_index
|
||||
<< " is out of range in input abstract " << input_abstract.size();
|
||||
continue;
|
||||
}
|
||||
MS_LOG(EXCEPTION) << "Index " << input_index << " is out of range in input abstract " << input_abstract.size();
|
||||
MS_LOG(EXCEPTION) << "For primitive \'" << prim->name() << "\', index " << input_index
|
||||
<< " is out of range in input abstract " << input_abstract.size();
|
||||
}
|
||||
(void)rectifyed_abs_list.emplace_back(input_abstract[input_index++]);
|
||||
} else {
|
||||
|
@ -825,12 +827,13 @@ AbstractBasePtrList RectifyAbstractFromDynamicInput(const PrimitivePtr &prim,
|
|||
AbstractBasePtrList dynamic_inputs_abs;
|
||||
for (auto index = item; index > 0; --index) {
|
||||
if (input_index >= input_abstract.size()) {
|
||||
// Not to check for PyExecute.
|
||||
if ((prim->Hash() == prim::kPrimPyExecute->Hash() && prim->name() == prim::kPrimPyExecute->name())) {
|
||||
MS_LOG(WARNING) << "For primitive \'PyExecute\', index " << input_index
|
||||
<< " is out of range in input abstract " << input_abstract.size();
|
||||
continue;
|
||||
}
|
||||
MS_LOG(EXCEPTION) << "Index " << input_index << " is out of range in input abstract "
|
||||
<< input_abstract.size();
|
||||
MS_LOG(EXCEPTION) << "For primitive \'" << prim->name() << "\', index " << input_index
|
||||
<< " is out of range in input abstract " << input_abstract.size();
|
||||
}
|
||||
(void)dynamic_inputs_abs.emplace_back(input_abstract[input_index++]);
|
||||
}
|
||||
|
|
|
@ -2,11 +2,13 @@ if(CMAKE_SYSTEM_NAME MATCHES "Windows")
|
|||
file(GLOB_RECURSE _COMMON_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"duplex_pipe_win.cc"
|
||||
"thread_pool.cc"
|
||||
"fallback.cc"
|
||||
)
|
||||
else()
|
||||
file(GLOB_RECURSE _COMMON_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"duplex_pipe.cc"
|
||||
"thread_pool.cc"
|
||||
"fallback.cc"
|
||||
)
|
||||
endif()
|
||||
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
/**
|
||||
* Copyright 2023 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "include/common/fallback.h"
|
||||
|
||||
#include <queue>
|
||||
|
||||
#include "include/common/utils/python_adapter.h"
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
namespace mindspore {
|
||||
static std::queue<py::object> py_execute_output_queue;
|
||||
|
||||
// Reports whether at least one PyExecute output is still waiting in the queue.
bool HasPyExecuteOutput() {
  const bool no_pending_output = py_execute_output_queue.empty();
  return !no_pending_output;
}
|
||||
|
||||
// Removes the front (oldest pushed) entry of the PyExecute output queue and returns it.
//
// Calling front()/pop() on an empty std::queue is undefined behavior, so an empty queue is
// reported through MS_LOG(EXCEPTION) instead of being dereferenced. Callers that want to
// avoid the exception should test HasPyExecuteOutput() first.
py::object PopPyExecuteOutput() {
  if (py_execute_output_queue.empty()) {
    MS_LOG(EXCEPTION) << "The PyExecute output queue is empty, there is no prebuilt output to pop.";
  }
  // Copy the handle out before pop() destroys the queued element.
  auto output = py_execute_output_queue.front();
  MS_LOG(DEBUG) << "output: " << output;
  py_execute_output_queue.pop();
  return output;
}
|
||||
|
||||
// Logs `output` at DEBUG level, then stores a copy of it at the back of the PyExecute
// output queue.
void PushPyExecuteOutput(const py::object &output) {
  MS_LOG(DEBUG) << "output: " << output;
  py_execute_output_queue.emplace(output);
}
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,31 @@
|
|||
/**
|
||||
* Copyright 2023 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_INCLUDE_COMMON_FALLBACK_H_
|
||||
#define MINDSPORE_CCSRC_INCLUDE_COMMON_FALLBACK_H_
|
||||
|
||||
#include "include/common/visible.h"
|
||||
|
||||
#include "pybind11/pybind11.h"
|
||||
namespace py = pybind11;
|
||||
|
||||
namespace mindspore {
|
||||
// Returns true when at least one PyExecute output has been pushed and not yet popped.
COMMON_EXPORT bool HasPyExecuteOutput();
|
||||
// Removes and returns the front (oldest pushed) element of the PyExecute output queue.
// NOTE(review): the visible caller checks HasPyExecuteOutput() before calling this.
COMMON_EXPORT py::object PopPyExecuteOutput();
|
||||
// Appends `output` to the back of the PyExecute output queue.
COMMON_EXPORT void PushPyExecuteOutput(const py::object &output);
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_INCLUDE_COMMON_FALLBACK_H_
|
|
@ -288,7 +288,7 @@ AnfNodePtr GetRealOutput(const AnfNodePtr &node) {
|
|||
|
||||
bool ContainPyExecuteOutputData(const AnfNodePtr &node) {
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (node->has_user_data<kernel::PyExecuteOutputData>()) {
|
||||
if (node->has_user_data<kernel::PyExecuteOutputUserData>()) {
|
||||
return true;
|
||||
}
|
||||
auto abs = node->abstract();
|
||||
|
@ -313,9 +313,9 @@ py::object GetVectorRefOutputDataWithPyExecuteObject(const AnfNodePtr &node, con
|
|||
MS_EXCEPTION_IF_NULL(real_node);
|
||||
auto abs = real_node->abstract();
|
||||
if (!abs->isa<abstract::AbstractSequence>() || !real_node->isa<CNode>()) {
|
||||
if (real_node->has_user_data<kernel::PyExecuteOutputData>()) {
|
||||
if (real_node->has_user_data<kernel::PyExecuteOutputUserData>()) {
|
||||
// None case will consider later.
|
||||
const auto &output_data = real_node->user_data<kernel::PyExecuteOutputData>();
|
||||
const auto &output_data = real_node->user_data<kernel::PyExecuteOutputUserData>();
|
||||
return output_data->obj;
|
||||
}
|
||||
return BaseRefToPyData(value, abs);
|
||||
|
@ -342,12 +342,13 @@ py::object GetPyExecuteOutput(const AnfNodePtr &output, const BaseRef &value) {
|
|||
if (support_fallback_runtime) {
|
||||
const auto &real_output = GetRealOutput(output);
|
||||
MS_LOG(INFO) << "Real output: " << real_output << ", " << real_output->DebugString()
|
||||
<< ", has \'PyExecuteOutputData\': " << real_output->has_user_data<kernel::PyExecuteOutputData>();
|
||||
if (real_output->has_user_data<kernel::PyExecuteOutputData>()) {
|
||||
<< ", has \'PyExecuteOutputUserData\': "
|
||||
<< real_output->has_user_data<kernel::PyExecuteOutputUserData>();
|
||||
if (real_output->has_user_data<kernel::PyExecuteOutputUserData>()) {
|
||||
py::gil_scoped_acquire gil_acquire;
|
||||
const auto &output_data = real_output->user_data<kernel::PyExecuteOutputData>();
|
||||
const auto &output_data = real_output->user_data<kernel::PyExecuteOutputUserData>();
|
||||
py::object res_obj = output_data->obj;
|
||||
MS_LOG(INFO) << "Has \'PyExecuteOutputData\', just return it. res_obj: " << res_obj;
|
||||
MS_LOG(INFO) << "Has \'PyExecuteOutputUserData\', just return it. res_obj: " << res_obj;
|
||||
if (!py::isinstance<py::none>(res_obj)) {
|
||||
return res_obj;
|
||||
}
|
||||
|
|
|
@ -2024,7 +2024,7 @@ EvalResultPtr PyExecuteEvaluator::EvalPrim(const AnalysisEnginePtr &, const Abst
|
|||
// Call python script string.
|
||||
MS_LOG(DEBUG) << "Call script: " << script << ", args: " << args_abs_list;
|
||||
|
||||
TypePtr type = kFloat32;
|
||||
TypePtr type = kFloat64;
|
||||
if (current_interpret_node->has_user_data("__py_execute_tensor_type__")) {
|
||||
type = current_interpret_node->user_data<Type>("__py_execute_tensor_type__");
|
||||
MS_LOG(DEBUG) << "type: " << type->ToString();
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "Eigen/Core"
|
||||
#include "abstract/utils.h"
|
||||
#include "plugin/device/cpu/hal/device/cpu_common.h"
|
||||
#include "include/common/fallback.h"
|
||||
#include "include/common/utils/python_adapter.h"
|
||||
#include "include/common/utils/python_fallback_running.h"
|
||||
#include "plugin/factory/ms_factory.h"
|
||||
|
@ -56,13 +57,13 @@ void PyExecuteCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
|
|||
for (size_t i = 1; i < kernel_node->size(); ++i) {
|
||||
const auto &input = kernel_node->inputs()[i];
|
||||
|
||||
// Check if PyExecuteOutputData exists.
|
||||
// Check if PyExecuteOutputUserData exists.
|
||||
py::object obj = py::none();
|
||||
if (input->has_user_data<PyExecuteOutputData>()) {
|
||||
if (input->has_user_data<PyExecuteOutputUserData>()) {
|
||||
py::gil_scoped_acquire gil_acquire;
|
||||
const auto &output_data = input->user_data<PyExecuteOutputData>();
|
||||
const auto &output_data = input->user_data<PyExecuteOutputUserData>();
|
||||
obj = output_data->obj;
|
||||
MS_LOG(DEBUG) << "Has \'PyExecuteOutputData\', obj: " << obj;
|
||||
MS_LOG(DEBUG) << "Has \'PyExecuteOutputUserData\', obj: " << obj;
|
||||
}
|
||||
|
||||
// Record the inputs' information by their abstract types.
|
||||
|
@ -90,10 +91,10 @@ void PyExecuteCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
|
|||
}
|
||||
|
||||
void PyExecuteCpuKernelMod::AttachPyOutputData(const py::object &py_res) {
|
||||
const auto &py_output = std::make_shared<PyExecuteOutputData>();
|
||||
const auto &py_output = std::make_shared<PyExecuteOutputUserData>();
|
||||
py_output->obj = py_res;
|
||||
// Set Python data for kernel node.
|
||||
kernel_node_->set_user_data<PyExecuteOutputData>(py_output);
|
||||
kernel_node_->set_user_data<PyExecuteOutputUserData>(py_output);
|
||||
|
||||
// Set Python data for front node.
|
||||
const auto &kernel_graph = std::dynamic_pointer_cast<session::KernelGraph>(kernel_node_->func_graph());
|
||||
|
@ -104,7 +105,7 @@ void PyExecuteCpuKernelMod::AttachPyOutputData(const py::object &py_res) {
|
|||
if (iter != graph_output_map.cend()) {
|
||||
const auto &front_node = iter->second.first;
|
||||
MS_LOG(INFO) << "Found front output for " << kernel_node_ << ", " << kernel_node_->DebugString();
|
||||
front_node->set_user_data<PyExecuteOutputData>(py_output);
|
||||
front_node->set_user_data<PyExecuteOutputUserData>(py_output);
|
||||
} else {
|
||||
MS_LOG(DEBUG) << "Not found, kernel node is not output, " << kernel_node_ << ", " << kernel_node_->DebugString();
|
||||
if (!IS_OUTPUT_ON(mindspore::kDebug)) {
|
||||
|
@ -297,16 +298,29 @@ void TensorToRawMemory(const tensor::TensorPtr &tensor, const AddressPtr &addres
|
|||
bool PyExecuteCpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &outputs) {
|
||||
MS_LOG(DEBUG) << "Launch PyExecute(), inputs.size: " << inputs.size() << ", outputs: " << outputs.size();
|
||||
if (Py_IsInitialized() != true) {
|
||||
if (Py_IsInitialized() == 0) {
|
||||
MS_LOG(ERROR) << "Py_IsInitialized failed.";
|
||||
return false;
|
||||
}
|
||||
if (outputs.size() != 1) {
|
||||
MS_LOG(EXCEPTION) << "The output num is 1, but got " << outputs.size();
|
||||
}
|
||||
py::gil_scoped_acquire gil_acquire;
|
||||
|
||||
// Check if output exists created by 'CppInferShapeAndType'.
|
||||
if (HasPyExecuteOutput()) {
|
||||
const auto &output = PopPyExecuteOutput();
|
||||
const auto &output_type = py::str(output.get_type());
|
||||
MS_LOG(DEBUG) << "Python *prebuilt* output type: " << output_type << ", output: " << output;
|
||||
if (py::isinstance<tensor::Tensor>(output)) {
|
||||
TensorToRawMemory(output.cast<tensor::TensorPtr>(), outputs[0]);
|
||||
}
|
||||
AttachPyOutputData(output);
|
||||
return true;
|
||||
}
|
||||
MS_LOG(ERROR) << "Prebuilt output result not exists.";
|
||||
|
||||
// Build the script.
|
||||
py::gil_scoped_acquire gil_acquire;
|
||||
const auto &input0_info = inputs_info_[0];
|
||||
const auto &input0_abstract = input0_info.abstract;
|
||||
const auto &input0_abstract_scalar = dyn_cast<abstract::AbstractScalar>(input0_abstract);
|
||||
|
|
|
@ -35,9 +35,9 @@ struct PyExecuteInputInfo {
|
|||
std::vector<int64_t> shape;
|
||||
};
|
||||
|
||||
struct PyExecuteOutputData {
|
||||
struct PyExecuteOutputUserData {
|
||||
py::object obj;
|
||||
constexpr static char key[] = "PyExecuteOutputData";
|
||||
constexpr static char key[] = "PyExecuteOutputUserData";
|
||||
};
|
||||
|
||||
class PyExecuteCpuKernelMod : public DeprecatedNativeCpuKernelMod {
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "pybind11/pybind11.h"
|
||||
#include "pybind_api/pybind_patch.h"
|
||||
|
||||
#include "include/common/fallback.h"
|
||||
#include "mindspore/core/ops/py_execute.h"
|
||||
#include "mindspore/ccsrc/include/common/utils/convert_utils_py.h"
|
||||
#include "mindspore/ccsrc/include/common/utils/python_adapter.h"
|
||||
|
@ -92,20 +93,25 @@ class PyExecuteInitializer {
|
|||
const auto &tuple_abs = values_tuple_abs->cast<abstract::AbstractSequencePtr>();
|
||||
const auto &value_abs = (*tuple_abs)[i];
|
||||
if (value->isa<tensor::Tensor>()) {
|
||||
if (value_abs->has_user_data<kernel::PyExecuteOutputData>()) {
|
||||
const auto &output_data = value_abs->user_data<kernel::PyExecuteOutputData>();
|
||||
if (value_abs->has_user_data<kernel::PyExecuteOutputUserData>()) {
|
||||
const auto &output_data = value_abs->user_data<kernel::PyExecuteOutputUserData>();
|
||||
auto obj = output_data->obj;
|
||||
MS_LOG(DEBUG) << "input[" << i << "], obj: " << obj;
|
||||
local_dict[py::str(key_str->value())] = obj;
|
||||
} else {
|
||||
const auto &py_tensor = ValueToPyData(value);
|
||||
MS_LOG(DEBUG) << "input[" << i << "], py_tensor: " << py_tensor;
|
||||
local_dict[py::str(key_str->value())] = py_tensor;
|
||||
}
|
||||
continue;
|
||||
} else if (value->isa<StringImm>()) {
|
||||
const auto &str_imm = value->cast<StringImmPtr>();
|
||||
local_dict[py::str(key_str->value())] = py::str(str_imm->value());
|
||||
const auto &py_str = py::str(str_imm->value());
|
||||
MS_LOG(DEBUG) << "input[" << i << "], py_str: " << py_str;
|
||||
local_dict[py::str(key_str->value())] = py_str;
|
||||
continue;
|
||||
}
|
||||
MS_LOG(DEBUG) << "input[" << i << "], value: " << value;
|
||||
local_dict[py::str(key_str->value())] = value;
|
||||
}
|
||||
const auto &global_dict = CallPythonGetGlobalParams();
|
||||
|
@ -118,6 +124,7 @@ class PyExecuteInitializer {
|
|||
mindspore::ScopedFallbackRunning fallback_running;
|
||||
const auto &output = parse::data_converter::CallPythonScript(py_script, params);
|
||||
MS_LOG(DEBUG) << "Python output type: " << py::str(output.get_type()) << ", output: " << output;
|
||||
PushPyExecuteOutput(output);
|
||||
if (py::isinstance<tensor::Tensor>(output)) {
|
||||
const auto &tensor = output.cast<tensor::TensorPtr>();
|
||||
const auto &infer_shape = std::make_shared<abstract::Shape>(tensor->shape());
|
||||
|
|
|
@ -38,6 +38,7 @@ AbstractBasePtr PyExecuteInfer::InferPy(const PrimitivePtr &primitive,
|
|||
MS_LOG(EXCEPTION) << "infer_handler_ should not be null.";
|
||||
}
|
||||
const auto &abs = infer_handler_(input_args);
|
||||
MS_LOG(DEBUG) << "output abstract: " << abs;
|
||||
return abs;
|
||||
}
|
||||
|
||||
|
|
|
@ -781,9 +781,9 @@ def eval_script(exp_str, params):
|
|||
raise ValueError(f"eval_script(), params tuple length is wrong, params: {params}")
|
||||
|
||||
# Eval function parses the expression argument and evaluates it as a python expression.
|
||||
logger.debug(f"exp_str: '{exp_str}', params: '{params}'")
|
||||
global_params = params[0]
|
||||
local_params = params[1]
|
||||
logger.debug(f"exp_str: '{exp_str}', params: '{params}'")
|
||||
try:
|
||||
local_params = _convert_python_data(local_params)
|
||||
res = eval(exp_str, global_params, local_params)
|
||||
|
|
|
@ -533,8 +533,7 @@ def test_call_no_self_other_object_method_runtime():
|
|||
assert np.all(result == z)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Not supported by now")
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
|
@ -556,8 +555,7 @@ def test_getattr_tensor_with_wrong_attr():
|
|||
assert "object has no attribute" in str(err.value)
|
||||
|
||||
|
||||
@pytest.mark.skip(reason="Not supported by now")
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
|
@ -623,3 +621,38 @@ def test_getattr_dict_with_wrong_attr():
|
|||
with pytest.raises(AttributeError) as err:
|
||||
foo({"1": 1, "2": 2}) # Not throw error any more, should move to ST.
|
||||
assert "object has no attribute" in str(err.value)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_onecard
def test_parser_fallback_nested_class_outer():
    """
    Feature: JIT Fallback.
    Description: Inside a jitted construct, call a method of a plain Python object that is
                 stored as an attribute of a jit_class instance.
    Expectation: No exception, the result equals 12.
    """
    class Inner:
        def __init__(self):
            self.number = ms.Tensor(2, dtype=ms.int32)

        def act(self, x, y):
            return self.number * (x + y)

    @ms.jit_class
    class InnerNet:
        def __init__(self):
            self.inner = Inner()

    class NestedNet(ms.nn.Cell):
        @ms.jit
        def construct(self, x, y):
            out = InnerNet().inner.act(x, y)
            return out

    x = 2
    y = 4
    net = NestedNet()
    # 2 * (2 + 4) == 12
    assert net(x, y) == 12
|
||||
|
|
Loading…
Reference in New Issue