forked from mindspore-Ecosystem/mindspore
pynative consistent run fail
This commit is contained in:
parent
8bd048cb0f
commit
2e77736443
|
@ -106,7 +106,7 @@ PYBIND11_MODULE(_c_expression, m) {
|
|||
py::arg("batch_size"), py::arg("types"), py::arg("shapes"), py::arg("input_indexs"),
|
||||
py::arg("phase") = py::str("dataset"), py::arg("need_run") = py::bool_(true), "Init and exec dataset.");
|
||||
(void)m.def("_set_dataset_mode_config", &mindspore::ConfigManager::SetDatasetModeConfig, "API for set dataset mode.");
|
||||
(void)m.def("init_backend", &mindspore::pipeline::InitBackend, "Init Backend.");
|
||||
(void)m.def("init_pipeline", &mindspore::pipeline::InitPipeline, "Init Pipeline.");
|
||||
|
||||
(void)m.def("export_graph", &mindspore::pipeline::ExportGraph, "Export Graph.");
|
||||
|
||||
|
|
|
@ -913,7 +913,7 @@ bool InitExecDataset(const std::string &queue_name, int64_t iter_num, int64_t ba
|
|||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
if (!context::IsTsdOpened(ms_context) || !context::IsGeInited(ms_context)) {
|
||||
(void)InitBackend();
|
||||
(void)InitPipeline();
|
||||
}
|
||||
#endif
|
||||
if (iter_num == -1) {
|
||||
|
@ -1014,7 +1014,7 @@ void ResetOpId() { mindspore::id_generator::reset_id(); }
|
|||
|
||||
void InitHccl() {
|
||||
#ifdef ENABLE_GE
|
||||
(void)InitBackend();
|
||||
(void)InitPipeline();
|
||||
#else
|
||||
mindspore::parse::python_adapter::set_python_env_flag(true);
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
|
@ -1081,7 +1081,10 @@ void StartUpProfiling() {
|
|||
}
|
||||
}
|
||||
|
||||
void InitBackend() {
|
||||
void InitPipeline() {
|
||||
// If previous pipeline exit with exception, memory cleaner's flags maybe unpredictable, so init when a new pipeline
|
||||
// start.
|
||||
pipeline::Resource::mem_cleaner().Init();
|
||||
// set python env flag
|
||||
mindspore::parse::python_adapter::set_python_env_flag(true);
|
||||
// Startup profiling before open tsd
|
||||
|
|
|
@ -133,7 +133,7 @@ bool InitDistribute(const std::map<std::string, std::string> &options);
|
|||
void ResetOpId();
|
||||
void InitHccl();
|
||||
void FinalizeHccl();
|
||||
void InitBackend();
|
||||
void InitPipeline();
|
||||
void FinalizeBackend();
|
||||
void ClearResAtexit();
|
||||
void ReleaseGeTsd();
|
||||
|
|
|
@ -277,6 +277,31 @@ Any Resource::GetAttrPtr(const TypeId &type, const std::string &name) {
|
|||
return GetMethodOrAttr(name, type_id, attr_map);
|
||||
}
|
||||
|
||||
void Resource::Clean() {
|
||||
// AbstractTensor->elements() will be saved in AbstractBasePtrList
|
||||
args_spec_.clear();
|
||||
input_ = py::none();
|
||||
// Context with AbstractBasePtrList may be saved in GraphEvaluator
|
||||
// some Evaluator like ResolveEvaluator may save Python object in cache,
|
||||
// it should be cleaned before Python Interpreter destructed.
|
||||
MS_EXCEPTION_IF_NULL(engine_);
|
||||
engine_->ClearEvaluatorCache();
|
||||
// clean static variable to prevent from crash. As static variable is released after
|
||||
// Python threads is released.
|
||||
parse::data_converter::ClearObjectCache();
|
||||
parse::Parser::CleanParserResource();
|
||||
parse::CleanDataClassToClassMap();
|
||||
trace::ClearTraceStack();
|
||||
is_cleaned_ = true;
|
||||
}
|
||||
|
||||
void MemoryCleaner::Init() {
|
||||
pynative_in_construct_process_ = false;
|
||||
pynative_in_end_graph_process_ = false;
|
||||
pynative_released_history_.clear();
|
||||
pynative_new_primtives_squence_.clear();
|
||||
}
|
||||
|
||||
MemoryCleaner Resource::mem_cleaner_ = MemoryCleaner();
|
||||
void MemoryCleaner::RecordPrimitivePy(PrimitivePy *prim) {
|
||||
if (prim == nullptr) {
|
||||
|
@ -285,7 +310,7 @@ void MemoryCleaner::RecordPrimitivePy(PrimitivePy *prim) {
|
|||
all_primitives_[prim] = true;
|
||||
}
|
||||
|
||||
void MemoryCleaner::ErasePrimitivePy(PrimitivePy *prim) {
|
||||
void MemoryCleaner::ReleasePrimitivePyObj(PrimitivePy *prim) {
|
||||
if (prim == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
@ -319,6 +344,7 @@ void MemoryCleaner::RecordPynativeShortLifePrimitivePy(PrimitivePy *prim) {
|
|||
}
|
||||
MS_LOG(DEBUG) << "Record pynative tmp primitve:" << prim->ToString();
|
||||
pynative_short_life_primitives_.insert(prim);
|
||||
pynative_new_primtives_squence_.push_back(prim->ToString());
|
||||
}
|
||||
|
||||
void MemoryCleaner::ErasePynativeShortLifePrimitivePy(PrimitivePy *prim) {
|
||||
|
@ -328,15 +354,22 @@ void MemoryCleaner::ErasePynativeShortLifePrimitivePy(PrimitivePy *prim) {
|
|||
if (pynative_short_life_primitives_.find(prim) == pynative_short_life_primitives_.end()) {
|
||||
return;
|
||||
}
|
||||
pynative_short_life_primitives_.erase(prim);
|
||||
MS_LOG(DEBUG) << "Erase pynative tmp primitive:" << prim->ToString();
|
||||
ErasePrimitivePy(prim);
|
||||
}
|
||||
|
||||
void MemoryCleaner::ClearPynativeShortLifePrimitivePy() {
|
||||
for (auto &primitive : pynative_short_life_primitives_) {
|
||||
ErasePynativeShortLifePrimitivePy(primitive);
|
||||
// If the primitives name sequence never been released before, keep the primtives alive
|
||||
if (std::find(pynative_released_history_.begin(), pynative_released_history_.end(),
|
||||
pynative_new_primtives_squence_) == pynative_released_history_.end()) {
|
||||
pynative_released_history_.push_back(pynative_new_primtives_squence_);
|
||||
} else {
|
||||
for (auto &primitive : pynative_short_life_primitives_) {
|
||||
ReleasePrimitivePyObj(primitive);
|
||||
}
|
||||
}
|
||||
pynative_short_life_primitives_.clear();
|
||||
pynative_new_primtives_squence_.clear();
|
||||
}
|
||||
|
||||
void MemoryCleaner::EnterPynativeConstructProcess() { pynative_in_construct_process_ = true; }
|
||||
|
@ -348,23 +381,5 @@ bool MemoryCleaner::IsInPynativeConstructProcess() const { return pynative_in_co
|
|||
void MemoryCleaner::EnterPynativeEndGraphProcess() { pynative_in_end_graph_process_ = true; }
|
||||
void MemoryCleaner::LeavePynativeEndGraphProcess() { pynative_in_end_graph_process_ = false; }
|
||||
bool MemoryCleaner::IsInPynativeEndGraphProcess() const { return pynative_in_end_graph_process_; }
|
||||
|
||||
void Resource::Clean() {
|
||||
// AbstractTensor->elements() will be saved in AbstractBasePtrList
|
||||
args_spec_.clear();
|
||||
input_ = py::none();
|
||||
// Context with AbstractBasePtrList may be saved in GraphEvaluator
|
||||
// some Evaluator like ResolveEvaluator may save Python object in cache,
|
||||
// it should be cleaned before Python Interpreter destructed.
|
||||
MS_EXCEPTION_IF_NULL(engine_);
|
||||
engine_->ClearEvaluatorCache();
|
||||
// clean static variable to prevent from crash. As static variable is released after
|
||||
// Python threads is released.
|
||||
parse::data_converter::ClearObjectCache();
|
||||
parse::Parser::CleanParserResource();
|
||||
parse::CleanDataClassToClassMap();
|
||||
trace::ClearTraceStack();
|
||||
is_cleaned_ = true;
|
||||
}
|
||||
} // namespace pipeline
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -57,8 +57,10 @@ class MemoryCleaner {
|
|||
public:
|
||||
MemoryCleaner() = default;
|
||||
~MemoryCleaner() = default;
|
||||
void Init();
|
||||
|
||||
void RecordPrimitivePy(PrimitivePy *prim);
|
||||
void ErasePrimitivePy(PrimitivePy *prim);
|
||||
void ReleasePrimitivePyObj(PrimitivePy *prim);
|
||||
void ClearPrimitivePyPythonObj();
|
||||
|
||||
void RecordPynativeShortLifePrimitivePy(PrimitivePy *prim);
|
||||
|
@ -77,6 +79,9 @@ class MemoryCleaner {
|
|||
// PrimitivePy objects that created in pynative construct process.These primitives should be released after construct
|
||||
// finished.
|
||||
std::unordered_set<PrimitivePy *> pynative_short_life_primitives_;
|
||||
// Sequence of primtive names in one construct process.
|
||||
std::vector<std::string> pynative_new_primtives_squence_;
|
||||
std::vector<std::vector<std::string>> pynative_released_history_;
|
||||
bool pynative_in_construct_process_{false};
|
||||
bool pynative_in_end_graph_process_{false};
|
||||
};
|
||||
|
|
|
@ -56,6 +56,7 @@ PrimitivePy::PrimitivePy(const py::str &name, const py::object &python_obj)
|
|||
: Primitive(name, false), python_obj_(python_obj), signatures_() {
|
||||
auto &mem_cleaner = pipeline::Resource::mem_cleaner();
|
||||
mem_cleaner.RecordPrimitivePy(this);
|
||||
MS_LOG(DEBUG) << "New primitive:" << name;
|
||||
if (mem_cleaner.IsInPynativeConstructProcess() && !mem_cleaner.IsInPynativeEndGraphProcess()) {
|
||||
mem_cleaner.RecordPynativeShortLifePrimitivePy(this);
|
||||
}
|
||||
|
@ -63,7 +64,7 @@ PrimitivePy::PrimitivePy(const py::str &name, const py::object &python_obj)
|
|||
PrimitivePy::~PrimitivePy() {
|
||||
// Erase primitive here to set released flag false, to avoid calling released pointer when clear primitives in
|
||||
// resource.
|
||||
pipeline::Resource::mem_cleaner().ErasePrimitivePy(this);
|
||||
pipeline::Resource::mem_cleaner().ReleasePrimitivePyObj(this);
|
||||
MS_LOG(DEBUG) << "Release:" << ToString();
|
||||
}
|
||||
void PrimitivePy::SetPyObj(const py::object &obj) { python_obj_ = obj; }
|
||||
|
@ -327,6 +328,10 @@ py::dict PrimitivePy::RunInfer(const py::tuple &args) {
|
|||
if (!HasPyObj()) {
|
||||
MS_LOG(EXCEPTION) << "[" << this->ToString() << "]: pyobj is empty";
|
||||
}
|
||||
// Python obj could be replaced as None, so it will losed the original info when throw exception in python.
|
||||
if (!py::hasattr(python_obj_, PY_PRIM_METHOD_INFER)) {
|
||||
MS_LOG(EXCEPTION) << "prim:" << ToString() << " has no attr:" << PY_PRIM_METHOD_INFER;
|
||||
}
|
||||
auto infer_fuc = python_obj_.attr(PY_PRIM_METHOD_INFER);
|
||||
return infer_fuc(*args);
|
||||
}
|
||||
|
@ -335,6 +340,10 @@ void PrimitivePy::RunCheck(const py::tuple &args) {
|
|||
if (!HasPyObj()) {
|
||||
MS_LOG(EXCEPTION) << "[" << this->ToString() << "]: pyobj is empty";
|
||||
}
|
||||
// Python obj could be replaced as None, so it will losed the original info when throw exception in python.
|
||||
if (!py::hasattr(python_obj_, PY_PRIM_METHOD_CHECK)) {
|
||||
MS_LOG(EXCEPTION) << "prim:" << ToString() << " has no attr:" << PY_PRIM_METHOD_CHECK;
|
||||
}
|
||||
auto check_func = python_obj_.attr(PY_PRIM_METHOD_CHECK);
|
||||
(void)check_func(*args);
|
||||
}
|
||||
|
@ -343,6 +352,10 @@ py::object PrimitivePy::RunInferValue(const py::tuple &args) {
|
|||
if (!HasPyObj()) {
|
||||
MS_LOG(EXCEPTION) << "[" << this->ToString() << "]: pyobj is empty";
|
||||
}
|
||||
// Python obj could be replaced as None, so it will losed the original info when throw exception in python.
|
||||
if (!py::hasattr(python_obj_, PY_PRIM_METHOD_INFER_VALUE)) {
|
||||
MS_LOG(EXCEPTION) << "prim:" << ToString() << " has no attr:" << PY_PRIM_METHOD_INFER_VALUE;
|
||||
}
|
||||
auto infer_value = python_obj_.attr(PY_PRIM_METHOD_INFER_VALUE);
|
||||
return infer_value(*args);
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@ from mindspore import context
|
|||
from mindspore import log as logger
|
||||
from .tensor import Tensor as MsTensor
|
||||
from .._c_expression import generate_key, Executor_, Tensor, MetaTensor, PynativeExecutor_
|
||||
from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_backend
|
||||
from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_pipeline
|
||||
from ..parallel._ps_context import _is_role_pserver
|
||||
from ..parallel._utils import _get_device_num, _get_global_rank, _need_to_full, _check_full_batch, _to_full_tensor, \
|
||||
_get_parameter_broadcast
|
||||
|
@ -195,7 +195,7 @@ class _MindSporeFunction:
|
|||
|
||||
@_wrap_func
|
||||
def __call__(self, *args):
|
||||
init_backend()
|
||||
init_pipeline()
|
||||
converted, arguments_dict, parse_method = _convert_function_arguments(self.fn, *args)
|
||||
if not converted:
|
||||
raise RuntimeError('Process function parameter is failure')
|
||||
|
|
|
@ -24,7 +24,7 @@ import numpy
|
|||
from mindspore import log as logger
|
||||
from mindspore.common.parameter import PARAMETER_NAME_DEFAULT
|
||||
from .. import context
|
||||
from .._c_expression import init_backend, Cell_
|
||||
from .._c_expression import init_pipeline, Cell_
|
||||
from .._checkparam import Validator
|
||||
from ..common import dtype as mstype
|
||||
from ..common.api import _executor, _pynative_exec
|
||||
|
@ -90,7 +90,7 @@ class Cell(Cell_):
|
|||
self._parameter_layout_dict = {}
|
||||
self._create_time = int(time.time() * 1e9)
|
||||
self.phase_prefix = ""
|
||||
init_backend()
|
||||
init_pipeline()
|
||||
|
||||
# call gc to release GE session resources used by non-used cell objects
|
||||
if os.getenv('GC_COLLECT_IN_CELL') == '1':
|
||||
|
|
Loading…
Reference in New Issue