pynative consistent run fail

This commit is contained in:
chenfei 2020-12-24 14:33:20 +08:00
parent 8bd048cb0f
commit 2e77736443
8 changed files with 69 additions and 33 deletions

View File

@ -106,7 +106,7 @@ PYBIND11_MODULE(_c_expression, m) {
py::arg("batch_size"), py::arg("types"), py::arg("shapes"), py::arg("input_indexs"),
py::arg("phase") = py::str("dataset"), py::arg("need_run") = py::bool_(true), "Init and exec dataset.");
(void)m.def("_set_dataset_mode_config", &mindspore::ConfigManager::SetDatasetModeConfig, "API for set dataset mode.");
(void)m.def("init_backend", &mindspore::pipeline::InitBackend, "Init Backend.");
(void)m.def("init_pipeline", &mindspore::pipeline::InitPipeline, "Init Pipeline.");
(void)m.def("export_graph", &mindspore::pipeline::ExportGraph, "Export Graph.");

View File

@ -913,7 +913,7 @@ bool InitExecDataset(const std::string &queue_name, int64_t iter_num, int64_t ba
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
if (!context::IsTsdOpened(ms_context) || !context::IsGeInited(ms_context)) {
(void)InitBackend();
(void)InitPipeline();
}
#endif
if (iter_num == -1) {
@ -1014,7 +1014,7 @@ void ResetOpId() { mindspore::id_generator::reset_id(); }
void InitHccl() {
#ifdef ENABLE_GE
(void)InitBackend();
(void)InitPipeline();
#else
mindspore::parse::python_adapter::set_python_env_flag(true);
auto ms_context = MsContext::GetInstance();
@ -1081,7 +1081,10 @@ void StartUpProfiling() {
}
}
void InitBackend() {
void InitPipeline() {
// If the previous pipeline exited with an exception, the memory cleaner's flags may be left in an unpredictable
// state, so re-initialize them whenever a new pipeline starts.
pipeline::Resource::mem_cleaner().Init();
// set python env flag
mindspore::parse::python_adapter::set_python_env_flag(true);
// Startup profiling before open tsd

View File

@ -133,7 +133,7 @@ bool InitDistribute(const std::map<std::string, std::string> &options);
void ResetOpId();
void InitHccl();
void FinalizeHccl();
void InitBackend();
void InitPipeline();
void FinalizeBackend();
void ClearResAtexit();
void ReleaseGeTsd();

View File

@ -277,6 +277,31 @@ Any Resource::GetAttrPtr(const TypeId &type, const std::string &name) {
return GetMethodOrAttr(name, type_id, attr_map);
}
// Releases the state held by this Resource (argument specs, Python input, evaluator cache) and clears several
// parser/trace caches, then marks the resource as cleaned via is_cleaned_.
// NOTE(review): MS_EXCEPTION_IF_NULL presumably raises when engine_ is null; in that case the statements after it
// (including setting is_cleaned_) are not reached — confirm against the macro definition.
void Resource::Clean() {
// AbstractTensor->elements() will be saved in AbstractBasePtrList
args_spec_.clear();
// Drop the held Python input by replacing it with None.
input_ = py::none();
// Context with AbstractBasePtrList may be saved in GraphEvaluator
// some Evaluator like ResolveEvaluator may save Python object in cache,
// it should be cleaned before Python Interpreter destructed.
MS_EXCEPTION_IF_NULL(engine_);
engine_->ClearEvaluatorCache();
// Clear static variables to prevent a crash: static variables are released after
// the Python threads are released.
parse::data_converter::ClearObjectCache();
parse::Parser::CleanParserResource();
parse::CleanDataClassToClassMap();
trace::ClearTraceStack();
// Record that this resource has been cleaned.
is_cleaned_ = true;
}
// Puts the pynative bookkeeping of the cleaner back into its initial state: the recorded primitive-name sequence
// and the release history are emptied, and both "in process" flags are lowered.
void MemoryCleaner::Init() {
  // Containers first; the four resets are independent of each other.
  pynative_new_primtives_squence_.clear();
  pynative_released_history_.clear();
  pynative_in_end_graph_process_ = false;
  pynative_in_construct_process_ = false;
}
// Out-of-class definition of the static MemoryCleaner member of Resource, copy-initialized from a default instance.
MemoryCleaner Resource::mem_cleaner_ = MemoryCleaner();
void MemoryCleaner::RecordPrimitivePy(PrimitivePy *prim) {
if (prim == nullptr) {
@ -285,7 +310,7 @@ void MemoryCleaner::RecordPrimitivePy(PrimitivePy *prim) {
all_primitives_[prim] = true;
}
void MemoryCleaner::ErasePrimitivePy(PrimitivePy *prim) {
void MemoryCleaner::ReleasePrimitivePyObj(PrimitivePy *prim) {
if (prim == nullptr) {
return;
}
@ -319,6 +344,7 @@ void MemoryCleaner::RecordPynativeShortLifePrimitivePy(PrimitivePy *prim) {
}
MS_LOG(DEBUG) << "Record pynative tmp primitve:" << prim->ToString();
pynative_short_life_primitives_.insert(prim);
pynative_new_primtives_squence_.push_back(prim->ToString());
}
void MemoryCleaner::ErasePynativeShortLifePrimitivePy(PrimitivePy *prim) {
@ -328,15 +354,22 @@ void MemoryCleaner::ErasePynativeShortLifePrimitivePy(PrimitivePy *prim) {
if (pynative_short_life_primitives_.find(prim) == pynative_short_life_primitives_.end()) {
return;
}
pynative_short_life_primitives_.erase(prim);
MS_LOG(DEBUG) << "Erase pynative tmp primitive:" << prim->ToString();
ErasePrimitivePy(prim);
}
// Handles the PrimitivePy objects recorded during one pynative construct process and then resets the per-construct
// bookkeeping (the short-life primitive set and the primitive-name sequence).
// NOTE(review): this span comes from a diff rendered without +/- markers; the leading `for` loop that calls
// ErasePynativeShortLifePrimitivePy looks like the pre-change body and the `if`/`else` like the post-change body —
// confirm against the real file before relying on the control flow shown here.
void MemoryCleaner::ClearPynativeShortLifePrimitivePy() {
for (auto &primitive : pynative_short_life_primitives_) {
ErasePynativeShortLifePrimitivePy(primitive);
// If this exact sequence of primitive names has never been seen before, record it in the history and keep the
// primitives alive; otherwise call ReleasePrimitivePyObj on every recorded short-life primitive.
if (std::find(pynative_released_history_.begin(), pynative_released_history_.end(),
pynative_new_primtives_squence_) == pynative_released_history_.end()) {
pynative_released_history_.push_back(pynative_new_primtives_squence_);
} else {
for (auto &primitive : pynative_short_life_primitives_) {
ReleasePrimitivePyObj(primitive);
}
}
// Start the next construct process with empty bookkeeping.
pynative_short_life_primitives_.clear();
pynative_new_primtives_squence_.clear();
}
// Raises the flag that marks the cleaner as being inside a pynative construct process.
void MemoryCleaner::EnterPynativeConstructProcess() {
  pynative_in_construct_process_ = true;
}
@ -348,23 +381,5 @@ bool MemoryCleaner::IsInPynativeConstructProcess() const { return pynative_in_co
// Raises the flag that marks the cleaner as being inside a pynative end-graph process.
void MemoryCleaner::EnterPynativeEndGraphProcess() {
  pynative_in_end_graph_process_ = true;
}
// Lowers the flag that marks the cleaner as being inside a pynative end-graph process.
void MemoryCleaner::LeavePynativeEndGraphProcess() {
  pynative_in_end_graph_process_ = false;
}
// Reports whether the end-graph process flag is currently raised.
bool MemoryCleaner::IsInPynativeEndGraphProcess() const {
  return pynative_in_end_graph_process_;
}
// Releases the state held by this Resource (argument specs, Python input, evaluator cache) and clears several
// parser/trace caches, then marks the resource as cleaned via is_cleaned_.
// NOTE(review): MS_EXCEPTION_IF_NULL presumably raises when engine_ is null; in that case the statements after it
// (including setting is_cleaned_) are not reached — confirm against the macro definition.
void Resource::Clean() {
// AbstractTensor->elements() will be saved in AbstractBasePtrList
args_spec_.clear();
// Drop the held Python input by replacing it with None.
input_ = py::none();
// Context with AbstractBasePtrList may be saved in GraphEvaluator
// some Evaluator like ResolveEvaluator may save Python object in cache,
// it should be cleaned before Python Interpreter destructed.
MS_EXCEPTION_IF_NULL(engine_);
engine_->ClearEvaluatorCache();
// Clear static variables to prevent a crash: static variables are released after
// the Python threads are released.
parse::data_converter::ClearObjectCache();
parse::Parser::CleanParserResource();
parse::CleanDataClassToClassMap();
trace::ClearTraceStack();
// Record that this resource has been cleaned.
is_cleaned_ = true;
}
} // namespace pipeline
} // namespace mindspore

View File

@ -57,8 +57,10 @@ class MemoryCleaner {
public:
MemoryCleaner() = default;
~MemoryCleaner() = default;
void Init();
void RecordPrimitivePy(PrimitivePy *prim);
void ErasePrimitivePy(PrimitivePy *prim);
void ReleasePrimitivePyObj(PrimitivePy *prim);
void ClearPrimitivePyPythonObj();
void RecordPynativeShortLifePrimitivePy(PrimitivePy *prim);
@ -77,6 +79,9 @@ class MemoryCleaner {
// PrimitivePy objects that are created in the pynative construct process. These primitives should be released
// after the construct process has finished.
std::unordered_set<PrimitivePy *> pynative_short_life_primitives_;
// Sequence of primitive names recorded in one construct process.
std::vector<std::string> pynative_new_primtives_squence_;
std::vector<std::vector<std::string>> pynative_released_history_;
bool pynative_in_construct_process_{false};
bool pynative_in_end_graph_process_{false};
};

View File

@ -56,6 +56,7 @@ PrimitivePy::PrimitivePy(const py::str &name, const py::object &python_obj)
: Primitive(name, false), python_obj_(python_obj), signatures_() {
auto &mem_cleaner = pipeline::Resource::mem_cleaner();
mem_cleaner.RecordPrimitivePy(this);
MS_LOG(DEBUG) << "New primitive:" << name;
if (mem_cleaner.IsInPynativeConstructProcess() && !mem_cleaner.IsInPynativeEndGraphProcess()) {
mem_cleaner.RecordPynativeShortLifePrimitivePy(this);
}
@ -63,7 +64,7 @@ PrimitivePy::PrimitivePy(const py::str &name, const py::object &python_obj)
// Destructor: informs the shared memory cleaner that this primitive is being destroyed, then logs it at DEBUG level.
PrimitivePy::~PrimitivePy() {
// Erase the primitive here so that its released flag is set to false; this avoids using an already released pointer
// when the primitives are cleared in resource.
// NOTE(review): ErasePrimitivePy and ReleasePrimitivePyObj are called on consecutive lines; this looks like the
// before/after pair of a rename shown without diff markers — confirm which call belongs in the real file.
pipeline::Resource::mem_cleaner().ErasePrimitivePy(this);
pipeline::Resource::mem_cleaner().ReleasePrimitivePyObj(this);
MS_LOG(DEBUG) << "Release:" << ToString();
}
// Replaces the Python object held by this primitive with `obj`.
void PrimitivePy::SetPyObj(const py::object &obj) {
  python_obj_ = obj;
}
@ -327,6 +328,10 @@ py::dict PrimitivePy::RunInfer(const py::tuple &args) {
if (!HasPyObj()) {
MS_LOG(EXCEPTION) << "[" << this->ToString() << "]: pyobj is empty";
}
// The Python object may have been replaced with None, in which case an exception thrown from Python would lose the
// original primitive info, so check for the attribute here first.
if (!py::hasattr(python_obj_, PY_PRIM_METHOD_INFER)) {
MS_LOG(EXCEPTION) << "prim:" << ToString() << " has no attr:" << PY_PRIM_METHOD_INFER;
}
auto infer_fuc = python_obj_.attr(PY_PRIM_METHOD_INFER);
return infer_fuc(*args);
}
@ -335,6 +340,10 @@ void PrimitivePy::RunCheck(const py::tuple &args) {
if (!HasPyObj()) {
MS_LOG(EXCEPTION) << "[" << this->ToString() << "]: pyobj is empty";
}
// The Python object may have been replaced with None, in which case an exception thrown from Python would lose the
// original primitive info, so check for the attribute here first.
if (!py::hasattr(python_obj_, PY_PRIM_METHOD_CHECK)) {
MS_LOG(EXCEPTION) << "prim:" << ToString() << " has no attr:" << PY_PRIM_METHOD_CHECK;
}
auto check_func = python_obj_.attr(PY_PRIM_METHOD_CHECK);
(void)check_func(*args);
}
@ -343,6 +352,10 @@ py::object PrimitivePy::RunInferValue(const py::tuple &args) {
if (!HasPyObj()) {
MS_LOG(EXCEPTION) << "[" << this->ToString() << "]: pyobj is empty";
}
// The Python object may have been replaced with None, in which case an exception thrown from Python would lose the
// original primitive info, so check for the attribute here first.
if (!py::hasattr(python_obj_, PY_PRIM_METHOD_INFER_VALUE)) {
MS_LOG(EXCEPTION) << "prim:" << ToString() << " has no attr:" << PY_PRIM_METHOD_INFER_VALUE;
}
auto infer_value = python_obj_.attr(PY_PRIM_METHOD_INFER_VALUE);
return infer_value(*args);
}

View File

@ -24,7 +24,7 @@ from mindspore import context
from mindspore import log as logger
from .tensor import Tensor as MsTensor
from .._c_expression import generate_key, Executor_, Tensor, MetaTensor, PynativeExecutor_
from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_backend
from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_pipeline
from ..parallel._ps_context import _is_role_pserver
from ..parallel._utils import _get_device_num, _get_global_rank, _need_to_full, _check_full_batch, _to_full_tensor, \
_get_parameter_broadcast
@ -195,7 +195,7 @@ class _MindSporeFunction:
@_wrap_func
def __call__(self, *args):
init_backend()
init_pipeline()
converted, arguments_dict, parse_method = _convert_function_arguments(self.fn, *args)
if not converted:
raise RuntimeError('Process function parameter is failure')

View File

@ -24,7 +24,7 @@ import numpy
from mindspore import log as logger
from mindspore.common.parameter import PARAMETER_NAME_DEFAULT
from .. import context
from .._c_expression import init_backend, Cell_
from .._c_expression import init_pipeline, Cell_
from .._checkparam import Validator
from ..common import dtype as mstype
from ..common.api import _executor, _pynative_exec
@ -90,7 +90,7 @@ class Cell(Cell_):
self._parameter_layout_dict = {}
self._create_time = int(time.time() * 1e9)
self.phase_prefix = ""
init_backend()
init_pipeline()
# call gc to release GE session resources used by non-used cell objects
if os.getenv('GC_COLLECT_IN_CELL') == '1':