From a05c38bb63b009ff7cb39efeddd85be3fa031dec Mon Sep 17 00:00:00 2001 From: Wei Luning Date: Wed, 29 Jul 2020 14:14:47 +0800 Subject: [PATCH] make python Parameter inherit from Tensor --- mindspore/_extends/parse/__init__.py | 4 +- mindspore/_extends/parse/parser.py | 10 - .../session/ascend_inference_session.cc | 2 +- .../ccsrc/backend/session/session_basic.cc | 11 +- mindspore/ccsrc/debug/anf_ir_utils.cc | 2 +- mindspore/ccsrc/debug/draw.cc | 5 +- .../frontend/parallel/graph_util/node_info.cc | 7 +- .../frontend/parallel/step_auto_parallel.cc | 15 +- .../ccsrc/frontend/parallel/step_parallel.cc | 38 +-- mindspore/ccsrc/pipeline/jit/action.cc | 3 +- mindspore/ccsrc/pipeline/jit/init.cc | 2 + .../pipeline/jit/parse/data_converter.cc | 41 +--- .../ccsrc/pipeline/jit/parse/parse_base.h | 1 - mindspore/ccsrc/pipeline/jit/parse/resolve.cc | 5 +- mindspore/ccsrc/pipeline/jit/pipeline.cc | 24 +- mindspore/ccsrc/pipeline/jit/pipeline.h | 2 + .../pipeline/pynative/pynative_execute.cc | 17 +- mindspore/ccsrc/transform/onnx/ir_exporter.cc | 2 +- .../ccsrc/transform/onnx/onnx_exporter.cc | 2 +- mindspore/ccsrc/utils/callbacks_ge.cc | 2 +- mindspore/ccsrc/utils/convert_utils.cc | 2 +- .../ccsrc/utils/load_onnx/anf_model_parser.cc | 5 +- mindspore/ccsrc/utils/param_value_py.cc | 5 +- mindspore/ccsrc/utils/tensor_py.cc | 2 +- mindspore/common/api.py | 42 +--- mindspore/common/parameter.py | 220 +++++++++++++----- mindspore/common/tensor.py | 2 +- mindspore/core/ir/anf.h | 6 +- mindspore/core/ir/param_value.h | 4 - mindspore/lite/src/param_value_lite.h | 6 +- mindspore/nn/cell.py | 75 +++--- mindspore/nn/optim/optimizer.py | 2 +- mindspore/train/serialization.py | 5 +- .../cv/mobilenetv2/src/mobilenetV2.py | 1 + .../cv/mobilenetv2_quant/src/mobilenetV2.py | 2 + .../cv/mobilenetv3/src/mobilenetV3.py | 2 + .../models/bert/test_bert_graph_kernel.py | 2 +- .../models/bert/test_bert_tdt_lossscale.py | 4 +- .../models/deeplabv3/test_deeplabv3.py | 1 + .../models/resnet50/test_resnet50_imagenet.py | 11 +- .../insert_memcpy_async_for_hccl_op_test.cc | 3 - .../cpp/session/anf_runtime_algorithm_test.cc | 2 +- tests/ut/cpp/session/kernel_graph_test.cc | 2 +- tests/ut/python/ir/test_tensor.py | 2 +- tests/ut/python/nn/test_parameter.py | 25 +- tests/ut/python/train/quant/test_quant.py | 9 +- 46 files changed, 355 insertions(+), 282 deletions(-) diff --git a/mindspore/_extends/parse/__init__.py b/mindspore/_extends/parse/__init__.py index 323932560a..10a991c1ed 100644 --- a/mindspore/_extends/parse/__init__.py +++ b/mindspore/_extends/parse/__init__.py @@ -21,12 +21,12 @@ from .parser import (Parser, create_obj_instance, generate_scope, get_class_member_namespace_symbol, create_slice_obj, get_dataclass_attributes, get_dataclass_methods, get_obj_id, get_module_namespace, get_obj_type, get_object_key, - get_default_input, get_parse_method_of_class, get_scope_name, + get_parse_method_of_class, get_scope_name, is_class_member, parse_cb, resolve_symbol) from .serialize import * __all__ = ['parse_cb', 'get_parse_method_of_class', 'get_bprop_method_of_class', 'resolve_symbol', - 'get_object_key', 'get_default_input', 'get_class_instance_type', 'is_class_member', + 'get_object_key', 'get_class_instance_type', 'is_class_member', 'get_obj_type', 'get_obj_id', 'create_obj_instance', 'get_module_namespace', 'get_class_member_namespace_symbol', 'get_obj_id', 'Parser', 'get_dataclass_attributes', 'get_dataclass_methods', 'dump_obj', 'load_obj', 'get_dataclass_methods', 'get_scope_name', diff --git 
a/mindspore/_extends/parse/parser.py b/mindspore/_extends/parse/parser.py index 180edf0fb4..695f449f83 100644 --- a/mindspore/_extends/parse/parser.py +++ b/mindspore/_extends/parse/parser.py @@ -206,16 +206,6 @@ def get_object_key(obj): return obj_id, obj_key -def get_default_input(obj): - if hasattr(obj, '__parameter__'): - return obj.default_input - if isinstance(obj, tuple): - convert = lambda x: x.default_input if hasattr(x, '__parameter__') else x - args = tuple(convert(x) for x in obj) - return args - return obj - - def is_class_member(node): """Check the attr is class member variable.""" type_ = node.__class__.__name__ diff --git a/mindspore/ccsrc/backend/session/ascend_inference_session.cc b/mindspore/ccsrc/backend/session/ascend_inference_session.cc index ee0816ee9e..ae23bb4594 100644 --- a/mindspore/ccsrc/backend/session/ascend_inference_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_inference_session.cc @@ -76,7 +76,7 @@ GraphId AscendInferenceSession::CompileGraph(NotNull func_graph) { if (AnfAlgo::IsParameterWeight(pk_node)) { const auto ¶m_value = pk_node->default_param(); MS_EXCEPTION_IF_NULL(param_value); - auto tensor = std::dynamic_pointer_cast(param_value->value()); + auto tensor = std::dynamic_pointer_cast(param_value); MS_EXCEPTION_IF_NULL(tensor); if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0), LongToSize(tensor->data().nbytes()), tensor->data_type(), diff --git a/mindspore/ccsrc/backend/session/session_basic.cc b/mindspore/ccsrc/backend/session/session_basic.cc index dfde9336b2..2ba38d3cae 100644 --- a/mindspore/ccsrc/backend/session/session_basic.cc +++ b/mindspore/ccsrc/backend/session/session_basic.cc @@ -42,12 +42,12 @@ namespace mindspore { namespace session { -static std::shared_ptr> python_paras; +static std::shared_ptr> python_paras; void ClearPythonParasMap() { python_paras = nullptr; } namespace { const int kSummaryGetItem = 2; -ParamValuePtr GetParamDefaultValue(const AnfNodePtr &node) { +ValuePtr GetParamDefaultValue(const AnfNodePtr &node) { if (node == nullptr) { return nullptr; } @@ -209,8 +209,7 @@ ParameterPtr ConstructRunOpParameter(const std::shared_ptr &graph, auto param = graph->NewParameter(); MS_EXCEPTION_IF_NULL(param); if (tensor_mask == kParameterWeightTensorMask) { - auto param_value_new = std::make_shared(); - param->set_default_param(param_value_new); + param->set_default_param(input_tensor); } // set the kernel info of parameter auto kernel_build_info_builder = std::make_shared(); @@ -390,7 +389,7 @@ ParameterPtr SessionBasic::CreateNewParameterFromParameter(const AnfNodePtr &anf ParameterPtr new_parameter = nullptr; // if parameter's python parameter has been exist a backend parameter, reuse the exist parameter if (python_paras == nullptr) { - python_paras = std::make_shared>(); + python_paras = std::make_shared>(); } auto iter = python_paras->find(param_value); if (iter != python_paras->end()) { @@ -667,7 +666,7 @@ ParameterPtr SessionBasic::CreateNewParameter(const AnfNodePtr &anf, KernelGraph auto param_value = GetParamDefaultValue(anf); ParameterPtr new_parameter = nullptr; if (python_paras == nullptr) { - python_paras = std::make_shared>(); + python_paras = std::make_shared>(); } auto iter = python_paras->find(param_value); if (iter != python_paras->end()) { diff --git a/mindspore/ccsrc/debug/anf_ir_utils.cc b/mindspore/ccsrc/debug/anf_ir_utils.cc index 6c7e1b4d64..7d4380c4d2 100644 --- a/mindspore/ccsrc/debug/anf_ir_utils.cc +++ b/mindspore/ccsrc/debug/anf_ir_utils.cc @@ -1670,7 +1670,7 
@@ class IrParser { // load parameter default value from serialized file py::object default_obj = LoadObject(lexer_.GetTokenText()); - auto param_value_new = py::cast(default_obj); + auto param_value_new = py::cast(default_obj); param->set_default_param(param_value_new); tok = lexer_.GetNextToken(); diff --git a/mindspore/ccsrc/debug/draw.cc b/mindspore/ccsrc/debug/draw.cc index da0bd7021b..09ded5118a 100644 --- a/mindspore/ccsrc/debug/draw.cc +++ b/mindspore/ccsrc/debug/draw.cc @@ -318,8 +318,9 @@ void BaseDigraph::FuncGraphParameters(const FuncGraphPtr &key) { buffer_ << parameter->ToString(); auto param = parameter->cast(); if (param->has_default()) { - auto tensor = param->default_param()->value(); - if (tensor) { + auto tensor_v = param->default_param(); + if (tensor_v && tensor_v->isa()) { + auto tensor = tensor_v->cast(); auto &shape = tensor->shape(); std::ostringstream shape_str; std::copy(shape.begin(), shape.end(), std::ostream_iterator(shape_str, ",")); diff --git a/mindspore/ccsrc/frontend/parallel/graph_util/node_info.cc b/mindspore/ccsrc/frontend/parallel/graph_util/node_info.cc index e50df2818b..5d9c999b8b 100644 --- a/mindspore/ccsrc/frontend/parallel/graph_util/node_info.cc +++ b/mindspore/ccsrc/frontend/parallel/graph_util/node_info.cc @@ -38,7 +38,12 @@ bool ParameterRequireGrad(const AnfNodePtr &node_ptr) { if (!para_ptr->has_default()) { return false; } - return para_ptr->default_param()->requires_grad(); + auto obj = py::cast(para_ptr->default_param()); + auto param_value = py::cast(obj.attr("_value")); + if (param_value == nullptr) { + return false; + } + return param_value->requires_grad(); } } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc b/mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc index 4a2da20024..3059f61691 100644 --- a/mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc +++ b/mindspore/ccsrc/frontend/parallel/step_auto_parallel.cc @@ -41,6 +41,7 @@ #include "frontend/parallel/context.h" #include "frontend/parallel/ops_info/tmp_identity_info.h" #include "frontend/parallel/ops_info/reshape_info.h" +#include "frontend/parallel/graph_util/node_info.h" #include "frontend/parallel/step_parallel.h" #include "frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h" #include "pipeline/jit/parse/python_adapter.h" @@ -122,12 +123,7 @@ std::vector ExtractInputParameterByNode(const CNodePtr &node) { if (input->isa()) { auto input_parameter = input->cast(); - if (input_parameter->has_default()) { - bool requires_grad = input_parameter->default_param()->requires_grad(); - is_parameter.push_back(requires_grad); - } else { - is_parameter.push_back(false); - } + is_parameter.push_back(ParameterRequireGrad(input_parameter)); } else if (input->isa() || IsValueNode(input) || IsValueNode(input)) { is_parameter.push_back(false); } @@ -798,12 +794,7 @@ void AugmentCostGraph(const std::vector &all_nodes) { std::vector is_parameter; auto casted_target_parameter = target_parameter->cast(); MS_EXCEPTION_IF_NULL(casted_target_parameter); - if (casted_target_parameter->has_default()) { - bool requires_grad = casted_target_parameter->default_param()->requires_grad(); - is_parameter.push_back(requires_grad); - } else { - is_parameter.push_back(false); - } + is_parameter.push_back(ParameterRequireGrad(casted_target_parameter)); if (tmp_identity_ptr->set_is_parameter(is_parameter) != SUCCESS) { MS_LOG(EXCEPTION) << "Setting parameter for TmpIdentityInfo failed"; } diff --git 
a/mindspore/ccsrc/frontend/parallel/step_parallel.cc b/mindspore/ccsrc/frontend/parallel/step_parallel.cc index 9d407eba60..a5003b8a4d 100644 --- a/mindspore/ccsrc/frontend/parallel/step_parallel.cc +++ b/mindspore/ccsrc/frontend/parallel/step_parallel.cc @@ -1295,11 +1295,8 @@ void CoverSliceShape(const FuncGraphPtr &root) { g_RefMap.clear(); } -bool ParameterIsCloned(const FuncGraphPtr &root, const AnfNodePtr &parameter_node) { - MS_EXCEPTION_IF_NULL(root); +bool ParameterIsCloned(const AnfNodePtr &parameter_node) { MS_EXCEPTION_IF_NULL(parameter_node); - FuncGraphManagerPtr manager = root->manager(); - MS_EXCEPTION_IF_NULL(manager); auto cloned_parameter = parameter_node->cast(); MS_EXCEPTION_IF_NULL(cloned_parameter); @@ -1307,8 +1304,12 @@ bool ParameterIsCloned(const FuncGraphPtr &root, const AnfNodePtr &parameter_nod if (!cloned_parameter->has_default()) { return false; } - - bool cloned = cloned_parameter->default_param()->cloned(); + auto obj = py::cast(cloned_parameter->default_param()); + auto param_value = py::cast(obj.attr("_value")); + if (param_value == nullptr) { + return false; + } + bool cloned = param_value->cloned(); if (!cloned) { return false; } @@ -1324,12 +1325,16 @@ void SetClonedTensorShapeForOptimizer(const FuncGraphPtr &root) { auto cloned_parameter = cloned_parameter_node->cast(); MS_EXCEPTION_IF_NULL(cloned_parameter); - if (!ParameterIsCloned(root, cloned_parameter_node)) { + if (!ParameterIsCloned(cloned_parameter_node)) { + continue; + } + auto obj = py::cast(cloned_parameter->default_param()); + auto param_value = py::cast(obj.attr("_value")); + if (param_value == nullptr) { continue; } - // get the cloned index - int32_t cloned_index = cloned_parameter->default_param()->cloned_index(); + int32_t cloned_index = param_value->cloned_index(); // find the be cloned parameter bool found_be_cloned_parameter = false; @@ -1344,12 +1349,18 @@ void SetClonedTensorShapeForOptimizer(const FuncGraphPtr &root) { } const auto &param_value_cloned = be_cloned_parameter->default_param(); - if (!param_value_cloned->be_cloned()) { + + auto obj_in = py::cast(param_value_cloned); + auto param_value_in = py::cast(obj_in.attr("_value")); + if (param_value_in == nullptr) { + continue; + } + if (!param_value_in->be_cloned()) { continue; } // get the be cloned index - auto &be_cloned_index = param_value_cloned->be_cloned_index(); + auto &be_cloned_index = param_value_in->be_cloned_index(); if (std::find(be_cloned_index.begin(), be_cloned_index.end(), cloned_index) != be_cloned_index.end()) { found_be_cloned_parameter = true; cloned_from_parameter = be_cloned_parameter; @@ -2103,10 +2114,7 @@ std::string NodeParameterName(const CNodePtr &node) { if (input->isa()) { auto input_parameter = input->cast(); if (input_parameter->has_default()) { - const auto &param_value = input_parameter->default_param(); - if (param_value->requires_grad()) { - return param_value->name(); - } + return input_parameter->name(); } } } diff --git a/mindspore/ccsrc/pipeline/jit/action.cc b/mindspore/ccsrc/pipeline/jit/action.cc index 9fb3dc59d9..61314a459f 100644 --- a/mindspore/ccsrc/pipeline/jit/action.cc +++ b/mindspore/ccsrc/pipeline/jit/action.cc @@ -233,8 +233,7 @@ bool AbstractSpecializeAction(const ResourcePtr &res) { for (const auto &param : func_graph->parameters()) { auto param_node = std::static_pointer_cast(param); if (param_node->has_default()) { - const auto &param_value = param_node->default_param(); - ValuePtr value = param_value->value(); + ValuePtr value = param_node->default_param(); constexpr bool broaden = true;
AbstractBasePtr ptr = abstract::FromValue(value, broaden); diff --git a/mindspore/ccsrc/pipeline/jit/init.cc b/mindspore/ccsrc/pipeline/jit/init.cc index 1d3332f73b..e6f4fec4ce 100644 --- a/mindspore/ccsrc/pipeline/jit/init.cc +++ b/mindspore/ccsrc/pipeline/jit/init.cc @@ -68,6 +68,8 @@ PYBIND11_MODULE(_c_expression, m) { py::arg("type") = py::str("onnx_ir"), "Get graph proto string by specifying ir type.") .def("compile", &ExecutorPy::Compile, py::arg("obj"), py::arg("args"), py::arg("phase") = py::str(""), py::arg("use_vm") = py::bool_(false), "Compile obj by executor.") + .def("updata_param_node_default_input", &ExecutorPy::UpdataParamNodeDefaultInput, py::arg("phase"), + py::arg("params"), "Fetch the inputs of Conv or Matmul for quant export.") .def("get_parameter_layout", &ExecutorPy::GetParameterLayout, py::arg("phase") = py::str("train"), "Get Parameter Tensor Layout Dictionary.") .def("get_strategy", &ExecutorPy::GetCNodeStrategy, py::arg("phase") = py::str("train"), diff --git a/mindspore/ccsrc/pipeline/jit/parse/data_converter.cc b/mindspore/ccsrc/pipeline/jit/parse/data_converter.cc index ae68bf97c7..112b99631b 100644 --- a/mindspore/ccsrc/pipeline/jit/parse/data_converter.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/data_converter.cc @@ -205,41 +205,6 @@ bool ConvertMetaFuncGraph(const py::object &obj, ValuePtr *const data, bool use_ return true; } -bool ConvertDataType(const py::object &obj, ValuePtr *const data) { - MS_LOG(DEBUG) << "Converting type object"; - auto typeptr = obj.cast(); - if (typeptr == nullptr) { - MS_LOG(ERROR) << "Resolve TypePtr error, get ptr is null"; - return false; - } - *data = typeptr; - return true; -} - -bool ConvertMetaTensor(const py::object &obj, ValuePtr *const data) { - MS_LOG(DEBUG) << "Converting MetaTensor object."; - - auto m_tensor = obj.cast(); - if (m_tensor == nullptr) { - MS_LOG(ERROR) << "Resolve MetaTensor error, get ptr is null."; - return false; - } - *data = m_tensor; - return true; -} - -bool ConvertTensor(const py::object &obj, ValuePtr *const data) { - MS_LOG(DEBUG) << "Converting tensor object"; - - auto m_tensor = obj.cast(); - if (m_tensor == nullptr) { - MS_LOG(ERROR) << "Resolve Tensor error, get ptr is null"; - return false; - } - *data = m_tensor; - return true; -} - bool ConvertSlice(const py::object &obj, ValuePtr *const data) { MS_LOG(DEBUG) << "Converting slice object"; @@ -364,11 +329,11 @@ bool ConvertData(const py::object &obj, ValuePtr *const data, bool use_signature } else if (py::isinstance(obj)) { ret = ConvertMetaFuncGraph(obj, &converted, use_signature); } else if (py::isinstance(obj)) { - ret = ConvertDataType(obj, &converted); + converted = obj.cast(); } else if (py::isinstance(obj)) { - ret = ConvertTensor(obj, &converted); + converted = obj.cast(); } else if (py::isinstance(obj)) { - ret = ConvertMetaTensor(obj, &converted); + converted = obj.cast(); } else if (py::isinstance(obj)) { std::shared_ptr env = obj.cast>(); converted = env; diff --git a/mindspore/ccsrc/pipeline/jit/parse/parse_base.h b/mindspore/ccsrc/pipeline/jit/parse/parse_base.h index 1750fe7309..d3c6851eda 100644 --- a/mindspore/ccsrc/pipeline/jit/parse/parse_base.h +++ b/mindspore/ccsrc/pipeline/jit/parse/parse_base.h @@ -85,7 +85,6 @@ const char PYTHON_PARSE_ANALYZE_SUPER[] = "analyze_super"; const char PYTHON_PARSE_CLASS_SLICE[] = "create_slice_obj"; const char PYTHON_PARSE_CLASS_ELLIPSIS[] = "create_ellipsis_obj"; -const char PYTHON_MOD_GET_DEFAULT_INPUT[] = "get_default_input"; // define the common name const char 
NAMED_PRIMITIVE_LEN[] = "len"; diff --git a/mindspore/ccsrc/pipeline/jit/parse/resolve.cc b/mindspore/ccsrc/pipeline/jit/parse/resolve.cc index 228c9ae184..48f3a24652 100644 --- a/mindspore/ccsrc/pipeline/jit/parse/resolve.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/resolve.cc @@ -103,10 +103,9 @@ AnfNodePtr ResolveParameterObj(const FuncGraphPtr &func_graph, const py::object } if (para_node == nullptr) { auto node = top_graph->AddWeightParameter(param_name); - auto param_value = py::cast(python_adapter::GetPyObjAttr(obj, "_value")); - node->set_default_param(param_value); + auto value = py::cast(obj); + node->set_default_param(value); // set_abstract for parameter - ValuePtr value = param_value->value(); constexpr bool broaden = true; node->set_abstract(abstract::FromValue(value, broaden)); para_node = node; diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.cc b/mindspore/ccsrc/pipeline/jit/pipeline.cc index 42539f27ed..58715563c2 100644 --- a/mindspore/ccsrc/pipeline/jit/pipeline.cc +++ b/mindspore/ccsrc/pipeline/jit/pipeline.cc @@ -719,7 +719,11 @@ void ProcessVmArgInner(const py::tuple &args, const ResourcePtr &res, VectorRef if (!param_ptr->has_default()) { MS_LOG(EXCEPTION) << "Parameter[" << i << "] has no default param"; } - arg_list->push_back(param_ptr->default_param()->value()); + if (!param_ptr->default_param()->isa()) { + MS_LOG(EXCEPTION) << "Parameter[" << param_ptr->ToString() + << "] is not initialized, need to call `.init_data()`"; + } + arg_list->push_back(param_ptr->default_param()); } } } @@ -782,6 +786,24 @@ FuncGraphPtr ExecutorPy::BuildGraph(const py::dict &init_params, const std::stri #endif } +void ExecutorPy::UpdataParamNodeDefaultInput(const std::string &phase, + const std::unordered_map ¶ms_value) { + FuncGraphPtr func_graph = info_[phase]->resource->func_graph(); + MS_EXCEPTION_IF_NULL(func_graph); + MS_LOG(DEBUG) << "UpdataParamNodeDefaultInput for func graph(" << func_graph->ToString() << ") phase(" << phase + << ")!"; + auto ¶ms = func_graph->parameters(); + for (const auto ¶m : params) { + MS_EXCEPTION_IF_NULL(param); + auto param_cast = param->cast(); + MS_EXCEPTION_IF_NULL(param_cast); + auto iter = params_value.find(param_cast->name()); + if (iter != params_value.end()) { + param_cast->set_default_param(iter->second); + } + } +} + void ExecutorPy::RunInitGraph(const py::dict &init_params, const std::string &phase) { #if ENABLE_GE RunGEInitGraph(init_params, phase); diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.h b/mindspore/ccsrc/pipeline/jit/pipeline.h index f4fabd6115..0dac6e94a9 100644 --- a/mindspore/ccsrc/pipeline/jit/pipeline.h +++ b/mindspore/ccsrc/pipeline/jit/pipeline.h @@ -88,6 +88,8 @@ class ExecutorPy : public std::enable_shared_from_this { FuncGraphPtr BuildGraph(const py::dict &init_params, const std::string &phase, const py::object &broadcast_params = {}); + void UpdataParamNodeDefaultInput(const std::string &phase, + const std::unordered_map ¶ms); void RunInitGraph(const py::dict &init_params, const std::string &phase); py::dict GetParameterLayout(const std::string &phase); py::dict GetCNodeStrategy(const std::string &phase); diff --git a/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc b/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc index 97e9e47a16..4bfe438b2f 100644 --- a/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc +++ b/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc @@ -146,12 +146,6 @@ static std::string GetOpId(const OpExecInfoPtr &op_exec_info) { return id; } -py::object GetTupleObj(const 
py::object &obj) { - py::module mod = parse::python_adapter::GetPyModule(parse::PYTHON_MOD_PARSE_MODULE); - py::object obj_tuple = parse::python_adapter::CallPyModFn(mod, parse::PYTHON_MOD_GET_DEFAULT_INPUT, obj); - return obj_tuple; -} - std::map> GetTypeIndex(const std::vector &dtypes) { std::map> type_indexes; for (size_t i = 0; i < dtypes.size(); ++i) { @@ -242,7 +236,7 @@ py::tuple ConvertInputs(const PrimitivePyPtr &prim, const py::list &args, py::tu py::tuple input_mask(args.size()); for (size_t i = 0; i < args.size(); ++i) { input_mask[i] = py::hasattr(args[i], "__parameter__"); - py_args[i] = GetTupleObj(args[i]); + py_args[i] = args[i]; } auto signature = prim->signatures(); std::vector dtypes; @@ -366,9 +360,6 @@ py::object RunOpInVM(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *stat py::tuple result(op_inputs.size()); for (size_t i = 0; i < op_inputs.size(); i++) { py::object input = op_inputs[i]; - if (py::hasattr(input, "__parameter__")) { - input = py::getattr(input, "data"); - } auto tensor = py::cast(input); auto new_tensor = std::make_shared(tensor->data_type(), tensor->shape(), tensor->data_ptr()); new_tensor->set_device_address(tensor->device_address()); @@ -878,8 +869,7 @@ AnfNodePtr PynativeExecutor::GetInput(const py::object &obj, const py::object &o if (graph_info_map_[df_builder_].param_map.count(obj_id) == 0) { auto free_param = df_builder_->add_parameter(); free_param->set_name(param_name); - auto free_param_new = py::cast(obj.attr("_value")); - free_param->set_default_param(free_param_new); + free_param->set_default_param(py::cast(obj)); free_param->debug_info()->set_name(param_name); MS_LOG(DEBUG) << "Top graph set free parameter " << obj_id; graph_info_map_[df_builder_].param_map[obj_id] = free_param; @@ -1074,8 +1064,7 @@ abstract::AbstractBasePtrList PynativeExecutor::GetArgsSpec(const py::args &args for (const auto ¶m : df_builder_->parameters()) { auto param_node = std::static_pointer_cast(param); if (param_node->has_default()) { - const auto ¶m_value = param_node->default_param(); - ValuePtr value = param_value->value(); + ValuePtr value = param_node->default_param(); AbstractBasePtr ptr = abstract::FromValue(value, true); if (ptr == nullptr) { MS_LOG(EXCEPTION) << "Args convert error"; diff --git a/mindspore/ccsrc/transform/onnx/ir_exporter.cc b/mindspore/ccsrc/transform/onnx/ir_exporter.cc index 78858eea8a..60a1cdab28 100644 --- a/mindspore/ccsrc/transform/onnx/ir_exporter.cc +++ b/mindspore/ccsrc/transform/onnx/ir_exporter.cc @@ -187,7 +187,7 @@ void IrExportBuilder::BuildParameters(const FuncGraphPtr &func_graph, onnx::Grap onnx::TensorProto *initializer_proto = graph_proto->add_initializer(); initializer_proto->set_name(param_name); SetParamToTensorProto(param, initializer_proto); - auto tensor = std::dynamic_pointer_cast(param->default_param()->value()); + auto tensor = std::dynamic_pointer_cast(param->default_param()); if (tensor) { initializer_proto->set_raw_data(tensor->data_c(), tensor->data().nbytes()); } diff --git a/mindspore/ccsrc/transform/onnx/onnx_exporter.cc b/mindspore/ccsrc/transform/onnx/onnx_exporter.cc index f69fb81a7e..8f0b72f78e 100644 --- a/mindspore/ccsrc/transform/onnx/onnx_exporter.cc +++ b/mindspore/ccsrc/transform/onnx/onnx_exporter.cc @@ -449,7 +449,7 @@ void OnnxExporter::ExportParameters(const FuncGraphPtr &func_graph, onnx::GraphP initializer_proto->set_name(param_ptr->ToString()); SetTensorProtoInfo(param_ptr, initializer_proto); // set value for initializer - auto tensor = 
std::dynamic_pointer_cast(param_ptr->default_param()->value()); + auto tensor = std::dynamic_pointer_cast(param_ptr->default_param()); if (tensor) { initializer_proto->set_raw_data(tensor->data_c(), tensor->data().nbytes()); } diff --git a/mindspore/ccsrc/utils/callbacks_ge.cc b/mindspore/ccsrc/utils/callbacks_ge.cc index 6001b295ad..a73df1e257 100644 --- a/mindspore/ccsrc/utils/callbacks_ge.cc +++ b/mindspore/ccsrc/utils/callbacks_ge.cc @@ -52,7 +52,7 @@ bool GetParameterShape(const FuncGraphPtr &graph, const std::string ¶m_name, if (param_node->name() == param_name) { TensorPtr tensor; if (param_node->has_default()) { - tensor = std::dynamic_pointer_cast(param_node->default_param()->value()); + tensor = std::dynamic_pointer_cast(param_node->default_param()); } if (tensor == nullptr) { shape->push_back(ONE_SHAPE); diff --git a/mindspore/ccsrc/utils/convert_utils.cc b/mindspore/ccsrc/utils/convert_utils.cc index 3e2b926e77..e017ce8796 100644 --- a/mindspore/ccsrc/utils/convert_utils.cc +++ b/mindspore/ccsrc/utils/convert_utils.cc @@ -448,7 +448,7 @@ bool IsGraphOutputValueNodeOrParameter(const AnfNodePtr &output, const py::tuple if (!param->has_default()) { MS_LOG(EXCEPTION) << "Can not determine value of Parameter " << index << " (" << param->name() << ")"; } - auto tensor = param->default_param()->value(); + auto tensor = param->default_param(); *ret_val = py::cast(tensor); } return true; diff --git a/mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc b/mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc index fa1137e3f6..b592164df9 100644 --- a/mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc +++ b/mindspore/ccsrc/utils/load_onnx/anf_model_parser.cc @@ -124,10 +124,7 @@ bool MSANFModelParser::BuildParameterForFuncGraph(const ParameterPtr &node, cons MS_LOG(EXCEPTION) << "memcpy_s error, errorno" << ret; } - auto param_value = std::make_shared(); - MS_EXCEPTION_IF_NULL(param_value); - param_value->set_value(tensor_info); - node->set_default_param(param_value); + node->set_default_param(tensor_info); } anfnode_build_map_[value_proto.name()] = node; return true; diff --git a/mindspore/ccsrc/utils/param_value_py.cc b/mindspore/ccsrc/utils/param_value_py.cc index c976d41cd2..edf2061dc0 100644 --- a/mindspore/ccsrc/utils/param_value_py.cc +++ b/mindspore/ccsrc/utils/param_value_py.cc @@ -24,22 +24,19 @@ REGISTER_PYBIND_DEFINE(ParamValue, ([](const py::module *m) { (void)py::class_(*m, "ParamValue") .def(py::init()) .def("clone", &ParamValue::Clone) - .def_property("data", &ParamValue::value, &ParamValue::set_value) .def_property("name", &ParamValue::name, &ParamValue::set_name) .def_property("requires_grad", &ParamValue::requires_grad, &ParamValue::set_requires_grad) .def_property("layerwise_parallel", &ParamValue::layerwise_parallel, &ParamValue::set_layerwise_parallel) .def(py::pickle( [](const ParamValue &p) { // __getstate__ - return py::make_tuple(py::cast(p.value()), p.name(), p.requires_grad(), - p.layerwise_parallel()); + return py::make_tuple(p.name(), p.requires_grad(), p.layerwise_parallel()); }, [](const py::tuple &t) { // __setstate__ if (t.size() != 6) { std::runtime_error("Invalid state for ParamValue!"); } ParamValuePtr p = std::make_shared(); - p->set_value(t[0].cast()); p->set_name(t[1].cast()); p->set_requires_grad(t[2].cast()); p->set_layerwise_parallel(t[3].cast()); diff --git a/mindspore/ccsrc/utils/tensor_py.cc b/mindspore/ccsrc/utils/tensor_py.cc index d9875d374b..fe3b7deb6d 100644 --- a/mindspore/ccsrc/utils/tensor_py.cc +++ b/mindspore/ccsrc/utils/tensor_py.cc @@ 
-372,7 +372,7 @@ REGISTER_PYBIND_DEFINE(Tensor, ([](const py::module *m) { .def(py::pickle( [](const Tensor &t) { // __getstate__ /* Return a tuple that fully encodes the state of the object */ - return py::make_tuple(TensorPy::AsNumpy(t)); + return py::make_tuple(TensorPy::SyncAsNumpy(t)); }, [](const py::tuple &t) { // __setstate__ if (t.size() != 1) { diff --git a/mindspore/common/api.py b/mindspore/common/api.py index 68810355e9..c0fa868f81 100644 --- a/mindspore/common/api.py +++ b/mindspore/common/api.py @@ -255,7 +255,6 @@ def ms_function(fn=None, obj=None, input_signature=None): process_obj = obj if args and not isinstance(args[0], MsTensor) and hasattr(args[0], func.__name__): process_obj = args[0] - args = (x.default_input if hasattr(x, 'default_input') else x for x in args) return _MindSporeFunction(func, input_signature, process_obj)(*args) return staging_specialize @@ -354,28 +353,8 @@ class _Executor: raise RuntimeError("Failure to init and dataset subgraph!") return True - def _build_data_graph(self, obj, params, phase): - if params is None: - self._executor.build_data_graph(obj.parameters_dict(), phase, obj.parameters_broadcast_dict()) - elif isinstance(params, OrderedDict): - self._executor.build_data_graph(params, phase) - else: - raise TypeError('Parameters need OrderedDict type, but got {}'. - format(type(params))) - - def _params_init_data(self, obj, params, auto_parallel_mode=False): - """Init parameters' data.""" - if params is not None: - for key, param in params.items(): - if not auto_parallel_mode: - param.init_data() - elif key not in obj.parameter_layout_dict: - logger.debug("Layout dict does not contain the key %s.", key) - param.init_data(set_sliced=True) - else: - layout = obj.parameter_layout_dict[key] - param.init_data(layout, set_sliced=True) - obj.init_parameters_data(auto_parallel_mode=auto_parallel_mode) + def _build_data_graph(self, obj, phase): + self._executor.build_data_graph(obj.parameters_dict(), phase, obj.parameters_broadcast_dict()) def _set_dataset_mode(self, args_list): """set dataset mode.""" @@ -386,7 +365,7 @@ class _Executor: else: _set_dataset_mode_config('normal') - def compile(self, obj, *args, phase='predict', params=None, do_convert=True, auto_parallel_mode=False): + def compile(self, obj, *args, phase='predict', do_convert=True, auto_parallel_mode=False): """ Compiles graph. @@ -394,7 +373,6 @@ class _Executor: obj (Function/Cell): The function or cell instance need compile. args (tuple): Function or cell input arguments. phase (str): The name of compile phase. Default: 'predict'. - params (OrderedDict): The parameters dictionary used for init data graph. Default: None. do_convert (bool): When set to True, convert ME graph to GE graph after compiling graph. auto_parallel_mode: When set to True, use auto parallel mode to compile graph. 
@@ -435,10 +413,8 @@ class _Executor: if auto_parallel_mode: obj.parameter_layout_dict = self._executor.get_parameter_layout(phase) - self._params_init_data(obj, params, auto_parallel_mode) - if not enable_debug_runtime or enable_ge: - if auto_parallel_mode: - obj.load_parameter_slice(params) + replace = obj.init_parameters_data(auto_parallel_mode=auto_parallel_mode) + self._updata_param_node_default_input(phase, replace) # set parallel inputs in sink mode if auto_parallel_mode and (args and isinstance(args[0], Tensor) and args[0].virtual_flag): @@ -446,16 +422,20 @@ class _Executor: # the following GE init process is not needed when use vm or ms backend if enable_ge: - self._build_data_graph(obj, params, phase) + self._build_data_graph(obj, phase) if "export" not in phase: init_phase = "init_subgraph" + "." + str(obj.create_time) _exec_init_graph(obj, init_phase) elif not enable_ge and "export" in phase: - self._build_data_graph(obj, params, phase) + self._build_data_graph(obj, phase) return phase, True + def _updata_param_node_default_input(self, phase, replace): + new_param = {x.name: replace[x] for x in replace if id(x) != id(replace[x])} + return self._executor.updata_param_node_default_input(phase, new_param) + def _get_strategy(self, obj): real_phase = self.phase_prefix + obj.phase + '.' + str(obj.create_time) return self._executor.get_strategy(real_phase) diff --git a/mindspore/common/parameter.py b/mindspore/common/parameter.py index 28c7c9fa4d..d3be0b5e5a 100644 --- a/mindspore/common/parameter.py +++ b/mindspore/common/parameter.py @@ -14,7 +14,6 @@ # ============================================================================ """Parameter for cell.""" -import numbers from copy import copy from mindspore import context from .._c_expression import ParamValue @@ -37,10 +36,17 @@ def _check_type(x): return True -class Parameter: +class Parameter(MetaTensor): """ Parameter types of cell models. + After being initialized, a `Parameter` is a subtype of `Tensor`. + + In graph mode, if a `Parameter` is initialized with an `Initializer`, its type is `MetaTensor` + rather than `Tensor`. A `MetaTensor` only keeps the shape and dtype of a tensor and uses no memory, + so the shape can still be changed during compilation for auto-parallel. Calling `init_data` returns + a `Tensor` Parameter with initialized data. + Note: Each parameter of Cell is represented by Parameter class. @@ -52,23 +58,85 @@ class Parameter: layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in paralle mode, broadcast and gradients communication would not be applied on parameters. Default: False. """ + __base_type__ = {} + + def __new__(cls, default_input, name, *args, **kwargs): + input_class, *class_init_args = Parameter._get_parameter_new_args(default_input) + new_type = Parameter._get_base_class(input_class) + obj = input_class.__new__(new_type) + input_class.__init__(obj, *class_init_args) + # it's better to make the Initializer a kind of metatensor.
+ obj.init_mode = None + if isinstance(default_input, Initializer): + obj.init_mode = default_input + return obj + + def __reduce_ex__(self, _): + data = self + if self.init_mode is not None: + data = self.init_mode + else: + # cast to a Tensor to break the infinite recursion during deepcopy + data = Tensor(self) + return ( + Parameter, (data, self.name, self.requires_grad, self.layerwise_parallel)) + def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False): self._value = ParamValue() - self.set_parameter_data(default_input) self.name = name self.requires_grad = requires_grad self.layerwise_parallel = layerwise_parallel + # this flag is for tensor copy data. + self.init_flag = False + # this flag is for ge variable copy data. self._is_init = False + self._inited_param = None self._sliced = False self.is_param_ps = False self._cast_type = None self.init_in_server = False - if context.get_context("mode") == context.PYNATIVE_MODE: - self.init_data() + + @staticmethod + def _get_base_class(input_class): + input_class_name = f'Parameter{input_class.__name__}' + if input_class_name in Parameter.__base_type__: + new_type = Parameter.__base_type__[input_class_name] + else: + new_type = type(input_class_name, (Parameter, input_class), {}) + Parameter.__base_type__[input_class_name] = new_type + return new_type + + @staticmethod + def _get_parameter_new_args(data): + """Get the class and constructor arguments for the underlying data of a new `Parameter`.""" + if isinstance(data, bool): + raise ValueError('Parameter data can not be `bool`') + if isinstance(data, Initializer): + if context.get_context("mode") == context.PYNATIVE_MODE: + # always init data while in pynative mode. + data = data.to_tensor() + else: + return (MetaTensor, data.dtype, data.shape) + if isinstance(data, Tensor): + # make a copy of Tensor to init the parameter + return (Tensor, data.asnumpy(),) + if isinstance(data, int): + return (Tensor, data, mstype.int32) + if isinstance(data, float): + return (Tensor, data, mstype.float32) + return (Tensor, data) + + def __str__(self): + value_str = MetaTensor.__repr__(self) + if isinstance(self, Tensor): + value_str = Tensor.__repr__(self) + return f'Parameter (name={self._value.name}, value={value_str})' def __repr__(self): - format_str = 'Parameter (name={name})' - return format_str.format(name=self._value.name) + value_str = MetaTensor.__repr__(self) + if isinstance(self, Tensor): + value_str = Tensor.__repr__(self) + return f'Parameter (name={self._value.name}, value={value_str})' def __parameter__(self): """For parse check.""" @@ -77,6 +145,13 @@ class Parameter: self.is_param_ps = True self.init_in_server = init_in_server + + @property + def inited_param(self): + """Get the new parameter created after `init_data` is called.""" + return self._inited_param + + @property def name(self): """Get the name of the parameter.""" @@ -157,15 +232,11 @@ class Parameter: x._value.name = prefix + '.'
+ self._value.name x.is_init = False if init != 'same': - shape = self.default_input.shape - dtype = self.default_input.dtype - if isinstance(init, (str, Initializer, numbers.Number)): - x.init_mode = initializer(init, shape=shape, dtype=dtype) - x.default_input = MetaTensor(dtype, shape) - if context.get_context("mode") == context.PYNATIVE_MODE: - x.init_data() - else: - x.default_input = initializer(init, shape=shape, dtype=dtype) + shape = self.shape + dtype = self.dtype + x.default_input = initializer(init, shape=shape, dtype=dtype) + if context.get_context("mode") == context.PYNATIVE_MODE: + x.init_data() return x @property @@ -195,50 +266,65 @@ class Parameter: @property def default_input(self): - return self._data + return self @default_input.setter def default_input(self, data): - self._data = data - self._value.data = data + self.set_parameter_data(data) - def __add__(self, other): - return self.default_input + other + def _update_tensor_data(self, data): + """Update the parameter with a Tensor in place, or return a new Parameter if that is not possible.""" + if isinstance(self, Tensor): + # for a Tensor of the same shape, assign the value in place: + return self.assign_value(data) + # create a new tensor + return Parameter(data, self.name, self.requires_grad) - def __sub__(self, other): - return self.default_input - other + def set_parameter_data(self, data, slice_shape=False): + """ + Set `default_input` of current `Parameter`. - def __mul__(self, other): - return self.default_input * other + Args: + data (Union[Tensor, Initializer]): New data. + slice_shape (bool): If True, the data is a slice and the shape is allowed to change. Default: False. - def __truediv__(self, other): - return self.default_input / other + Returns: + Parameter, the parameter after the data is set. + """ + if not isinstance(data, (MetaTensor, Initializer)): + raise ValueError(f"Parameter data must be an `Initializer` or a kind of `MetaTensor` " + f"(like `Tensor` or `MetaTensor`). But got type {type(data)}.") + # check whether the incoming data and the current parameter are already initialized Tensors. + is_incoming_tensor = isinstance(data, Tensor) + is_current_tensor = isinstance(self, Tensor) - def __setitem__(self, index, value): - default_input = self.default_input - default_input[index] = value - return self - - def set_parameter_data(self, data): - """Set `default_input` of current `Parameter`.""" - if isinstance(data, bool): - raise ValueError('Parameter data can not be `bool`') - if isinstance(data, Tensor): - # make a copy of Tensor to init the parameter - data = Tensor(data.asnumpy()) - data.init_flag = False - elif isinstance(data, Initializer): - self.init_mode = data - data = MetaTensor(self.init_mode.dtype, self.init_mode.shape) - elif isinstance(data, int): - data = Tensor(data, dtype=mstype.int32) - elif isinstance(data, float): - data = Tensor(data, dtype=mstype.float32) + if is_incoming_tensor and not is_current_tensor: + raise TypeError("Parameter is a `MetaTensor` and has not been initialized, so `data` for " + "`set_parameter_data` should be an `Initializer`. If you want to update it with a " + "Tensor, call `init_parameters_data` of `Cell` to init and replace all Parameters of " + "the network first, then call this method.") + if tuple(self.shape) != tuple(data.shape): + # the shape is only allowed to change when setting a sliced Parameter. + if slice_shape: + self._update_tensor_data(data) + self.sliced = True + else: + raise ValueError(f"Can not change the shape of Parameter which has been initialized." + f" Current shape is {self.shape}, and incoming is {data.shape}.") + if self.dtype != data.dtype: + raise ValueError(f"Can not change the Parameter dtype. Current dtype is {self.dtype}" + f", and incoming is {data.dtype}.
Use .set_dtype(xxx) to change the dtype.") + if isinstance(data, Initializer): + # if the current Parameter is already an initialized Tensor, update its data directly; otherwise keep the Initializer as the pending init mode. + if is_current_tensor: + self._update_tensor_data(data.to_tensor()) + else: + self.init_mode = data + elif is_incoming_tensor or is_current_tensor: + self._update_tensor_data(data) else: - data = Tensor(data) - data.init_flag = False - - self.default_input = data + raise ValueError(f"Updating the Parameter with {data} is not supported") + return self def init_data(self, layout=None, set_sliced=False): """ @@ -252,31 +338,37 @@ class Parameter: - slice_shape (list[int]): Shape of slice. set_sliced (bool): True if should set parameter sliced after init the data of initializer. Default: False. + + Returns: + Parameter, the Parameter after its data has been initialized. """ - if isinstance(self.default_input, Tensor): - # skip if data already initialized. - return + if self.init_mode is None: + return self + if self.inited_param is not None: + return self.inited_param if layout is not None: if not isinstance(layout, list): - raise TypeError("The layout should be list! layout is {}." - .format(layout)) + raise TypeError("The layout should be list! layout is {}.".format(layout)) if len(layout) < 3: - raise ValueError("The length of layout must be larger than 3! layout is {}." - .format(layout)) + raise ValueError("The length of layout must not be less than 3! layout is {}.".format(layout)) slice_index = int(_get_slice_index(layout[0], layout[1])) if (self.init_in_server and self.is_param_ps and isinstance(self.init_mode, Initializer)): - self.default_input = self.init_mode.to_tensor(0, [1]) + data = self.init_mode.to_tensor(0, [1]) else: - self.default_input = self.init_mode.to_tensor(slice_index, layout[2]) + data = self.init_mode.to_tensor(slice_index, layout[2]) else: if (self.init_in_server and self.is_param_ps and isinstance(self.init_mode, Initializer)): - self.default_input = self.init_mode.to_tensor(0, [1]) + data = self.init_mode.to_tensor(0, [1]) else: - self.default_input = self.init_mode.to_tensor() + data = self.init_mode.to_tensor() - self.init_mode = None + obj = self._update_tensor_data(data) + if id(obj) != id(self): + self._inited_param = obj + obj.init_mode = None if set_sliced: - self.sliced = True + obj.sliced = True + return obj class ParameterTuple(tuple): diff --git a/mindspore/common/tensor.py b/mindspore/common/tensor.py index 10be325b80..1ad383ad1b 100644 --- a/mindspore/common/tensor.py +++ b/mindspore/common/tensor.py @@ -75,7 +75,7 @@ class Tensor(Tensor_): self._virtual_flag = False def __repr__(self): - return str(self.__str__()) + return str(Tensor_.__str__(self)) def __add__(self, other): out = tensor_operator_registry.get('__add__')(self, other) diff --git a/mindspore/core/ir/anf.h b/mindspore/core/ir/anf.h index 4a9e16731d..1595bed6f2 100644 --- a/mindspore/core/ir/anf.h +++ b/mindspore/core/ir/anf.h @@ -283,11 +283,11 @@ class Parameter : public ANode { std::string fullname_with_scope() override { return name(); }; bool has_default() const { return has_default_; } - void set_default_param(ParamValuePtr param) { + void set_default_param(ValuePtr param) { default_param_ = param; has_default_ = true; } - ParamValuePtr default_param() const { return default_param_; } + ValuePtr default_param() const { return default_param_; } bool operator==(const AnfNode &other) const override { if (!other.isa()) { @@ -303,7 +303,7 @@ class Parameter : public ANode { private: std::string name_; bool has_default_; - ParamValuePtr default_param_; + ValuePtr
default_param_; }; using ParameterPtr = std::shared_ptr; diff --git a/mindspore/core/ir/param_value.h b/mindspore/core/ir/param_value.h index d9976ccde2..b1ec94248b 100644 --- a/mindspore/core/ir/param_value.h +++ b/mindspore/core/ir/param_value.h @@ -33,9 +33,6 @@ class ParamValue { virtual ~ParamValue() = default; - tensor::MetaTensorPtr value() const { return value_; } - void set_value(const tensor::MetaTensorPtr &value) { value_ = value; } - const std::string &name() const { return name_; } void set_name(const std::string &name) { name_ = name; } @@ -72,7 +69,6 @@ class ParamValue { } private: - tensor::MetaTensorPtr value_; std::string name_{"Parameter"}; bool requires_grad_{true}; bool layerwise_parallel_{false}; diff --git a/mindspore/lite/src/param_value_lite.h b/mindspore/lite/src/param_value_lite.h index e387cb273c..8f806a4f9c 100644 --- a/mindspore/lite/src/param_value_lite.h +++ b/mindspore/lite/src/param_value_lite.h @@ -36,7 +36,7 @@ struct AnfQuantParam { int32_t numBits; AnfQuantParam() : scale(1.0), zeroPoint(0), min(0.0), max(0.0), narrowRange(false), numBits(8), inited(false) {} }; -class ParamValueLite : public ParamValue { +class ParamValueLite : public Value { public: ParamValueLite() : tensor_addr_(nullptr), tensor_size_(0) {} virtual ~ParamValueLite() = default; @@ -65,6 +65,10 @@ class ParamValueLite : public ParamValue { quant_params_.emplace_back(std::move(quant_param)); } + bool operator==(const Value &other) const override { + return this == &other; + } + private: void *tensor_addr_; size_t tensor_size_; diff --git a/mindspore/nn/cell.py b/mindspore/nn/cell.py index b474732c2e..4ef7a0a98f 100755 --- a/mindspore/nn/cell.py +++ b/mindspore/nn/cell.py @@ -229,7 +229,6 @@ class Cell: for item in inputs: if isinstance(item, numpy.ndarray): raise TypeError("cell inputs should not be numpy array.") - self.init_parameters_data() orign_grad = [] if self.requires_grad is True: _pynative_exec.set_grad_flag(True) @@ -350,19 +349,8 @@ class Cell: params (dict): The parameters dictionary used for init data graph. """ if params is None: - for key in self.parameters_dict(): - tensor = self.parameters_dict()[key].data - if key not in self.parameter_layout_dict: - logger.info("layout dict does not contain the key %s", key) - continue - if self.parameters_dict()[key].sliced: - logger.debug("Param %s is already sliced.", key) - continue - layout = self.parameter_layout_dict[key] - new_tensor = _load_tensor_by_layout(tensor, layout) - self.parameters_dict()[key].set_parameter_data(new_tensor) - self.parameters_dict()[key].sliced = True - elif isinstance(params, OrderedDict): + params = self.parameters_dict() + if isinstance(params, OrderedDict): for key in params: tensor = params[key].data if key not in self.parameter_layout_dict: @@ -373,8 +361,7 @@ class Cell: continue layout = self.parameter_layout_dict[key] new_tensor = _load_tensor_by_layout(tensor, layout) - params[key].set_parameter_data(new_tensor) - params[key].sliced = True + params[key].set_parameter_data(new_tensor, True) else: raise TypeError('Parameters need OrderedDict type, but got {}'.
format(type(params))) @@ -545,17 +532,46 @@ class Cell: """ raise NotImplementedError - def init_parameters_data(self, recurse=True, auto_parallel_mode=False): - """Init parameters' data.""" - for param in self.get_parameters(expand=recurse): - if not auto_parallel_mode: - param.init_data() - elif param.name not in self.parameter_layout_dict: - logger.debug("Layout dict does not contain the key %s.", param.name) - param.init_data(set_sliced=True) - else: - layout = self.parameter_layout_dict[param.name] - param.init_data(layout, set_sliced=True) + def init_parameters_data(self, auto_parallel_mode=False): + """ + Init all parameters' data and replace the original saved parameters in cell. + + Args: + auto_parallel_mode (bool): If running in auto_parallel_mode. + + Returns: + Dict[Parameter, Parameter], returns a dict of original parameter and replaced parameter. + """ + replace = dict() + def _updata(param): + if param in replace: + return replace[param] + layout = None + set_sliced = False + if auto_parallel_mode: + set_sliced = True + if param.name not in self.parameter_layout_dict: + logger.debug("Layout dict does not contain the key %s.", param.name) + else: + layout = self.parameter_layout_dict[param.name] + new_p = param.init_data(layout, set_sliced=set_sliced) + replace[param] = new_p + return new_p + # replace all original usage. + cells = self.cells_and_names() + for _, cell in cells: + params = cell._params.items() + for param_name, param in params: + cell._params[param_name] = _updata(param) + cell_dict = cell.__dict__ + for key in cell_dict: + if isinstance(cell_dict[key], ParameterTuple): + param_tuple = cell_dict[key] + new_param_tuple = [] + for param in param_tuple: + new_param_tuple.append(_updata(param)) + cell.__dict__[key] = ParameterTuple(new_param_tuple) + return replace def parameters_dict(self, recurse=True): """ @@ -682,9 +698,10 @@ class Cell: for cell_name, cell in cells: params = cell._params.items() for par_name, par in params: - if par and par not in params_set: + if par.inited_param is not None: + par = par.inited_param + if par is not None and par not in params_set: params_set.add(par) - par_new_name = par_name if cell_name: par_new_name = cell_name + '.' + par_new_name diff --git a/mindspore/nn/optim/optimizer.py b/mindspore/nn/optim/optimizer.py index 9a4bf0fce2..00c26dd18f 100755 --- a/mindspore/nn/optim/optimizer.py +++ b/mindspore/nn/optim/optimizer.py @@ -90,7 +90,7 @@ class Optimizer(Cell): def __init__(self, learning_rate, parameters, weight_decay=0.0, loss_scale=1.0): super(Optimizer, self).__init__(auto_prefix=False) - if parameters and not isinstance(parameters, list): + if parameters is not None and not isinstance(parameters, list): parameters = list(parameters) if not parameters: diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py index 8b54002669..8c11981268 100644 --- a/mindspore/train/serialization.py +++ b/mindspore/train/serialization.py @@ -295,7 +295,6 @@ def load_param_into_net(net, parameter_dict): logger.error("Failed to combine the net and the parameters.") msg = ("Argument parameter_dict element should be a Parameter, but got {}.".format(type(new_param))) raise TypeError(msg) - param.init_data() _update_param(param, new_param) else: param_not_load.append(param.name) @@ -362,15 +361,13 @@ def _exec_save_checkpoint(train_network, ckpt_file_name, integrated_save=True, a integrated_save (bool): Whether to integrated save in automatic model parallel scene. 
async_save (bool): Whether asynchronous execute save checkpoint into file. Default: False. """ - + train_network.init_parameters_data() param_dict = {} for _, param in train_network.parameters_and_names(): param_dict[param.name] = param - param_list = [] for (key, value) in param_dict.items(): each_param = {"name": key} - value.init_data() if isinstance(value.data, Tensor): param_data = value.data else: diff --git a/model_zoo/official/cv/mobilenetv2/src/mobilenetV2.py b/model_zoo/official/cv/mobilenetv2/src/mobilenetV2.py index 5b1b4cc5ef..76fa21acdf 100644 --- a/model_zoo/official/cv/mobilenetv2/src/mobilenetV2.py +++ b/model_zoo/official/cv/mobilenetv2/src/mobilenetV2.py @@ -263,6 +263,7 @@ class MobileNetV2(nn.Cell): Examples: >>> _initialize_weights() """ + self.init_parameters_data() for _, m in self.cells_and_names(): if isinstance(m, (nn.Conv2d, DepthwiseConv)): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels diff --git a/model_zoo/official/cv/mobilenetv2_quant/src/mobilenetV2.py b/model_zoo/official/cv/mobilenetv2_quant/src/mobilenetV2.py index 25dccfed10..c20da4a9bb 100644 --- a/model_zoo/official/cv/mobilenetv2_quant/src/mobilenetV2.py +++ b/model_zoo/official/cv/mobilenetv2_quant/src/mobilenetV2.py @@ -196,6 +196,7 @@ class mobilenetV2(nn.Cell): self.head = nn.SequentialCell(head) # init weights + self.init_parameters_data() self._initialize_weights() def construct(self, x): @@ -215,6 +216,7 @@ class mobilenetV2(nn.Cell): Examples: >>> _initialize_weights() """ + self.init_parameters_data() for _, m in self.cells_and_names(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels diff --git a/model_zoo/official/cv/mobilenetv3/src/mobilenetV3.py b/model_zoo/official/cv/mobilenetv3/src/mobilenetV3.py index 61b63f9ea1..004b5aafcf 100644 --- a/model_zoo/official/cv/mobilenetv3/src/mobilenetV3.py +++ b/model_zoo/official/cv/mobilenetv3/src/mobilenetV3.py @@ -200,6 +200,7 @@ class ResUnit(nn.Cell): self.add = P.TensorAdd() if self.use_short_cut_conv else None def construct(self, x): + """construct""" if self.first_conv: out = self.expand(x) else: @@ -289,6 +290,7 @@ class MobileNetV3(nn.Cell): kernel_size=1, has_bias=True, pad_mode='pad') self.squeeze = P.Squeeze(axis=(2, 3)) + self.init_parameters_data() self._initialize_weights() def construct(self, x): diff --git a/tests/st/networks/models/bert/test_bert_graph_kernel.py b/tests/st/networks/models/bert/test_bert_graph_kernel.py index 24aec5084e..576c7a32c5 100644 --- a/tests/st/networks/models/bert/test_bert_graph_kernel.py +++ b/tests/st/networks/models/bert/test_bert_graph_kernel.py @@ -171,9 +171,9 @@ def test_bert_tdt(): netwithgrads.set_train(True) model = Model(netwithgrads) callback = ModelCallback() + netwithloss.init_parameters_data() params = netwithloss.trainable_params() for param in params: - param.init_data() value = param.default_input name = param.name if isinstance(value, Tensor): diff --git a/tests/st/networks/models/bert/test_bert_tdt_lossscale.py b/tests/st/networks/models/bert/test_bert_tdt_lossscale.py index fb64d3db2a..3c41f4dd7d 100644 --- a/tests/st/networks/models/bert/test_bert_tdt_lossscale.py +++ b/tests/st/networks/models/bert/test_bert_tdt_lossscale.py @@ -207,9 +207,9 @@ def test_bert_percision(): netwithgrads.set_train(True) model = Model(netwithgrads) callback = ModelCallback() + netwithloss.init_parameters_data() params = netwithloss.trainable_params() for param in params: - param.init_data() value = param.default_input name = param.name if isinstance(value, 
Tensor): @@ -279,9 +279,9 @@ def test_bert_performance(): netwithgrads.set_train(True) model = Model(netwithgrads) callback = ModelCallback() + netwithloss.init_parameters_data() params = netwithloss.trainable_params() for param in params: - param.init_data() value = param.default_input name = param.name if isinstance(value, Tensor): diff --git a/tests/st/networks/models/deeplabv3/test_deeplabv3.py b/tests/st/networks/models/deeplabv3/test_deeplabv3.py index e458897ae6..8ee6000eee 100644 --- a/tests/st/networks/models/deeplabv3/test_deeplabv3.py +++ b/tests/st/networks/models/deeplabv3/test_deeplabv3.py @@ -63,6 +63,7 @@ class LossCallBack(Callback): str(cb_params.net_outputs))) def model_fine_tune(train_net, fix_weight_layer): + train_net.init_parameters_data() for para in train_net.trainable_params(): para.set_parameter_data(Tensor(np.ones(para.data.shape).astype(np.float32) * 0.02)) if fix_weight_layer in para.name: diff --git a/tests/st/networks/models/resnet50/test_resnet50_imagenet.py b/tests/st/networks/models/resnet50/test_resnet50_imagenet.py index 517c7da5e7..220b986208 100644 --- a/tests/st/networks/models/resnet50/test_resnet50_imagenet.py +++ b/tests/st/networks/models/resnet50/test_resnet50_imagenet.py @@ -174,9 +174,14 @@ def train_process(q, device_id, epoch_size, device_num, enable_hccl): steps_per_epoch=step_size, lr_decay_mode=config.lr_decay_mode)) # optimizer - decayed_params = list(filter(lambda x: 'beta' not in x.name and 'gamma' not in x.name and 'bias' not in x.name, - net.trainable_params())) - no_decayed_params = [param for param in net.trainable_params() if param not in decayed_params] + decayed_params = [] + no_decayed_params = [] + for param in net.trainable_params(): + if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name: + decayed_params.append(param) + else: + no_decayed_params.append(param) + group_params = [{'params': decayed_params, 'weight_decay': config.weight_decay}, {'params': no_decayed_params, 'weight_decay': 0.0}, {'order_params': net.trainable_params()}] diff --git a/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op_test.cc b/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op_test.cc index e3441d45e3..f5c3902983 100644 --- a/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op_test.cc +++ b/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_hccl_op_test.cc @@ -107,7 +107,6 @@ TEST_F(TestHWInsertMemcpyForHccl, test_cond2) { for (auto p : kg->parameters()) { auto param = p->cast(); EXPECT_NE(param, nullptr); - param->set_default_param(std::make_shared()); } auto optimizer = std::make_shared(); @@ -157,7 +156,6 @@ TEST_F(TestHWInsertMemcpyForHccl, test_cond4) { for (auto p : kg->parameters()) { auto param = p->cast(); EXPECT_NE(param, nullptr); - param->set_default_param(std::make_shared()); } auto optimizer = std::make_shared(); @@ -185,7 +183,6 @@ TEST_F(TestHWInsertMemcpyForHccl, test_cond5) { for (auto p : kg->parameters()) { auto param = p->cast(); EXPECT_NE(param, nullptr); - param->set_default_param(std::make_shared()); } auto optimizer = std::make_shared(); diff --git a/tests/ut/cpp/session/anf_runtime_algorithm_test.cc b/tests/ut/cpp/session/anf_runtime_algorithm_test.cc index ac38e5427e..1ddc5d7b14 100644 --- a/tests/ut/cpp/session/anf_runtime_algorithm_test.cc +++ b/tests/ut/cpp/session/anf_runtime_algorithm_test.cc @@ -766,7 +766,7 @@ TEST_F(AnfRuntimeAlgorithmTest, IsParameterWeight) { auto kernel_graph = 
std::make_shared(); auto parameter_node = kernel_graph->add_parameter(); MS_EXCEPTION_IF_NULL(parameter_node); - auto param_value_new = std::make_shared(); + auto param_value_new = std::make_shared(int64_t(0), kInt32); parameter_node->set_default_param(param_value_new); EXPECT_TRUE(AnfAlgo::IsParameterWeight(parameter_node)); EXPECT_THROW(AnfAlgo::IsParameterWeight(nullptr), std::runtime_error); diff --git a/tests/ut/cpp/session/kernel_graph_test.cc b/tests/ut/cpp/session/kernel_graph_test.cc index 0961b09d62..3cf7189a08 100644 --- a/tests/ut/cpp/session/kernel_graph_test.cc +++ b/tests/ut/cpp/session/kernel_graph_test.cc @@ -82,7 +82,7 @@ TEST_F(KernelGraphTest, NewParameter) { // test weight parameter node as input auto weight_parameter_node = anf_graph->add_parameter(); MS_EXCEPTION_IF_NULL(weight_parameter_node); - auto param_value_new = std::make_shared(); + auto param_value_new = std::make_shared(kNumberTypeFloat32, shape); weight_parameter_node->set_default_param(param_value_new); weight_parameter_node->set_abstract(x_abstract); auto new_weight_parameter_node = kernel_graph->NewParameter(weight_parameter_node); diff --git a/tests/ut/python/ir/test_tensor.py b/tests/ut/python/ir/test_tensor.py index 72100b2715..b8631e81dc 100644 --- a/tests/ut/python/ir/test_tensor.py +++ b/tests/ut/python/ir/test_tensor.py @@ -225,7 +225,7 @@ def test_div(): @non_graph_engine def test_parameter(): x = Parameter(initializer(1, [1], ms.float32), name="beta1_power") - x.init_data() + x = x.init_data() z = x / 2 print(z) diff --git a/tests/ut/python/nn/test_parameter.py b/tests/ut/python/nn/test_parameter.py index 492e74922a..75d03eda9b 100644 --- a/tests/ut/python/nn/test_parameter.py +++ b/tests/ut/python/nn/test_parameter.py @@ -139,14 +139,31 @@ def test_parameter_lazy_init(): # Call init_data() without set default_input. para = Parameter(initializer('ones', [1, 2, 3], mstype.float32), 'test1') assert not isinstance(para.default_input, Tensor) - para.init_data() + para = para.init_data() assert isinstance(para.default_input, Tensor) assert np.array_equal(para.default_input.asnumpy(), np.ones((1, 2, 3))) # Call init_data() after default_input is set. para = Parameter(initializer('ones', [1, 2, 3], mstype.float32), 'test2') assert not isinstance(para.default_input, Tensor) - para.default_input = Tensor(np.zeros((1, 2, 3))) - assert np.array_equal(para.default_input.asnumpy(), np.zeros((1, 2, 3))) - para.init_data() # expect no effect. + # expect type error when not init + with pytest.raises(TypeError): + para.default_input = Tensor(np.zeros((1, 2, 3))) + # init then assign + para = para.init_data() + # check the type + with pytest.raises(ValueError): + para.default_input = Tensor(np.zeros((1, 2, 3))) + # check the shape + with pytest.raises(ValueError): + para.default_input = Tensor(np.zeros((1, 2))) + # expect change ok + para.default_input = Tensor(np.zeros((1, 2, 3)).astype(np.float32)) assert np.array_equal(para.default_input.asnumpy(), np.zeros((1, 2, 3))) + para.default_input = initializer('ones', [1, 2, 3], mstype.float32) + assert isinstance(para.default_input, Tensor) + # same object and has inited + assert np.array_equal(para.default_input.asnumpy(), np.ones((1, 2, 3))) + # expect no effect. 
+ para.init_data() + assert np.array_equal(para.default_input.asnumpy(), np.ones((1, 2, 3))) diff --git a/tests/ut/python/train/quant/test_quant.py b/tests/ut/python/train/quant/test_quant.py index 977497de55..10e7250fcf 100644 --- a/tests/ut/python/train/quant/test_quant.py +++ b/tests/ut/python/train/quant/test_quant.py @@ -69,8 +69,7 @@ def test_qat_lenet(): net = qat.convert_quant_network( net, bn_fold=True, per_channel=[True, False], symmetric=[True, False]) # should load the checkpoint. mock here - for param in net.get_parameters(): - param.init_data() + net.init_parameters_data() qat.export(net, img, file_name="quant.pb") @@ -80,8 +79,7 @@ def test_qat_mobile_per_channel_tf(): img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32)) network = qat.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False]) # should load the checkpoint. mock here - for param in network.get_parameters(): - param.init_data() + network.init_parameters_data() qat.export(network, img, file_name="quant.pb") @pytest.mark.skip(reason="no `te.lang.cce` in ut env") @@ -90,6 +88,5 @@ def test_qat_mobile_per_channel_ff(): img = Tensor(np.ones((1, 3, 224, 224)).astype(np.float32)) network = qat.convert_quant_network(network, bn_fold=True, per_channel=[False, False], symmetric=[True, False]) # should load the checkpoint. mock here - for param in network.get_parameters(): - param.init_data() + network.init_parameters_data() qat.export(network, img, file_name="quant.pb")
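
A quick sketch of the lazy-init flow introduced by this patch, written against the updated `Parameter`/`Cell` APIs shown above (the shapes, names, and the GRAPH_MODE setting below are only illustrative, mirroring the updated unit tests):

    import numpy as np
    import mindspore as ms
    from mindspore import Parameter, Tensor, context
    from mindspore.common.initializer import initializer

    context.set_context(mode=context.GRAPH_MODE)

    # Built from an Initializer in graph mode, the Parameter stays MetaTensor-based (no data allocated yet).
    w = Parameter(initializer('ones', [2, 3], ms.float32), name='w')
    assert not isinstance(w, Tensor)

    # init_data() returns the initialized, Tensor-based Parameter; keep the returned object.
    w = w.init_data()
    assert isinstance(w, Tensor)
    assert np.array_equal(w.asnumpy(), np.ones((2, 3)))

For a whole network, `Cell.init_parameters_data()` performs the same materialization for every Parameter and returns the mapping from original objects to their initialized replacements, which the executor then pushes into the compiled graph via `updata_param_node_default_input`.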