!2347 support multi param for tuple grad
Merge pull request !2347 from riemann_penn/support_multi_param_for_tuple_grad
commit e8639ad91e
@@ -59,7 +59,8 @@ class UndeterminedShapeType {
  public:
   explicit UndeterminedShapeType(const std::string &env_str) {
     // param_name indices_shape indices_type values_shape values_type dense_shape
-    // export UNDETERMINED_SPARSE_SHAPE_TYPES="w1:2:Int32:2 1 2:Float32:3 1 2"
+    // export UNDETERMINED_SPARSE_SHAPE_TYPES="sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1
+    // 2:Float32:3 1 2"
     std::vector<string> fields;
     string tmp;
     std::stringstream input(env_str);
@@ -115,6 +116,20 @@ std::vector<int> UndeterminedShapeType::GetShape(const std::string &shape_str) {
 }
 const size_t UndeterminedShapeType::fields_num = 6;
 
+std::unordered_map<std::string, UndeterminedShapeType> g_undetermined_configs;
+void InitUndeterminedFromEnv(const std::string &sparse_shape_types) {
+  if (!g_undetermined_configs.empty()) {
+    return;
+  }
+  std::string tmp;
+  std::stringstream input(sparse_shape_types);
+  while (std::getline(input, tmp, ';')) {
+    auto config = UndeterminedShapeType(tmp);
+    g_undetermined_configs.insert(std::make_pair(config.param_name(), config));
+    MS_LOG(DEBUG) << "Undetermined config from env: " << tmp;
+  }
+}
+
 AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                     const AbstractBasePtrList &args_spec_list) {
   MS_EXCEPTION_IF_NULL(primitive);
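The environment string documented in the comment above is parsed once and cached per parameter key. A minimal Python sketch of the same format and parse-once behaviour (the entry names and shapes are only the illustrative values from the comment, and the helper below is hypothetical, not part of the framework):

    import os

    # One ';'-separated entry per sparse parameter, each with six ':'-separated fields:
    # param_name:indices_shape:indices_type:values_shape:values_type:dense_shape
    os.environ["UNDETERMINED_SPARSE_SHAPE_TYPES"] = (
        "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;"
        "sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2"
    )

    _configs = {}  # rough analogue of g_undetermined_configs: parsed once, keyed by param_name

    def init_undetermined_from_env(sparse_shape_types):
        if _configs:
            return  # already initialized; later calls are no-ops
        for entry in sparse_shape_types.split(";"):
            fields = entry.split(":")
            _configs[fields[0]] = fields[1:]

    init_undetermined_from_env(os.environ["UNDETERMINED_SPARSE_SHAPE_TYPES"])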
@@ -128,27 +143,33 @@ AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePt
     MS_LOG(EXCEPTION) << "EnvGetItem evaluator args[1] should be a SymbolicKeyInstance but: " << key->ToString();
   }
 
-  if (key->sparse_grad()) {
+  if (!key->sparse_grad().empty()) {
     // Will be fixed once undetermined type ready
     auto sparse_shape_types = common::GetEnv("UNDETERMINED_SPARSE_SHAPE_TYPES");
     if (sparse_shape_types.empty()) {
-      sparse_shape_types = "w1:2:Int32:2 1 2:Float32:3 1 2";
+      sparse_shape_types = "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2";
     }
+    MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString() << ", Undetermined shape is "
+                  << sparse_shape_types;
+    InitUndeterminedFromEnv(sparse_shape_types);
 
-    auto shape_types = UndeterminedShapeType(sparse_shape_types);
+    auto shape_types = g_undetermined_configs.find(key->sparse_grad());
+    if (shape_types == g_undetermined_configs.end()) {
+      MS_LOG(EXCEPTION) << "Param " << key->ToString()
+                        << " has sparse_grad, but shape/type is not configured in env UNDETERMINED_SPARSE_SHAPE_TYPES: "
+                        << sparse_shape_types;
+    }
-    MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString();
     AbstractBasePtrList sparse_list;
     // indices
-    auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.indices_type());
-    auto indices = std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types.indices_shape()));
+    auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.indices_type());
+    auto indices =
+      std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types->second.indices_shape()));
     sparse_list.emplace_back(indices);
     // values
-    auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.values_type());
-    auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types.values_shape()));
+    auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.values_type());
+    auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types->second.values_shape()));
     sparse_list.emplace_back(dout);
     // dense_shape
-    sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types.dense_shape()));
+    sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types->second.dense_shape()));
     return std::make_shared<AbstractTuple>(sparse_list);
   }
 
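In other words, for a parameter whose sparse_grad key is configured, EnvGetItem now infers its gradient as an (indices, values, dense_shape) tuple using the shapes and types registered for that key. A rough NumPy sketch of what the sparse_key_w1 entry above describes, assuming the field order given in the comment (purely illustrative, not framework code):

    import numpy as np

    # sparse_key_w1: indices of shape [2] (Int32), values of shape [2, 1, 2] (Float32),
    # dense shape (3, 1, 2) -- together they stand in for the dense gradient of w1.
    indices = np.zeros((2,), dtype=np.int32)
    values = np.zeros((2, 1, 2), dtype=np.float32)
    dense_shape = (3, 1, 2)
    sparse_grad_w1 = (indices, values, dense_shape)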
@@ -229,7 +229,8 @@ bool AbstractSpecializeAction(const ResourcePtr &res) {
       if (param_node->has_default()) {
         auto param_value = std::dynamic_pointer_cast<ParamValuePy>(param_node->default_param());
         AbstractBasePtr ptr = abstract::FromValue(parse::data_converter::PyDataToValue(param_value->value()), true);
-        auto sparse_grad = py::cast<bool>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
+        auto sparse_grad =
+          py::cast<std::string>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
         ptr->set_sparse_grad(sparse_grad);
 
         parallel::ParallelParameterContextRestoreInNoTraining(func_graph, param_node, ptr);
@@ -44,7 +44,7 @@ class AbstractBase : public Base {
  public:
   explicit AbstractBase(const ValuePtr &value = nullptr, const TypePtr &type = kAnyType,
                         const BaseShapePtr &shape = kNoShape)
-      : value_(value), type_(type), shape_(shape), sparse_grad_(false) {}
+      : value_(value), type_(type), shape_(shape), sparse_grad_("") {}
   ~AbstractBase() override = default;
   MS_DECLARE_PARENT(AbstractBase, Base)
 
@@ -53,13 +53,13 @@ class AbstractBase : public Base {
 
   virtual bool operator==(const AbstractBase &other) const;
   void set_value(const ValuePtr &value) { value_ = value; }
-  void set_sparse_grad(const bool &sparse_grad) { sparse_grad_ = sparse_grad; }
+  void set_sparse_grad(const std::string &sparse_grad) { sparse_grad_ = sparse_grad; }
   void set_type(const TypePtr &type) { type_ = type; }
   void set_shape(const BaseShapePtr &shape) { shape_ = shape; }
   void set_value_desc(const std::string &desc) { value_desc_ = desc; }
   const std::string &value_desc() const { return value_desc_; }
   ValuePtr GetValueTrack() const { return value_; }
-  bool sparse_grad() const { return sparse_grad_; }
+  const std::string &sparse_grad() const { return sparse_grad_; }
   TypePtr GetTypeTrack() const { return type_; }
   BaseShapePtr GetShapeTrack() const { return shape_; }
 
@@ -87,7 +87,7 @@ class AbstractBase : public Base {
   TypePtr type_;
   BaseShapePtr shape_;
   std::string value_desc_;  // store initial value description for error report
-  bool sparse_grad_;
+  std::string sparse_grad_;
 };
 
 class AbstractScalar : public AbstractBase {
@@ -51,9 +51,9 @@ class Parameter:
         requires_grad (bool): True if the parameter requires gradient. Default: True.
         layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in paralle mode,
             broadcast and gradients communication would not be applied on parameters. Default: False.
-        sparse_grad (bool): True if the parameter's gradient is sparse. Default: False.
+        sparse_grad (str): Set if the parameter's gradient is sparse. Default: empty.
     """
-    def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=False):
+    def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=""):
         self.set_parameter_data(default_input)
         self.name = name
         self.requires_grad = requires_grad
@@ -181,9 +181,9 @@ class Parameter:
         return self._sparse_grad
 
     @sparse_grad.setter
-    def sparse_grad(self, value=True):
-        if not isinstance(value, bool):
-            raise TypeError("`sparse_grad` parameter must be bool type")
+    def sparse_grad(self, value=""):
+        if not isinstance(value, str):
+            raise TypeError("`sparse_grad` parameter must be str type")
         self._sparse_grad = value
 
     @property
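A short usage sketch of the string-valued sparse_grad, mirroring the test code further down (the keys are expected to match entries in UNDETERMINED_SPARSE_SHAPE_TYPES):

    import numpy as np
    from mindspore import Tensor, Parameter

    # Each sparse parameter gets its own key instead of the old sparse_grad=True flag.
    w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad="sparse_key_w1")
    w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2", sparse_grad="sparse_key_w2")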
@@ -156,7 +156,7 @@ class Adam(Optimizer):
         To improve parameter groups performance, the customized order of parameters can be supported.
 
         The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-        `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
+        `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
         behavior is currently performed on the CPU, weight decay is not supported.
 
     Args:
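For context, a hedged sketch of the sparse path these notes describe: a network that looks up rows with SparseGatherV2 over a parameter carrying a sparse_grad key, trained with Adam. The class and names follow the tests below and are illustrative only, not a complete training script:

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor, Parameter
    from mindspore.ops import operations as P

    class SparseNet(nn.Cell):
        def __init__(self):
            super(SparseNet, self).__init__()
            self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
                                name="w1", sparse_grad="sparse_key_w1")
            self.gather = P.SparseGatherV2()
            self.axis = 0

        def construct(self, indices):
            return self.gather(self.w1, indices, self.axis)

    net = SparseNet()
    optimizer = nn.Adam(net.trainable_params(), learning_rate=0.1)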
@@ -72,7 +72,7 @@ class FTRL(Optimizer):
 
     Note:
         The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-        `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
+        `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
         behavior is currently performed on the CPU, weight decay is not supported.
 
     Args:
@@ -92,9 +92,10 @@ class LazyAdam(Optimizer):
         applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters.
 
         The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-        `sparse_grad` of `Parameter` being set as True. The sparse behavior, to be notice, is not equivalent to the
+        `sparse_grad` of `Parameter` being set. The sparse behavior, to be notice, is not equivalent to the
         original Adam algorithm, as only the current indices parames will be updated. The sparse feature is under
-        continuous development. The sparse behavior is currently performed on the CPU, weight decay is not supported.
+        continuous development. The sparse behavior is currently performed on the CPU, weight decay is
+        not supported.
 
     Args:
         params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated,
@@ -241,6 +241,7 @@ class HyperMap(HyperMap_):
             return func(*args_list)
         return tuple(map(hypermap, *args_list))
 
+
 class Map(Map_):
     """
     Map will apply the set operation on input sequences.
@@ -271,37 +272,12 @@ class Map(Map_):
         Map_.__init__(self)
 
     def __call__(self, *args):
-        func = args[0]
-        count = 0
-        count_max = 1
-        args_list = args[1:]
-        if self.ops is not None:
-            func = self.ops
-            args_list = args
-        for item in args_list:
-            if isinstance(item, (tuple, list)):
-                count_max = len(item)
-                break
-
-        def get_item(x):
-            nonlocal count
-            if isinstance(x, (tuple, list)):
-                return x[count]
-            return x
-
-        for i in range(count_max):
-            true_args = tuple(map(get_item, args_list))
-            func(*true_args)
-            count = i + 1
-        return True
-
-    def register(self, *type_names):
-        """Register a function for the given type string."""
-
-        def deco(fn):
-            self.register_fn(type_names, fn)
-            return fn
-        return deco
+        func = self.ops
+        args_list = args
+        if self.ops is None:
+            func = args[0]
+            args_list = args[1:]
+        return tuple(map(func, *args_list))
 
 
 class _ListAppend(ListAppend_):
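The rewritten __call__ simply maps the function across the input sequences and returns the resulting tuple, instead of iterating by index and returning True. A minimal sketch of the new behaviour, assuming Map can be imported from mindspore.ops.composite.base and constructed without arguments:

    from mindspore.ops.composite.base import Map

    def square(x):
        # plain Python callable, used only for illustration
        return x * x

    out = Map()(square, (1, 2, 3))
    # out == (1, 4, 9): the mapped results are returned as a tuple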
@@ -53,7 +53,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()
@@ -154,8 +154,8 @@ def test_AdamWeightDecaySparse():
     class NetWithSparseGatherV2(nn.Cell):
         def __init__(self):
            super(NetWithSparseGatherV2, self).__init__()
-            self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad=True)
-            self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2")
+            self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad="sparse_key_w1")
+            self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2", sparse_grad="sparse_key_w2")
             self.gatherv2 = P.SparseGatherV2()
             self.axis = 0
         def construct(self, indices):
@@ -41,7 +41,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()
@@ -43,7 +43,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()
@@ -40,7 +40,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1",
+                                 sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()