!2347 support multi param for tuple grad

Merge pull request !2347 from riemann_penn/support_multi_param_for_tuple_grad
mindspore-ci-bot 2020-06-20 10:30:37 +08:00 committed by Gitee
commit e8639ad91e
13 changed files with 65 additions and 62 deletions

View File

@@ -59,7 +59,8 @@ class UndeterminedShapeType {
public:
explicit UndeterminedShapeType(const std::string &env_str) {
// param_name indices_shape indices_type values_shape values_type dense_shape
// export UNDETERMINED_SPARSE_SHAPE_TYPES="w1:2:Int32:2 1 2:Float32:3 1 2"
// export UNDETERMINED_SPARSE_SHAPE_TYPES="sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1
// 2:Float32:3 1 2"
std::vector<string> fields;
string tmp;
std::stringstream input(env_str);
@@ -115,6 +116,20 @@ std::vector<int> UndeterminedShapeType::GetShape(const std::string &shape_str) {
}
const size_t UndeterminedShapeType::fields_num = 6;
std::unordered_map<std::string, UndeterminedShapeType> g_undetermined_configs;
void InitUndeterminedFromEnv(const std::string &sparse_shape_types) {
if (!g_undetermined_configs.empty()) {
return;
}
std::string tmp;
std::stringstream input(sparse_shape_types);
while (std::getline(input, tmp, ';')) {
auto config = UndeterminedShapeType(tmp);
g_undetermined_configs.insert(std::make_pair(config.param_name(), config));
MS_LOG(DEBUG) << "Undetermined config from env: " << tmp;
}
}
AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
const AbstractBasePtrList &args_spec_list) {
MS_EXCEPTION_IF_NULL(primitive);
@@ -128,27 +143,33 @@ AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePt
MS_LOG(EXCEPTION) << "EnvGetItem evaluator args[1] should be a SymbolicKeyInstance but: " << key->ToString();
}
if (key->sparse_grad()) {
if (!key->sparse_grad().empty()) {
// Will be fixed once undetermined type ready
auto sparse_shape_types = common::GetEnv("UNDETERMINED_SPARSE_SHAPE_TYPES");
if (sparse_shape_types.empty()) {
sparse_shape_types = "w1:2:Int32:2 1 2:Float32:3 1 2";
sparse_shape_types = "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2";
}
MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString() << ", Undetermined shape is "
<< sparse_shape_types;
InitUndeterminedFromEnv(sparse_shape_types);
auto shape_types = UndeterminedShapeType(sparse_shape_types);
auto shape_types = g_undetermined_configs.find(key->sparse_grad());
if (shape_types == g_undetermined_configs.end()) {
MS_LOG(EXCEPTION) << "Param " << key->ToString()
<< " has sparse_grad, but shape/type is not configured in env UNDETERMINED_SPARSE_SHAPE_TYPES: "
<< sparse_shape_types;
}
MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString();
AbstractBasePtrList sparse_list;
// indices
auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.indices_type());
auto indices = std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types.indices_shape()));
auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.indices_type());
auto indices =
std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types->second.indices_shape()));
sparse_list.emplace_back(indices);
// values
auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.values_type());
auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types.values_shape()));
auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.values_type());
auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types->second.values_shape()));
sparse_list.emplace_back(dout);
// dense_shape
sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types.dense_shape()));
sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types->second.dense_shape()));
return std::make_shared<AbstractTuple>(sparse_list);
}
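The new lookup flow above keys each sparse parameter's shape/type configuration by its `sparse_grad` string. As an illustration only (not part of the change), a minimal Python sketch of how the semicolon-separated UNDETERMINED_SPARSE_SHAPE_TYPES value decomposes into per-key configs, mirroring InitUndeterminedFromEnv and the UndeterminedShapeType field order:

def parse_sparse_shape_types(env_str):
    # fields per entry: param_name:indices_shape:indices_type:values_shape:values_type:dense_shape
    configs = {}
    for entry in env_str.split(";"):
        name, ind_shape, ind_type, val_shape, val_type, dense_shape = entry.split(":")
        configs[name] = {
            "indices_shape": [int(x) for x in ind_shape.split()],
            "indices_type": ind_type,
            "values_shape": [int(x) for x in val_shape.split()],
            "values_type": val_type,
            "dense_shape": [int(x) for x in dense_shape.split()],
        }
    return configs

# parse_sparse_shape_types("sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2")
# yields one entry per key: {"sparse_key_w1": {...}, "sparse_key_w2": {...}}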

View File

@@ -229,7 +229,8 @@ bool AbstractSpecializeAction(const ResourcePtr &res) {
if (param_node->has_default()) {
auto param_value = std::dynamic_pointer_cast<ParamValuePy>(param_node->default_param());
AbstractBasePtr ptr = abstract::FromValue(parse::data_converter::PyDataToValue(param_value->value()), true);
auto sparse_grad = py::cast<bool>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
auto sparse_grad =
py::cast<std::string>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
ptr->set_sparse_grad(sparse_grad);
parallel::ParallelParameterContextRestoreInNoTraining(func_graph, param_node, ptr);

View File

@@ -44,7 +44,7 @@ class AbstractBase : public Base {
public:
explicit AbstractBase(const ValuePtr &value = nullptr, const TypePtr &type = kAnyType,
const BaseShapePtr &shape = kNoShape)
: value_(value), type_(type), shape_(shape), sparse_grad_(false) {}
: value_(value), type_(type), shape_(shape), sparse_grad_("") {}
~AbstractBase() override = default;
MS_DECLARE_PARENT(AbstractBase, Base)
@@ -53,13 +53,13 @@ class AbstractBase : public Base {
virtual bool operator==(const AbstractBase &other) const;
void set_value(const ValuePtr &value) { value_ = value; }
void set_sparse_grad(const bool &sparse_grad) { sparse_grad_ = sparse_grad; }
void set_sparse_grad(const std::string &sparse_grad) { sparse_grad_ = sparse_grad; }
void set_type(const TypePtr &type) { type_ = type; }
void set_shape(const BaseShapePtr &shape) { shape_ = shape; }
void set_value_desc(const std::string &desc) { value_desc_ = desc; }
const std::string &value_desc() const { return value_desc_; }
ValuePtr GetValueTrack() const { return value_; }
bool sparse_grad() const { return sparse_grad_; }
const std::string &sparse_grad() const { return sparse_grad_; }
TypePtr GetTypeTrack() const { return type_; }
BaseShapePtr GetShapeTrack() const { return shape_; }
@@ -87,7 +87,7 @@ class AbstractBase : public Base {
TypePtr type_;
BaseShapePtr shape_;
std::string value_desc_; // store initial value description for error report
bool sparse_grad_;
std::string sparse_grad_;
};
class AbstractScalar : public AbstractBase {

View File

@@ -51,9 +51,9 @@ class Parameter:
requires_grad (bool): True if the parameter requires gradient. Default: True.
layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in parallel mode,
broadcast and gradients communication would not be applied on parameters. Default: False.
sparse_grad (bool): True if the parameter's gradient is sparse. Default: False.
sparse_grad (str): The key of the parameter's sparse gradient configuration; set it when the parameter's gradient is sparse. Default: "" (empty).
"""
def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=False):
def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=""):
self.set_parameter_data(default_input)
self.name = name
self.requires_grad = requires_grad
@@ -181,9 +181,9 @@ class Parameter:
return self._sparse_grad
@sparse_grad.setter
def sparse_grad(self, value=True):
if not isinstance(value, bool):
raise TypeError("`sparse_grad` parameter must be bool type")
def sparse_grad(self, value=""):
if not isinstance(value, str):
raise TypeError("`sparse_grad` parameter must be str type")
self._sparse_grad = value
@property
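As the setter change above shows, `sparse_grad` is now a string key rather than a bool, so several parameters can be configured independently. A hedged usage sketch (parameter names are illustrative and mirror the tests later in this change):

import numpy as np
from mindspore import Tensor, Parameter

# Each sparse parameter carries its own key into UNDETERMINED_SPARSE_SHAPE_TYPES.
w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
               name="w1", sparse_grad="sparse_key_w1")
w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)),
               name="w2", sparse_grad="sparse_key_w2")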

View File

@@ -156,7 +156,7 @@ class Adam(Optimizer):
To improve parameter groups performance, the customized order of parameters can be supported.
The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
`sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
`sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
behavior is currently performed on the CPU, weight decay is not supported.
Args:
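To make the note above concrete, a hedged sketch (class and parameter names are illustrative) of a forward net whose parameter uses a sparse gradient key together with Adam; per the note, the sparse path currently runs on the CPU and weight decay is not supported:

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor, Parameter
from mindspore.ops import operations as P

class SparseNet(nn.Cell):
    def __init__(self):
        super(SparseNet, self).__init__()
        # sparse_grad names the config key, as in the tests below
        self.weight = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
                                name="weight", sparse_grad="sparse_key_w1")
        self.gather = P.SparseGatherV2()
        self.axis = 0

    def construct(self, indices):
        return self.gather(self.weight, indices, self.axis)

net = SparseNet()
opt = nn.Adam(net.trainable_params(), learning_rate=0.1)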

View File

@@ -72,7 +72,7 @@ class FTRL(Optimizer):
Note:
The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
`sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
`sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
behavior is currently performed on the CPU, weight decay is not supported.
Args:

View File

@@ -92,9 +92,10 @@ class LazyAdam(Optimizer):
applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters.
The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
`sparse_grad` of `Parameter` being set as True. The sparse behavior, to be notice, is not equivalent to the
`sparse_grad` of `Parameter` being set. Note that the sparse behavior is not equivalent to the
original Adam algorithm, as only the params at the current indices will be updated. The sparse feature is under
continuous development. The sparse behavior is currently performed on the CPU, weight decay is not supported.
continuous development. The sparse behavior is currently performed on the CPU, weight decay is
not supported.
Args:
params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated,

View File

@@ -241,6 +241,7 @@ class HyperMap(HyperMap_):
return func(*args_list)
return tuple(map(hypermap, *args_list))
class Map(Map_):
"""
Map will apply the set operation on input sequences.
@@ -271,37 +272,12 @@ class Map(Map_):
Map_.__init__(self)
def __call__(self, *args):
func = args[0]
count = 0
count_max = 1
args_list = args[1:]
if self.ops is not None:
func = self.ops
args_list = args
for item in args_list:
if isinstance(item, (tuple, list)):
count_max = len(item)
break
def get_item(x):
nonlocal count
if isinstance(x, (tuple, list)):
return x[count]
return x
for i in range(count_max):
true_args = tuple(map(get_item, args_list))
func(*true_args)
count = i + 1
return True
def register(self, *type_names):
"""Register a function for the given type string."""
def deco(fn):
self.register_fn(type_names, fn)
return fn
return deco
func = self.ops
args_list = args
if self.ops is None:
func = args[0]
args_list = args[1:]
return tuple(map(func, *args_list))
class _ListAppend(ListAppend_):
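The rewritten `Map.__call__` above drops the index-counting loop and simply maps the function across the input sequences. A plain-Python sketch of the resulting behavior (illustrative only):

def map_call(func, *sequences):
    # element-wise application, returning a tuple like the new Map.__call__
    return tuple(map(func, *sequences))

# map_call(lambda x, y: x + y, (1, 2, 3), (4, 5, 6)) -> (5, 7, 9)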

View File

@@ -53,7 +53,8 @@ class NetWithSparseGatherV2(nn.Cell):
""" NetWithSparseGatherV2 definition """
def __init__(self):
super(NetWithSparseGatherV2, self).__init__()
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
name="weight1", sparse_grad="sparse_key_w1")
self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
self.axis = 0
self.gather = P.SparseGatherV2()

View File

@@ -154,8 +154,8 @@ def test_AdamWeightDecaySparse():
class NetWithSparseGatherV2(nn.Cell):
def __init__(self):
super(NetWithSparseGatherV2, self).__init__()
self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad=True)
self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2")
self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad="sparse_key_w1")
self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2", sparse_grad="sparse_key_w2")
self.gatherv2 = P.SparseGatherV2()
self.axis = 0
def construct(self, indices):
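This test declares two sparse parameters with distinct keys, which is the multi-parameter case this change enables. A hedged sketch of the matching environment setup (assuming it is applied before graph compilation; the value mirrors the default used in InferImplEnvGetItem above):

import os

# One semicolon-separated entry per sparse key used by the network.
os.environ["UNDETERMINED_SPARSE_SHAPE_TYPES"] = (
    "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;"
    "sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2"
)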

View File

@@ -41,7 +41,8 @@ class NetWithSparseGatherV2(nn.Cell):
""" NetWithSparseGatherV2 definition """
def __init__(self):
super(NetWithSparseGatherV2, self).__init__()
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
name="weight1", sparse_grad="sparse_key_w1")
self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
self.axis = 0
self.gather = P.SparseGatherV2()

View File

@@ -43,7 +43,8 @@ class NetWithSparseGatherV2(nn.Cell):
""" NetWithSparseGatherV2 definition """
def __init__(self):
super(NetWithSparseGatherV2, self).__init__()
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
name="weight1", sparse_grad="sparse_key_w1")
self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
self.axis = 0
self.gather = P.SparseGatherV2()

View File

@@ -40,7 +40,8 @@ class NetWithSparseGatherV2(nn.Cell):
""" NetWithSparseGatherV2 definition """
def __init__(self):
super(NetWithSparseGatherV2, self).__init__()
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1",
sparse_grad="sparse_key_w1")
self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="weight2")
self.axis = 0
self.gather = P.SparseGatherV2()