forked from mindspore-Ecosystem/mindspore
!7771 Add Parallel SegmentOps
Merge pull request !7771 from huangxinjing/segment_ops
This commit is contained in:
commit
376fa95717
|
@ -947,5 +947,122 @@ double GatherV2PCost::GetBackwardComputationCost(const std::vector<TensorInfo> &
|
|||
|
||||
return result;
|
||||
}
|
||||
|
||||
// The forward communication is determined by whether the slice is column split or row split
|
||||
// The number of segments is actually the shape[0] of the output, which is the cost of the AllReduce
|
||||
double UnsortedSegmentSumCost::GetForwardCommCost(const std::vector<TensorInfo> &inputs,
|
||||
const std::vector<TensorInfo> &outputs, int32_t stage_id) const {
|
||||
TensorInfo input0 = inputs[0];
|
||||
TensorInfo input1 = inputs[1];
|
||||
TensorInfo output0 = outputs[0];
|
||||
Shape input0_shape = input0.shape();
|
||||
Shape input0_slice_shape = inputs[0].slice_shape();
|
||||
double result = 0.0;
|
||||
if (inputs_type_lengths_.size() != inputs.size()) {
|
||||
MS_LOG(EXCEPTION) << "Invalid inputs type size " << inputs_type_lengths_.size() << " for UnsortedSegmentSum cost";
|
||||
}
|
||||
// If the shape b is not the same as the shape a, we regard it as column slice
|
||||
for (size_t i = 0; i < input1.shape().size(); ++i) {
|
||||
if (input0_shape[i] != input0_slice_shape[i]) {
|
||||
result = ListProduct(output0.slice_shape()) * static_cast<double>(outputs_type_lengths_[0]);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
double UnsortedSegmentSumCost::GetBackwardCommCost(const std::vector<TensorInfo> &inputs,
|
||||
const std::vector<TensorInfo> &outputs, int32_t stage_id) const {
|
||||
TensorInfo input0 = inputs[0];
|
||||
TensorInfo input1 = inputs[1];
|
||||
TensorInfo output0 = outputs[0];
|
||||
Shape input0_shape = input0.shape();
|
||||
Shape input0_slice_shape = inputs[0].slice_shape();
|
||||
double result = 0.0;
|
||||
if (inputs_type_lengths_.size() != inputs.size()) {
|
||||
MS_LOG(EXCEPTION) << "Invalid inputs type size " << inputs_type_lengths_.size() << " for UnsortedSegmentSum cost";
|
||||
}
|
||||
if (is_parameter_[0]) {
|
||||
// If the forward process has a AllReduce, then the backward also needs one.
|
||||
for (size_t i = 0; i < input1.shape().size(); ++i) {
|
||||
if (input0_shape[i] != input0_slice_shape[i]) {
|
||||
result = ListProduct(output0.slice_shape()) * static_cast<double>(outputs_type_lengths_[0]);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
double UnsortedSegmentSumCost::GetForwardComputationCost(const std::vector<TensorInfo> &inputs,
|
||||
const std::vector<TensorInfo> &outputs, int32_t) const {
|
||||
// In forward phase, the computation cost = slice(A) + slice(B)
|
||||
Shape input0_slice_shape = inputs[0].slice_shape();
|
||||
Shape input1_slice_shape = inputs[1].slice_shape();
|
||||
Shape output_slice_shape = outputs[0].slice_shape();
|
||||
double result = ListProduct(input0_slice_shape) * static_cast<double>(inputs_type_lengths_[0]) +
|
||||
ListProduct(input1_slice_shape) * static_cast<double>(inputs_type_lengths_[1]) +
|
||||
ListProduct(output_slice_shape) * static_cast<double>(outputs_type_lengths_[0]);
|
||||
return result;
|
||||
}
|
||||
|
||||
double UnsortedSegmentMinCost::GetForwardCommCost(const std::vector<TensorInfo> &inputs,
|
||||
const std::vector<TensorInfo> &outputs, int32_t stage_id) const {
|
||||
TensorInfo input0 = inputs[0];
|
||||
TensorInfo input1 = inputs[1];
|
||||
TensorInfo output0 = outputs[0];
|
||||
Shape input0_shape = input0.shape();
|
||||
Shape input0_slice_shape = inputs[0].slice_shape();
|
||||
double result = 0.0;
|
||||
if (inputs_type_lengths_.size() != inputs.size()) {
|
||||
MS_LOG(EXCEPTION) << "Invalid inputs type size " << inputs_type_lengths_.size()
|
||||
<< " for UnsortedSegmentMinCost cost";
|
||||
}
|
||||
// If the shape b is not the same as the shape a, we regard it as column slice
|
||||
// The cost is a AllGather operation, the shape is the same as the output of UnsortedSegmentMin.
|
||||
for (size_t i = 0; i < input1.shape().size(); ++i) {
|
||||
if (input0_shape[i] != input0_slice_shape[i]) {
|
||||
result = ListProduct(output0.slice_shape()) * static_cast<double>(outputs_type_lengths_[0]);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
double UnsortedSegmentMinCost::GetBackwardCommCost(const std::vector<TensorInfo> &inputs,
|
||||
const std::vector<TensorInfo> &outputs, int32_t stage_id) const {
|
||||
TensorInfo input0 = inputs[0];
|
||||
TensorInfo input1 = inputs[1];
|
||||
TensorInfo output0 = outputs[0];
|
||||
Shape input0_shape = input0.shape();
|
||||
Shape input0_slice_shape = inputs[0].slice_shape();
|
||||
double result = 0.0;
|
||||
if (inputs_type_lengths_.size() != inputs.size()) {
|
||||
MS_LOG(EXCEPTION) << "Invalid inputs type size " << inputs_type_lengths_.size()
|
||||
<< " for UnsortedSegmentMinCost cost";
|
||||
}
|
||||
if (is_parameter_[0]) {
|
||||
// If the forward process has a AllGather, then the backward also needs one ReduceScatter.
|
||||
for (size_t i = 0; i < input1.shape().size(); ++i) {
|
||||
if (input0_shape[i] != input0_slice_shape[i]) {
|
||||
result = ListProduct(output0.slice_shape()) * static_cast<double>(outputs_type_lengths_[0]);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
double UnsortedSegmentMinCost::GetForwardComputationCost(const std::vector<TensorInfo> &inputs,
|
||||
const std::vector<TensorInfo> &outputs, int32_t) const {
|
||||
// In forward phase, the computation cost = slice(A) + slice(B)
|
||||
Shape input0_slice_shape = inputs[0].slice_shape();
|
||||
Shape input1_slice_shape = inputs[1].slice_shape();
|
||||
Shape output_slice_shape = outputs[0].slice_shape();
|
||||
// The forward operation is UnsortedSegmentMin + ReudceMin
|
||||
double result = ListProduct(input0_slice_shape) * static_cast<double>(inputs_type_lengths_[0]) +
|
||||
ListProduct(input1_slice_shape) * static_cast<double>(inputs_type_lengths_[1]) +
|
||||
ListProduct(output_slice_shape) * static_cast<double>(outputs_type_lengths_[0]) +
|
||||
ListProduct(output_slice_shape) * static_cast<double>(outputs_type_lengths_[0]); // ReduceMin
|
||||
return result;
|
||||
}
|
||||
} // namespace parallel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -578,6 +578,58 @@ class DropOutCost : public OperatorCost {
|
|||
|
||||
using DropOutCostPtr = std::shared_ptr<DropOutCost>;
|
||||
|
||||
class UnsortedSegmentSumCost : public OperatorCost {
|
||||
public:
|
||||
explicit UnsortedSegmentSumCost(bool is_inputs_related) : OperatorCost(is_inputs_related) {}
|
||||
UnsortedSegmentSumCost() : OperatorCost(true) {}
|
||||
~UnsortedSegmentSumCost() override = default;
|
||||
|
||||
double GetCommCost(const std::vector<TensorInfo> &inputs, const std::vector<TensorInfo> &outputs,
|
||||
int32_t stage_id) const override {
|
||||
return GetForwardCommCost(inputs, outputs, stage_id) + GetBackwardCommCost(inputs, outputs, stage_id);
|
||||
}
|
||||
double GetForwardCommCost(const std::vector<TensorInfo> &, const std::vector<TensorInfo> &, int32_t) const override;
|
||||
double GetBackwardCommCost(const std::vector<TensorInfo> &, const std::vector<TensorInfo> &, int32_t) const override;
|
||||
double GetComputationCost(const std::vector<TensorInfo> &inputs, const std::vector<TensorInfo> &outputs,
|
||||
int32_t stage_id) const override {
|
||||
return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id);
|
||||
}
|
||||
double GetForwardComputationCost(const std::vector<TensorInfo> &, const std::vector<TensorInfo> &,
|
||||
int32_t) const override;
|
||||
double GetBackwardComputationCost(const std::vector<TensorInfo> &, const std::vector<TensorInfo> &,
|
||||
int32_t) const override {
|
||||
return 0.0;
|
||||
}
|
||||
};
|
||||
|
||||
using UnsortedSegmentSumCostPtr = std::shared_ptr<UnsortedSegmentSumCost>;
|
||||
|
||||
class UnsortedSegmentMinCost : public OperatorCost {
|
||||
public:
|
||||
explicit UnsortedSegmentMinCost(bool is_inputs_related) : OperatorCost(is_inputs_related) {}
|
||||
UnsortedSegmentMinCost() : OperatorCost(true) {}
|
||||
~UnsortedSegmentMinCost() override = default;
|
||||
|
||||
double GetCommCost(const std::vector<TensorInfo> &inputs, const std::vector<TensorInfo> &outputs,
|
||||
int32_t stage_id) const override {
|
||||
return GetForwardCommCost(inputs, outputs, stage_id) + GetBackwardCommCost(inputs, outputs, stage_id);
|
||||
}
|
||||
double GetForwardCommCost(const std::vector<TensorInfo> &, const std::vector<TensorInfo> &, int32_t) const override;
|
||||
double GetBackwardCommCost(const std::vector<TensorInfo> &, const std::vector<TensorInfo> &, int32_t) const override;
|
||||
double GetComputationCost(const std::vector<TensorInfo> &inputs, const std::vector<TensorInfo> &outputs,
|
||||
int32_t stage_id) const override {
|
||||
return GetForwardComputationCost(inputs, outputs, stage_id) + GetBackwardComputationCost(inputs, outputs, stage_id);
|
||||
}
|
||||
double GetForwardComputationCost(const std::vector<TensorInfo> &, const std::vector<TensorInfo> &,
|
||||
int32_t) const override;
|
||||
double GetBackwardComputationCost(const std::vector<TensorInfo> &, const std::vector<TensorInfo> &,
|
||||
int32_t) const override {
|
||||
return 0.0;
|
||||
}
|
||||
};
|
||||
|
||||
using UnsortedSegmentMinCostPtr = std::shared_ptr<UnsortedSegmentMinCost>;
|
||||
|
||||
class LayerNormCost : public OperatorCost {
|
||||
public:
|
||||
explicit LayerNormCost(bool is_inputs_related) : OperatorCost(is_inputs_related) {}
|
||||
|
|
|
@ -173,6 +173,8 @@ REGISTER(ExpandDimsInfo);
|
|||
REGISTER(SqueezeInfo);
|
||||
REGISTER(SigmoidCrossEntropyWithLogitsInfo);
|
||||
REGISTER(SquareInfo);
|
||||
REGISTER(UnsortedSegmentSumInfo);
|
||||
REGISTER(UnsortedSegmentMinInfo);
|
||||
REGISTER(GatherV2PInfo);
|
||||
REGISTER(EmbeddingLookupInfo);
|
||||
REGISTER(TileInfo);
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
#include "frontend/parallel/ops_info/reduce_method_info.h"
|
||||
#include "frontend/parallel/ops_info/reshape_info.h"
|
||||
#include "frontend/parallel/ops_info/transpose_info.h"
|
||||
#include "frontend/parallel/ops_info/unsorted_segment_op_info.h"
|
||||
#include "frontend/parallel/ops_info/virtual_dataset_info.h"
|
||||
#include "frontend/parallel/ops_info/gather_v2_p_info.h"
|
||||
#include "frontend/parallel/ops_info/tile_info.h"
|
||||
|
|
|
@ -283,6 +283,8 @@ constexpr char IN_TOPK[] = "InTopK";
|
|||
constexpr char GATHER_ND[] = "GatherNd";
|
||||
constexpr char UNSORTEF_SEGMENT_MIND[] = "UnsortedSegmentMinD";
|
||||
constexpr char UNSORTEF_SEGMENT_PRODD[] = "UnsortedSegmentProdD";
|
||||
constexpr char UNSORTED_SEGMENT_SUM[] = "UnsortedSegmentSum";
|
||||
constexpr char UNSORTED_SEGMENT_MIN[] = "UnsortedSegmentMin";
|
||||
constexpr char DEPTHWISE_CONV2D_NATIVE[] = "DepthwiseConv2dNative";
|
||||
constexpr char DEPTHWISE_CONV2D[] = "DepthwiseConv2D";
|
||||
constexpr char ADD[] = "Add";
|
||||
|
|
|
@ -0,0 +1,313 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "frontend/parallel/ops_info/unsorted_segment_op_info.h"
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "frontend/parallel/auto_parallel/costmodel.h"
|
||||
#include "frontend/parallel/device_matrix.h"
|
||||
#include "frontend/parallel/graph_util/generate_graph.h"
|
||||
#include "frontend/parallel/strategy.h"
|
||||
#include "ir/tensor.h"
|
||||
#include "ir/value.h"
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace parallel {
|
||||
// The operator UnsortedSegment accepts three inputs:
|
||||
// input0 : vector, the shape is x1,x2,x3,...,xr
|
||||
// input1 : segment id, the shape is x1,x2,..,xn
|
||||
// input2 : value, the number of the segments
|
||||
// For Sum: r >= n
|
||||
// For Min: r >=n, n=1
|
||||
// Validates the operator's inputs, outputs and `num_segments` value.
// Returns FAILED (after logging the reason) when:
//   - the number of input shapes is not UNSORTEDSEGMENTOP_INPUTS_SIZE,
//   - the number of output shapes is not UNSORTEDSEGMENTOP_OUTPUTS_SIZE,
//   - the third input value (number of segments) is missing or null,
//   - the first input is a scalar (empty shape),
//   - the number of segments is negative.
// Returns SUCCESS otherwise.
Status UnsortedSegmentOpInfo::GetAttrs() {
  // Position of the number-of-segments value inside input_value_ (it is the operator's third input).
  constexpr size_t kNumSegmentsIndex = 2;

  if (inputs_shape_.size() != UNSORTEDSEGMENTOP_INPUTS_SIZE) {
    MS_LOG(ERROR) << name_ << ": inputs shape size must be " << UNSORTEDSEGMENTOP_INPUTS_SIZE << ", but is "
                  << inputs_shape_.size();
    return FAILED;
  }
  if (outputs_shape_.size() != UNSORTEDSEGMENTOP_OUTPUTS_SIZE) {
    MS_LOG(ERROR) << name_ << ": outputs shape size must be " << UNSORTEDSEGMENTOP_OUTPUTS_SIZE << ", but is "
                  << outputs_shape_.size();
    return FAILED;
  }
  // Check the size first so that a missing third value is reported as FAILED instead of letting .at() throw.
  if (input_value_.size() <= kNumSegmentsIndex) {
    MS_LOG(ERROR) << name_ << ": the third input value is missing, the number of input values is "
                  << input_value_.size();
    return FAILED;
  }
  if (input_value_.at(kNumSegmentsIndex) == nullptr) {
    MS_LOG(ERROR) << name_ << ": the third input value is nullptr, is not a ValueNode!";
    return FAILED;
  }

  if (inputs_shape_.at(0).empty()) {
    MS_LOG(ERROR) << name_ << ": input can not be a scalar!";
    return FAILED;
  }
  const int num_segments = GetValue<int>(input_value_.at(kNumSegmentsIndex));
  if (num_segments < 0) {
    MS_LOG(ERROR) << name_ << ": the number of segments should be non negative value.";
    return FAILED;
  }

  return SUCCESS;
}
|
||||
|
||||
// Checks that `strategy` is a valid sharding strategy for this operator.
// Beyond the generic CheckStrategyValue validation, every dimension covered by the segment ids (input1) must have
// the same extent and the same shard count as the corresponding leading dimension of the data tensor (input0).
// InferDevMatrixShape and InferTensorMap derive the ids layout from the data strategy, so a mismatch in either the
// strategy or the shape is rejected.
// Returns SUCCESS when the strategy is acceptable, FAILED (after logging) otherwise.
Status UnsortedSegmentOpInfo::CheckStrategy(const StrategyPtr &strategy) {
  // Check size
  if (inputs_shape_.size() != UNSORTEDSEGMENTOP_INPUTS_SIZE) {
    MS_LOG(ERROR) << name_ << ": inputs shape size must be " << UNSORTEDSEGMENTOP_INPUTS_SIZE << ", but is "
                  << inputs_shape_.size();
    return FAILED;
  }
  if (outputs_shape_.size() != UNSORTEDSEGMENTOP_OUTPUTS_SIZE) {
    MS_LOG(ERROR) << name_ << ": outputs shape size must be " << UNSORTEDSEGMENTOP_OUTPUTS_SIZE << ", but is "
                  << outputs_shape_.size();
    return FAILED;
  }
  // The strategy of the first and the second input should be set.
  if (CheckStrategyValue(strategy, {inputs_shape_.at(0), inputs_shape_.at(1)}) != SUCCESS) {
    MS_LOG(ERROR) << name_ << ": Invalid strategy.";
    return FAILED;
  }
  const Strategys stra = strategy->GetInputDim();
  const Dimensions &sub_a_strategy = stra.at(0);
  const Dimensions &sub_b_strategy = stra.at(1);
  const Shape &input_a_shape = inputs_shape_.at(0);
  const Shape &input_b_shape = inputs_shape_.at(1);

  // The rank of input b must be equal to or smaller than the rank of input a; verify this (and the strategy
  // lengths) explicitly so the element-wise comparison below never reads out of bounds.
  const size_t ids_rank = input_b_shape.size();
  if (ids_rank > input_a_shape.size() || ids_rank > sub_a_strategy.size() || ids_rank > sub_b_strategy.size()) {
    MS_LOG(ERROR) << name_ << " : Invalid strategy. The rank of the input1 is " << ids_rank
                  << ", which exceeds the rank of the input0 or the length of a strategy.";
    return FAILED;
  }
  for (size_t i = 0; i < ids_rank; ++i) {
    // Either kind of mismatch is an error: the message below requires both the shape and the strategy to agree.
    if ((sub_a_strategy[i] != sub_b_strategy[i]) || (input_a_shape[i] != input_b_shape[i])) {
      MS_LOG(ERROR) << name_
                    << " : Invalid strategy. The shape and the strategy of the input0 and input1 "
                       "should be same before the front size of the input[1]";
      return FAILED;
    }
  }
  return SUCCESS;
}
|
||||
|
||||
// The device matrix is taken directly from the strategy of the first input (the data tensor).
Status UnsortedSegmentOpInfo::InferDevMatrixShape() {
  const Strategys input_strategies = strategy_->GetInputDim();
  dev_matrix_shape_ = input_strategies.at(0);
  return SUCCESS;
}
|
||||
|
||||
// As the op converts the vector x1,x2,x3...,xr -> number of segments, xn,..,xr
|
||||
// the dimension x1,x2,x3,..,xn is eliminated
|
||||
// suppose the strategy of the inputs is (a,b,c,d), (a,b)
|
||||
// the tensor map of the input vector is (3,2,1,0), id:(1, 0)
|
||||
// the output vector is (-1, 1, 0)
|
||||
Status UnsortedSegmentOpInfo::InferTensorMap() {
|
||||
Shape tensor_map_in;
|
||||
Shape tensor_map_in_index;
|
||||
Shape tensor_map_out;
|
||||
size_t input0_size = inputs_shape_.at(0).size();
|
||||
// such as 4: tensor_map_index [3,2,1,0]
|
||||
for (size_t i = 0; i < input0_size; ++i) {
|
||||
tensor_map_in.push_back(SizeToInt(input0_size - i - 1));
|
||||
tensor_map_in_index.push_back(SizeToInt(input0_size - i - 1));
|
||||
tensor_map_out.push_back(SizeToInt(input0_size - i - 1));
|
||||
}
|
||||
|
||||
(void)tensor_map_out.erase(tensor_map_out.begin(), tensor_map_out.begin() + inputs_shape_.at(1).size() - 1);
|
||||
// A special case: the input vector (a,) id (a,) or input vector (a,b,c), id(a,b,c)
|
||||
// The output vector will be a 1-dim vector,
|
||||
// These two kinds of situations as row slice.
|
||||
tensor_map_out[0] = -1;
|
||||
(void)tensor_map_in_index.erase(tensor_map_in_index.begin() + inputs_shape_.at(1).size(), tensor_map_in_index.end());
|
||||
if (tensor_map_out.size() != outputs_shape_.at(0).size()) {
|
||||
MS_LOG(ERROR) << "Out tensor map size is not equal to output size! Out tensor map size is " << tensor_map_out.size()
|
||||
<< " output size is " << outputs_shape_.at(0).size();
|
||||
return FAILED;
|
||||
}
|
||||
|
||||
inputs_tensor_map_.emplace_back(std::move(tensor_map_in));
|
||||
inputs_tensor_map_.emplace_back(std::move(tensor_map_in_index));
|
||||
outputs_tensor_map_.emplace_back(std::move(tensor_map_out));
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
// Builds the tensor layouts of the data tensor, the segment ids and the output from the device matrix and the
// previously inferred tensor maps, then records one TensorInfo per tensor.
// Returns FAILED as soon as one layout cannot be initialised (nothing is recorded in that case).
Status UnsortedSegmentOpInfo::InferTensorInfo() {
  TensorLayout data_layout;
  if (data_layout.InitFromVector(dev_matrix_shape_, inputs_tensor_map_.at(0), inputs_shape_.at(0)) != SUCCESS) {
    return FAILED;
  }
  TensorLayout ids_layout;
  if (ids_layout.InitFromVector(dev_matrix_shape_, inputs_tensor_map_.at(1), inputs_shape_.at(1)) != SUCCESS) {
    return FAILED;
  }
  TensorLayout out_layout;
  if (out_layout.InitFromVector(dev_matrix_shape_, outputs_tensor_map_.at(0), outputs_shape_.at(0)) != SUCCESS) {
    return FAILED;
  }

  // Inputs are registered in operator order: data first, ids second.
  inputs_tensor_info_.push_back(TensorInfo(data_layout));
  inputs_tensor_info_.push_back(TensorInfo(ids_layout));
  outputs_tensor_info_.push_back(TensorInfo(out_layout));
  return SUCCESS;
}
|
||||
|
||||
// Initialises the operator info for `strategy` through InitWithAutoRepeatCalc and logs the outcome.
Status UnsortedSegmentOpInfo::Init(const StrategyPtr &strategy) {
  const bool initialised = (InitWithAutoRepeatCalc(strategy) == SUCCESS);
  if (initialised) {
    MS_LOG(INFO) << name_ << ": Init success.";
    return SUCCESS;
  }
  MS_LOG(ERROR) << name_ << ": Init failed.";
  return FAILED;
}
|
||||
|
||||
// Initialises the operator info for the cost model through InitForCostModelWithAutoRepeatCalc and logs the outcome.
Status UnsortedSegmentOpInfo::InitForCostModel(const StrategyPtr &strategy) {
  const bool initialised = (InitForCostModelWithAutoRepeatCalc(strategy) == SUCCESS);
  if (initialised) {
    MS_LOG(INFO) << name_ << ": Init for cost model success.";
    return SUCCESS;
  }
  MS_LOG(ERROR) << name_ << ": Init for cost model failed.";
  return FAILED;
}
|
||||
|
||||
// Set the default strategy
|
||||
Status UnsortedSegmentOpInfo::GenerateStrategies(int32_t stage_id) {
|
||||
Shape input0_split(inputs_shape_[0].size(), 1);
|
||||
Shapes splittable_inputs = {input0_split};
|
||||
|
||||
std::vector<StrategyPtr> sp_vector;
|
||||
if (GenerateStrategiesForIndependentInputs(stage_id, {inputs_shape_.at(0)}, splittable_inputs, &sp_vector) !=
|
||||
SUCCESS) {
|
||||
MS_LOG(ERROR) << name_ << " : Generate strategies for independent inputs() failed.";
|
||||
return FAILED;
|
||||
}
|
||||
for (auto &sp : sp_vector) {
|
||||
Strategys tmp_strategy;
|
||||
Dimensions first_input_strategy = sp->GetInputDim()[0];
|
||||
Dimensions second_input_strategy;
|
||||
for (size_t i = 0; i < inputs_shape_[1].size(); ++i) {
|
||||
second_input_strategy.push_back(first_input_strategy[i]);
|
||||
}
|
||||
tmp_strategy.push_back(first_input_strategy);
|
||||
tmp_strategy.push_back(second_input_strategy);
|
||||
sp->ResetInputs(tmp_strategy);
|
||||
}
|
||||
size_t success = 0;
|
||||
for (auto &sp : sp_vector) {
|
||||
PrintStrategy(sp);
|
||||
if (SetCostUnderStrategy(sp) == SUCCESS) {
|
||||
success++;
|
||||
MS_LOG(INFO) << name_ << " : Successfully generated " << success << " strategy";
|
||||
PrintStrategy(sp);
|
||||
}
|
||||
}
|
||||
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
// if the dimension of the input b is split, we regarded it as the row slice, thus requires a AllReduce
|
||||
// otherwise it is column slice,
|
||||
Status UnsortedSegmentOpInfo::InferForwardCommunication() {
|
||||
forward_op_.clear();
|
||||
std::vector<Group> group_list;
|
||||
Shape tmp_group_tensor_map = outputs_tensor_map_.at(0);
|
||||
if (repeated_calc_num_ > 1) {
|
||||
for (size_t i = 1; i < tmp_group_tensor_map.size(); ++i) {
|
||||
tmp_group_tensor_map[i] += 1;
|
||||
}
|
||||
tmp_group_tensor_map.push_back(0);
|
||||
}
|
||||
if (CreateGroupByTensorMap(tmp_group_tensor_map, &group_list) != SUCCESS) {
|
||||
MS_LOG(ERROR) << name_ << " : Infer forward communication, create group failed.";
|
||||
return FAILED;
|
||||
} else if (group_list.empty()) {
|
||||
MS_LOG(INFO) << name_ << " : Forward all reduce is not required.";
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
Operator op;
|
||||
op = CreateAllReduceOp(REDUCE_OP_SUM, group_list[0].name());
|
||||
|
||||
forward_op_.push_back(op);
|
||||
MS_LOG(INFO) << name_ << " : The group name of forward communication is " << group_list[0].name();
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
// Computes and stores the cost of `strategy`; the work is delegated entirely to SetCostUnderStrategyBase.
Status UnsortedSegmentOpInfo::SetCostUnderStrategy(const StrategyPtr &strategy) {
  const Status status = SetCostUnderStrategyBase(strategy);
  return status;
}
|
||||
|
||||
std::shared_ptr<Strategys> UnsortedSegmentOpInfo::GenerateBatchStrategies() {
|
||||
if (inputs_shape_.size() != UNSORTEDSEGMENTOP_INPUTS_SIZE) {
|
||||
MS_LOG(EXCEPTION) << name_ << ": inputs shape size must be " << UNSORTEDSEGMENTOP_INPUTS_SIZE << ", but is "
|
||||
<< inputs_shape_.size();
|
||||
}
|
||||
CheckGlobalDeviceManager();
|
||||
size_t dev_num = g_device_manager->GetDeviceListByStageId(0).size();
|
||||
if (GetAttrs() != SUCCESS) {
|
||||
MS_LOG(EXCEPTION) << "GetAttrs failed!";
|
||||
}
|
||||
|
||||
Dimensions strategy_a;
|
||||
Dimensions strategy_b;
|
||||
strategy_a.push_back(SizeToInt(dev_num));
|
||||
for (size_t i = 1; i < inputs_shape_[0].size(); i++) {
|
||||
strategy_a.push_back(1);
|
||||
}
|
||||
|
||||
strategy_b.push_back(SizeToInt(dev_num));
|
||||
for (size_t i = 1; i < inputs_shape_[1].size(); i++) {
|
||||
strategy_b.push_back(1);
|
||||
}
|
||||
Strategys strategy_v = {strategy_a, strategy_b};
|
||||
return std::make_shared<Strategys>(strategy_v);
|
||||
}
|
||||
|
||||
// When the index is splited, the graph should be replaced
|
||||
// a special case is when the shape input equals the shape of ids, we regard it as column slice,
|
||||
// thus there is no need for repalce graphs
|
||||
ReplaceGraphPtr UnsortedSegmentMinInfo::replace_graph(const CNodePtr &cnode) {
|
||||
auto input_id_strategy = strategy_->GetInputDim().at(1);
|
||||
// 1. the two input shapes are same, and the strategy is not all ones
|
||||
if (std::any_of(input_id_strategy.begin(), input_id_strategy.end(), [](const int32_t &shard) { return shard > 1; })) {
|
||||
if (ComputeReplaceGraph(cnode) != SUCCESS) {
|
||||
MS_LOG(EXCEPTION) << name_ << ": ComputeReplaceGraph failed.";
|
||||
}
|
||||
}
|
||||
return replace_graph_;
|
||||
}
|
||||
|
||||
// Builds the replacement sub-graph for a sharded UnsortedSegmentMin and stores it in replace_graph_:
//   UnsortedSegmentMin -> ExpandDims(axis 0) -> AllGather -> ReduceMin(axis 0)
// The two virtual inputs of the new UnsortedSegmentMin node are bound to inputs 1 and 2 of the original node.
// Returns FAILED when the graph generator cannot be initialised from `cnode`.
Status UnsortedSegmentMinInfo::ComputeReplaceGraph(const CNodePtr &cnode) {
  auto builder = GenerateGraph();
  if (builder.Init(cnode) != SUCCESS) {
    MS_LOG(ERROR) << "GenerateGraph Init failed";
    return FAILED;
  }

  // The number of segments is the third input value of the original operator.
  const auto num_segments = GetValue<int>(input_value_.at(2));

  // Local minimum on every device.
  auto local_min = builder.PushBack({builder.NewOpInst(UNSORTED_SEGMENT_MIN), builder.virtual_input_node(),
                                     builder.virtual_input_node(), CreatInt32Imm(num_segments)});
  // Add a leading axis, gather the per-device results along it, then reduce over it.
  auto expanded = builder.PushBack({builder.NewOpInst(EXPAND_DIMS), local_min, CreatInt32Imm(0)});
  auto gathered = builder.PushBack({builder.NewOpInst(ALL_GATHER), expanded});
  auto global_min = builder.PushBack({builder.NewOpInst(REDUCE_MIN), gathered, CreatInt32Imm(0)});

  std::vector<std::pair<AnfNodePtr, int>> graph_inputs;
  graph_inputs.push_back(std::make_pair(local_min, 1));
  graph_inputs.push_back(std::make_pair(local_min, 2));
  replace_graph_ = std::make_shared<std::pair<std::vector<std::pair<AnfNodePtr, int>>, AnfNodePtr>>(
    std::make_pair(graph_inputs, global_min));

  return SUCCESS;
}
|
||||
|
||||
} // namespace parallel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,84 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_OPS_INFO_UNSORTEDSEGMENTOP_INFO_H_
|
||||
#define MINDSPORE_CCSRC_FRONTEND_PARALLEL_OPS_INFO_UNSORTEDSEGMENTOP_INFO_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "frontend/parallel/auto_parallel/operator_costmodel.h"
|
||||
#include "frontend/parallel/ops_info/operator_info.h"
|
||||
#include "frontend/parallel/strategy.h"
|
||||
#include "ir/value.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace parallel {
|
||||
constexpr size_t UNSORTEDSEGMENTOP_INPUTS_SIZE = 2;
|
||||
constexpr size_t UNSORTEDSEGMENTOP_OUTPUTS_SIZE = 1;
|
||||
class UnsortedSegmentOpInfo : public OperatorInfo {
|
||||
public:
|
||||
UnsortedSegmentOpInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape,
|
||||
const PrimitiveAttrs &attrs, OperatorCostPtr cost)
|
||||
: OperatorInfo(name, inputs_shape, outputs_shape, attrs, cost) {}
|
||||
~UnsortedSegmentOpInfo() override = default;
|
||||
Status Init(const StrategyPtr &strategy) override;
|
||||
Status InitForCostModel(const StrategyPtr &strategy) override;
|
||||
|
||||
Status GenerateStrategies(int32_t stage_id) override;
|
||||
Status SetCostUnderStrategy(const StrategyPtr &strategy) override;
|
||||
std::shared_ptr<Strategys> GenerateBatchStrategies() override;
|
||||
|
||||
protected:
|
||||
Status CheckStrategy(const StrategyPtr &strategy) override;
|
||||
Status InferForwardCommunication() override;
|
||||
Status InferMirrorOps() override { return SUCCESS; }
|
||||
Status InferTensorInfo() override;
|
||||
Status InferDevMatrixShape() override;
|
||||
Status InferTensorMap() override;
|
||||
Status GetAttrs() override;
|
||||
|
||||
private:
|
||||
Status ComputeReplaceGraph(const CNodePtr &cnode);
|
||||
};
|
||||
|
||||
class UnsortedSegmentSumInfo : public UnsortedSegmentOpInfo {
|
||||
public:
|
||||
UnsortedSegmentSumInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape,
|
||||
const PrimitiveAttrs &attrs)
|
||||
: UnsortedSegmentOpInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared<UnsortedSegmentSumCost>()) {}
|
||||
~UnsortedSegmentSumInfo() override = default;
|
||||
};
|
||||
|
||||
class UnsortedSegmentMinInfo : public UnsortedSegmentOpInfo {
|
||||
public:
|
||||
UnsortedSegmentMinInfo(const std::string &name, const Shapes &inputs_shape, const Shapes &outputs_shape,
|
||||
const PrimitiveAttrs &attrs)
|
||||
: UnsortedSegmentOpInfo(name, inputs_shape, outputs_shape, attrs, std::make_shared<UnsortedSegmentMinCost>()) {}
|
||||
~UnsortedSegmentMinInfo() override = default;
|
||||
|
||||
ReplaceGraphPtr replace_graph(const CNodePtr &cnode) override;
|
||||
Status InferForwardCommunication() override { return SUCCESS; }
|
||||
|
||||
protected:
|
||||
Status ComputeReplaceGraph(const CNodePtr &cnode);
|
||||
};
|
||||
|
||||
} // namespace parallel
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_FRONTEND_PARALLEL_OPS_INFO_UNSORTEDSEGMENTOP_INFO_H_
|
|
@ -312,7 +312,8 @@ bool IsSplittableOperator(const std::string &op_name) {
|
|||
EMBEDDING_LOOKUP, FUSE_BATCH_NORM_EX, SPLIT, BROADCAST_TO, ABS, ACOSH, ASIN, ASINH, ATAN, ATANH, CEIL, COSH,
|
||||
EXPM1, LOG1P, SIN, SINH, TAN, RSQRT, INV, RECIPROCAL, ROUND, FLOOR, SIGN, ERF, ERFC, ZEROSLIKE, ONESLIKE,
|
||||
BESSELI0E, BESSELI1E, FLOORMOD, ASSIGN, ASSIGN_ADD, ATAN2, DIVNONAN, LOGICALAND, LOGICALOR, ELU, RELU6, RELUV2,
|
||||
SOFTPLUS, SOFTSIGN, GREATEREQUAL, LESSEQUAL, LESS, APPROXIMATEEQUAL, MOD, UNIQUE};
|
||||
SOFTPLUS, SOFTSIGN, GREATEREQUAL, LESSEQUAL, LESS, APPROXIMATEEQUAL, MOD, UNIQUE, UNSORTED_SEGMENT_SUM,
|
||||
UNSORTED_SEGMENT_MIN};
|
||||
// clang-format on
|
||||
|
||||
auto iter = splittable_op.find(op_name);
|
||||
|
|
|
@ -740,9 +740,25 @@ void StepReplaceGraph(const ReplaceGraphPtr &replace_graph, const CNodePtr &node
|
|||
if (manager == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Failure:AddNode error since manager is nullptr";
|
||||
}
|
||||
// Solve the input order
|
||||
// For example input_node:{segment_sum:1, segment_sum:2, gather:2}
|
||||
// The original code here bound every operation to the first input of these operators.
|
||||
// However, the segment_sum operation needs two inputs. To solve this,
|
||||
// We maintain a dict to count the times of the same operations,
|
||||
// and bind the inputs according to the times of the op appears.
|
||||
static std::unordered_map<AnfNodePtr, int> input_map = {};
|
||||
static int appear_count = 0;
|
||||
for (auto &replace_input : replace_graph->first) {
|
||||
auto pre_node = node->input(IntToSize(replace_input.second));
|
||||
manager->SetEdge(replace_input.first, 1, pre_node);
|
||||
|
||||
auto it = input_map.find(replace_input.first);
|
||||
if (it != input_map.end()) {
|
||||
appear_count = 1 + it->second;
|
||||
} else {
|
||||
appear_count = 1;
|
||||
}
|
||||
input_map[replace_input.first] = appear_count;
|
||||
manager->SetEdge(replace_input.first, appear_count, pre_node);
|
||||
}
|
||||
// "(void)manager->Replace(replace_graph->first, pre_node);" can not be called
|
||||
auto replace_output = replace_graph->second;
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numpy as np
|
||||
|
||||
import mindspore as ms
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore import context
|
||||
from mindspore.common.api import _executor
|
||||
from mindspore.ops import composite as C
|
||||
import mindspore.ops as P
|
||||
from tests.ut.python.ops.test_math_ops import VirtualLoss
|
||||
|
||||
|
||||
grad_all = C.GradOperation(get_all=True)
|
||||
|
||||
|
||||
class NetWithLoss(nn.Cell):
    """Wrap ``network`` so that its prediction is passed through a ``VirtualLoss`` cell."""

    def __init__(self, network):
        super().__init__()
        self.loss = VirtualLoss()
        self.network = network

    def construct(self, vectors, index):
        """Run the wrapped network on (vectors, index) and return the loss of its prediction."""
        return self.loss(self.network(vectors, index))
|
||||
|
||||
|
||||
class GradWrap(nn.Cell):
    """Apply the module-level `grad_all` (GradOperation with get_all=True) to a network."""

    def __init__(self, network):
        super().__init__()
        self.network = network

    def construct(self, vectors, index):
        # Build the gradient function of the wrapped network and evaluate it.
        grad_fn = grad_all(self.network)
        return grad_fn(vectors, index)
|
||||
|
||||
|
||||
def test_auto_parallel_unsortedsegmentmin():
    """Compile the gradient graph of UnsortedSegmentMin in auto_parallel mode on 8 devices."""

    class Net(nn.Cell):
        """Cell calling UnsortedSegmentMin with a fixed number of segments."""

        def __init__(self, num_segments):
            super().__init__()
            self.merge_op = P.UnsortedSegmentMin()
            self.num_segments = num_segments

        def construct(self, vectors, index):
            return self.merge_op(vectors, index, self.num_segments)

    device_count = 8
    context.set_auto_parallel_context(device_num=device_count, global_rank=0)
    context.set_auto_parallel_context(parallel_mode="auto_parallel")

    # Random 4-D data with one segment id per row of the first axis.
    data = Tensor(np.random.rand(16, 16, 32, 64), dtype=ms.float32)
    segment_ids = Tensor(np.random.randint(16, size=(16,)), ms.int32)

    grad_net = GradWrap(NetWithLoss(Net(16)))
    grad_net.set_auto_parallel()
    grad_net.set_train()
    _executor.compile(grad_net, data, segment_ids)
|
|
@ -0,0 +1,71 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numpy as np
|
||||
|
||||
import mindspore as ms
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore import context
|
||||
from mindspore.common.api import _executor
|
||||
from mindspore.ops import composite as C
|
||||
import mindspore.ops as P
|
||||
from tests.ut.python.ops.test_math_ops import VirtualLoss
|
||||
|
||||
|
||||
grad_all = C.GradOperation(get_all=True)
|
||||
|
||||
|
||||
class NetWithLoss(nn.Cell):
    """Wrap a two-input network and pass its output through a VirtualLoss."""

    def __init__(self, network):
        super().__init__()
        self.loss = VirtualLoss()
        self.network = network

    def construct(self, vectors, index):
        # Forward through the wrapped network, then apply the loss cell.
        return self.loss(self.network(vectors, index))
|
||||
|
||||
|
||||
class GradWrap(nn.Cell):
    """Apply the module-level `grad_all` (GradOperation with get_all=True) to a network."""

    def __init__(self, network):
        super().__init__()
        self.network = network

    def construct(self, vectors, index):
        # Build the gradient function of the wrapped network and evaluate it.
        grad_fn = grad_all(self.network)
        return grad_fn(vectors, index)
|
||||
|
||||
|
||||
def test_auto_parallel_unsortedsegmentsum():
    """Compile the gradient graph of UnsortedSegmentSum in auto_parallel mode on 8 devices."""

    class Net(nn.Cell):
        """Cell calling UnsortedSegmentSum with a fixed number of segments."""

        def __init__(self, num_segments):
            super().__init__()
            self.merge_op = P.UnsortedSegmentSum()
            self.num_segments = num_segments

        def construct(self, vectors, index):
            out = self.merge_op(vectors, index, self.num_segments)
            return out

    size = 8
    context.set_auto_parallel_context(device_num=size, global_rank=0)
    context.set_auto_parallel_context(parallel_mode="auto_parallel")

    x = Tensor(np.random.rand(16, 16, 32, 64), dtype=ms.float32)
    # Pin the id dtype explicitly: without it the tensor inherits NumPy's
    # platform-dependent default integer type, unlike the other segment-op
    # tests, which all pass ms.int32.
    indices = Tensor(np.random.randint(16, size=(16, 16)), ms.int32)

    net = GradWrap(NetWithLoss(Net(16)))
    net.set_auto_parallel()
    net.set_train()
    _executor.compile(net, x, indices)
|
|
@ -0,0 +1,161 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numpy as np
|
||||
|
||||
import mindspore as ms
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore import context
|
||||
from mindspore.common.api import _executor
|
||||
from mindspore.ops import composite as C
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops.operations.comm_ops import _VirtualDataset
|
||||
from tests.ut.python.ops.test_math_ops import VirtualLoss
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE)
|
||||
|
||||
|
||||
grad_all = C.GradOperation(get_all=True)
|
||||
|
||||
|
||||
class Net(nn.Cell):
    """Cell applying UnsortedSegmentMin sharded with the given pair of strategies."""

    def __init__(self, strategy1, strategy2, num_segments):
        super().__init__()
        # Stored on the cell but not referenced by construct().
        self.virtual_dataset = _VirtualDataset()
        shard_strategy = (strategy1, strategy2)
        self.merge_op = P.UnsortedSegmentMin().shard(shard_strategy)
        self.num_segments = num_segments

    def construct(self, vectors, segment_ids):
        return self.merge_op(vectors, segment_ids, self.num_segments)
|
||||
|
||||
|
||||
class GradWrap(nn.Cell):
    """Apply the module-level `grad_all` (GradOperation with get_all=True) to a network."""

    def __init__(self, network):
        super().__init__()
        self.network = network

    def construct(self, x, y):
        # Build the gradient function of the wrapped network and evaluate it.
        grad_fn = grad_all(self.network)
        return grad_fn(x, y)
|
||||
|
||||
|
||||
class NetWithLoss(nn.Cell):
    """Wrap a two-input network and pass its output through a VirtualLoss."""

    def __init__(self, network):
        super().__init__()
        self.loss = VirtualLoss()
        self.network = network

    def construct(self, x, y):
        # Forward through the wrapped network, then apply the loss cell.
        return self.loss(self.network(x, y))
|
||||
|
||||
|
||||
def compile_graph(x, y, segments, strategy1, strategy2, auto=False):
    """Build the grad-wrapped UnsortedSegmentMin network and compile it.

    Args:
        x: data tensor passed to the network.
        y: segment-id tensor passed to the network.
        segments: number of segments given to the op.
        strategy1: shard strategy for the data input.
        strategy2: shard strategy for the segment-id input.
        auto: use "auto_parallel" when True, otherwise "semi_auto_parallel".
    """
    grad_net = GradWrap(NetWithLoss(Net(strategy1, strategy2, segments)))
    grad_net.set_auto_parallel()
    grad_net.set_train()
    # The parallel mode is selected after the network is built, right before compiling.
    mode = "auto_parallel" if auto else "semi_auto_parallel"
    context.set_auto_parallel_context(parallel_mode=mode)
    _executor.compile(grad_net, x, y)
|
||||
|
||||
|
||||
def test_unsortedsegmentmin_model_parallel_slice_1d():
    """1-D data and ids, both sharded with strategy (8,) on 8 devices."""
    context.set_auto_parallel_context(device_num=8, global_rank=0)
    data = Tensor(np.ones(8), ms.float32)
    segment_ids = Tensor(np.ones(8), ms.int32)
    compile_graph(data, segment_ids, 16, strategy1=(8,), strategy2=(8,))
|
||||
|
||||
|
||||
def test_unsortedsegmentmin_model_parallel_no_slice_1d():
    """1-D data and ids with the unsplit strategy (1,) on 8 devices."""
    context.set_auto_parallel_context(device_num=8, global_rank=0)
    data = Tensor(np.ones(8), ms.float32)
    segment_ids = Tensor(np.ones(8), ms.int32)
    compile_graph(data, segment_ids, 16, strategy1=(1,), strategy2=(1,))
|
||||
|
||||
|
||||
def test_unsortedsegmentmin_model_parallel_index_slice_2d():
    """2-D data with strategy (4, 1) and ids with strategy (4,) on 4 devices."""
    context.set_auto_parallel_context(device_num=4, global_rank=0)
    data = Tensor(np.ones((4, 8)), ms.float32)
    segment_ids = Tensor(np.arange(4), ms.int32)
    compile_graph(data, segment_ids, 4, strategy1=(4, 1), strategy2=(4,))
|
||||
|
||||
|
||||
def test_unsortedsegmentmin_model_parallel_vector_slice_2d():
    """2-D data with strategy (1, 4) and ids with strategy (1,) on 4 devices."""
    context.set_auto_parallel_context(device_num=4, global_rank=0)
    data = Tensor(np.ones((4, 8)), ms.float32)
    segment_ids = Tensor(np.ones(4), ms.int32)
    compile_graph(data, segment_ids, 4, strategy1=(1, 4), strategy2=(1,))
|
||||
|
||||
|
||||
def test_unsortedsegmentmin_model_parallel_vector_slice_3d():
    """3-D data with strategy (1, 2, 2) and ids with strategy (1,) on 4 devices."""
    context.set_auto_parallel_context(device_num=4, global_rank=0)
    data = Tensor(np.ones((4, 8, 8)), ms.float32)
    segment_ids = Tensor(np.ones(4), ms.int32)
    compile_graph(data, segment_ids, 4, strategy1=(1, 2, 2), strategy2=(1,))
|
||||
|
||||
|
||||
def test_unsortedsegmentmin_model_parallel_index_vector_slice_2d():
    """2-D data with strategy (2, 2) and ids with strategy (2,) on 4 devices."""
    context.set_auto_parallel_context(device_num=4, global_rank=0)
    data = Tensor(np.ones((4, 8)), ms.float32)
    segment_ids = Tensor(np.ones(4), ms.int32)
    compile_graph(data, segment_ids, 4, strategy1=(2, 2), strategy2=(2,))
|
||||
|
||||
|
||||
def test_unsortedsegmentmin_model_parallel_index_vector_slice_3d():
    """3-D float32 data with strategy (2, 1, 2) and ids with strategy (2,) on 4 devices."""
    context.set_auto_parallel_context(device_num=4, global_rank=0)
    data = Tensor(np.ones((4, 4, 8)), ms.float32)
    segment_ids = Tensor(np.ones(4), ms.int32)
    compile_graph(data, segment_ids, 16, strategy1=(2, 1, 2), strategy2=(2,))
|
||||
|
||||
def test_unsortedsegmentmin_model_parallel_float16():
    """3-D float16 data with strategy (2, 1, 2) and ids with strategy (2,) on 4 devices."""
    context.set_auto_parallel_context(device_num=4, global_rank=0)
    data = Tensor(np.ones((4, 4, 8)), ms.float16)
    segment_ids = Tensor(np.ones(4), ms.int32)
    compile_graph(data, segment_ids, 16, strategy1=(2, 1, 2), strategy2=(2,))
|
||||
|
||||
def test_unsortedsegmentmin_model_parallel_int32():
    """3-D int32 data with strategy (2, 1, 2) and ids with strategy (2,) on 4 devices."""
    context.set_auto_parallel_context(device_num=4, global_rank=0)
    data = Tensor(np.ones((4, 4, 8)), ms.int32)
    segment_ids = Tensor(np.ones(4), ms.int32)
    compile_graph(data, segment_ids, 16, strategy1=(2, 1, 2), strategy2=(2,))
|
|
@ -0,0 +1,153 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import numpy as np
|
||||
|
||||
import mindspore as ms
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore import context
|
||||
from mindspore.common.api import _executor
|
||||
from mindspore.ops import composite as C
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops.operations.comm_ops import _VirtualDataset
|
||||
from tests.ut.python.ops.test_math_ops import VirtualLoss
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE)
|
||||
|
||||
|
||||
grad_all = C.GradOperation(get_all=True)
|
||||
|
||||
|
||||
class Net(nn.Cell):
    """Cell applying UnsortedSegmentSum sharded with the given pair of strategies."""

    def __init__(self, strategy1, strategy2, num_segments):
        super().__init__()
        # Stored on the cell but not referenced by construct().
        self.virtual_dataset = _VirtualDataset()
        shard_strategy = (strategy1, strategy2)
        self.merge_op = P.UnsortedSegmentSum().shard(shard_strategy)
        self.num_segments = num_segments

    def construct(self, vectors, segment_ids):
        return self.merge_op(vectors, segment_ids, self.num_segments)
|
||||
|
||||
|
||||
class GradWrap(nn.Cell):
    """Apply the module-level `grad_all` (GradOperation with get_all=True) to a network."""

    def __init__(self, network):
        super().__init__()
        self.network = network

    def construct(self, x, y):
        # Build the gradient function of the wrapped network and evaluate it.
        grad_fn = grad_all(self.network)
        return grad_fn(x, y)
|
||||
|
||||
|
||||
class NetWithLoss(nn.Cell):
    """Wrap a two-input network and pass its output through a VirtualLoss."""

    def __init__(self, network):
        super().__init__()
        self.loss = VirtualLoss()
        self.network = network

    def construct(self, x, y):
        # Forward through the wrapped network, then apply the loss cell.
        return self.loss(self.network(x, y))
|
||||
|
||||
|
||||
def compile_graph(x, y, segments, strategy1, strategy2, auto=False):
    """Build the grad-wrapped UnsortedSegmentSum network and compile it.

    Args:
        x: data tensor passed to the network.
        y: segment-id tensor passed to the network.
        segments: number of segments given to the op.
        strategy1: shard strategy for the data input.
        strategy2: shard strategy for the segment-id input.
        auto: use "auto_parallel" when True, otherwise "semi_auto_parallel".
    """
    grad_net = GradWrap(NetWithLoss(Net(strategy1, strategy2, segments)))
    grad_net.set_auto_parallel()
    grad_net.set_train()
    # The parallel mode is selected after the network is built, right before compiling.
    mode = "auto_parallel" if auto else "semi_auto_parallel"
    context.set_auto_parallel_context(parallel_mode=mode)
    _executor.compile(grad_net, x, y)
|
||||
|
||||
|
||||
def test_unsortedsegmentsum_model_parallel_slice_1d():
    """1-D data and ids, both sharded with strategy (8,) on 8 devices."""
    context.set_auto_parallel_context(device_num=8, global_rank=0)
    data = Tensor(np.ones(8), ms.float32)
    segment_ids = Tensor(np.ones(8), ms.int32)
    compile_graph(data, segment_ids, 16, strategy1=(8,), strategy2=(8,))
|
||||
|
||||
|
||||
def test_unsortedsegmentsum_model_parallel_no_slice_1d():
    """1-D data and ids with the unsplit strategy (1,) on 8 devices."""
    context.set_auto_parallel_context(device_num=8, global_rank=0)
    data = Tensor(np.ones(8), ms.float32)
    segment_ids = Tensor(np.ones(8), ms.int32)
    compile_graph(data, segment_ids, 16, strategy1=(1,), strategy2=(1,))
|
||||
|
||||
|
||||
def test_unsortedsegmentsum_model_parallel_index_slice_2d():
    """2-D data with strategy (4, 1) and ids with strategy (4,) on 4 devices."""
    context.set_auto_parallel_context(device_num=4, global_rank=0)
    data = Tensor(np.ones((4, 8)), ms.float32)
    segment_ids = Tensor(np.arange(4), ms.int32)
    compile_graph(data, segment_ids, 4, strategy1=(4, 1), strategy2=(4,))
|
||||
|
||||
|
||||
def test_unsortedsegmentsum_model_parallel_index_slice_3d():
    """3-D data with strategy (2, 2, 1) and 2-D ids with strategy (2, 2) on 4 devices."""
    context.set_auto_parallel_context(device_num=4, global_rank=0)
    data = Tensor(np.ones((4, 4, 8)), ms.float32)
    segment_ids = Tensor(np.ones((4, 4)), ms.int32)
    compile_graph(data, segment_ids, 16, strategy1=(2, 2, 1), strategy2=(2, 2))
|
||||
|
||||
|
||||
def test_unsortedsegmentsum_model_parallel_vector_slice_2d():
    """2-D data with strategy (1, 4) and ids with strategy (1,) on 4 devices."""
    context.set_auto_parallel_context(device_num=4, global_rank=0)
    data = Tensor(np.ones((4, 8)), ms.float32)
    segment_ids = Tensor(np.ones(4), ms.int32)
    compile_graph(data, segment_ids, 4, strategy1=(1, 4), strategy2=(1,))
|
||||
|
||||
|
||||
def test_unsortedsegmentsum_model_parallel_vector_slice_3d():
    """3-D data with strategy (1, 2, 2) and ids with strategy (1,) on 4 devices."""
    context.set_auto_parallel_context(device_num=4, global_rank=0)
    data = Tensor(np.ones((4, 8, 8)), ms.float32)
    segment_ids = Tensor(np.ones(4), ms.int32)
    compile_graph(data, segment_ids, 4, strategy1=(1, 2, 2), strategy2=(1,))
|
||||
|
||||
|
||||
def test_unsortedsegmentsum_model_parallel_index_vector_slice_2d():
    """2-D data with strategy (2, 2) and ids with strategy (2,) on 4 devices."""
    context.set_auto_parallel_context(device_num=4, global_rank=0)
    data = Tensor(np.ones((4, 8)), ms.float32)
    segment_ids = Tensor(np.ones(4), ms.int32)
    compile_graph(data, segment_ids, 4, strategy1=(2, 2), strategy2=(2,))
|
||||
|
||||
|
||||
def test_unsortedsegmentsum_model_parallel_index_vector_slice_3d():
    """3-D data with strategy (2, 1, 2) and 2-D ids with strategy (2, 1) on 4 devices."""
    context.set_auto_parallel_context(device_num=4, global_rank=0)
    data = Tensor(np.ones((4, 4, 8)), ms.float32)
    segment_ids = Tensor(np.ones((4, 4)), ms.int32)
    compile_graph(data, segment_ids, 16, strategy1=(2, 1, 2), strategy2=(2, 1))
|
Loading…
Reference in New Issue