forked from mindspore-Ecosystem/mindspore
!30589 [DynamicShape][GPU] add dynamic shape support for some ops for the new network
Merge pull request !30589 from hanhuifeng/dcn_dyn_ops_2_new
This commit is contained in:
commit
9a5268f2be
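In user-facing terms, the change lets several GPU ops take what used to be compile-time attributes as runtime tensor inputs: ReduceSum's axis, Slice's begin/size, and Reshape's target shape. A minimal sketch of the kind of network this enables, assuming a MindSpore build with GPU support; the cells mirror the test networks added at the end of this diff, where they are driven through a dynamic-shape data sink:

import mindspore.nn as nn
import mindspore.ops as ops


class ReduceSumNet(nn.Cell):
    """ReduceSum whose reduction axis arrives as a runtime tensor input."""
    def __init__(self):
        super(ReduceSumNet, self).__init__()
        self.reduce = ops.ReduceSum()

    def construct(self, x, axis):
        return self.reduce(x, axis)


class ReshapeNet(nn.Cell):
    """Reshape whose target shape is taken from another tensor at runtime."""
    def construct(self, x, y):
        return ops.Reshape()(x, ops.DynamicShape()(y))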
@@ -56,5 +56,22 @@ MS_REG_GPU_KERNEL_ONE(ReduceProd, KernelAttr().AddInputAttr(kNumberTypeFloat32).
ArrayReduceGpuKernelMod, float)
MS_REG_GPU_KERNEL_ONE(ReduceProd, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
ArrayReduceGpuKernelMod, double)

// dynamic
MS_REG_GPU_KERNEL_TWO(
ReduceSum,
KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat64),
ArrayReduceGpuKernelMod, double, int64_t)
MS_REG_GPU_KERNEL_TWO(
ReduceSum,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat32),
ArrayReduceGpuKernelMod, float, int64_t)
MS_REG_GPU_KERNEL_TWO(
ReduceSum,
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat16),
ArrayReduceGpuKernelMod, half, int64_t)
MS_REG_GPU_KERNEL_TWO(
ReduceSum, KernelAttr().AddInputAttr(kNumberTypeBool).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeBool),
ArrayReduceGpuKernelMod, bool, int64_t)
} // namespace kernel
} // namespace mindspore

@@ -32,7 +32,7 @@ const std::map<std::string, cudnnReduceTensorOp_t> kReduceTypeMap = {
{"ReduceAny", CUDNN_REDUCE_TENSOR_MAX}, {"ReduceAll", CUDNN_REDUCE_TENSOR_MUL},
{"ReduceProd", CUDNN_REDUCE_TENSOR_MUL},
};
template <typename T>
template <typename T, typename S = int64_t>
class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
public:
ArrayReduceGpuKernelMod() { ResetResource(); }
@@ -43,6 +43,9 @@ class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
if (is_null_input_) {
return true;
}
if (is_dynamic_axis_ && !get_dynamic_axis_value_) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', fail to get value of the axis when axis is dynamic!";
}
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *output_addr = GetDeviceAddress<T>(outputs, 0);
T *workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 0);
@@ -87,8 +90,13 @@ class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
}
data_type_ = GetCudnnDataType(type_name);
size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1, but got " << input_num;
constexpr size_t kDynamicAxisInputNum = 2;
if (input_num != 1 && input_num != kDynamicAxisInputNum) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs should be 1 or " << kDynamicAxisInputNum
<< ", but got " << input_num;
}
if (input_num == kDynamicAxisInputNum) {
is_dynamic_axis_ = true;
}
size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
@@ -96,29 +104,28 @@ class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
}
int input_dim_length = SizeToInt(AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0).size());

auto prim = common::AnfAlgo::GetCNodePrimitive(kernel_node);
MS_EXCEPTION_IF_NULL(prim);
if (prim->GetAttr("axis")->isa<ValueTuple>() || prim->GetAttr("axis")->isa<ValueList>()) {
std::vector<int> attr_axis;
std::vector<int64_t> attr_axis_me = GetAttr<std::vector<int64_t>>(kernel_node, "axis");
(void)std::transform(attr_axis_me.begin(), attr_axis_me.end(), std::back_inserter(attr_axis),
[](const int64_t &value) { return static_cast<int>(value); });
if (attr_axis.empty()) {
axis_.push_back(-1);
} else {
for (auto axis : attr_axis) {
axis < 0 ? axis_.push_back(axis + input_dim_length) : axis_.push_back(axis);
}
std::sort(axis_.begin(), axis_.end());
auto multiple_pos = std::unique(axis_.begin(), axis_.end());
axis_.erase(multiple_pos, axis_.end());
std::vector<int64_t> attr_axis;
if (is_dynamic_axis_) {
get_dynamic_axis_value_ = GetDynamicAttrIntValue(kernel_node, kAxisIndex_, &attr_axis);
if (!get_dynamic_axis_value_) {
InitSizeLists();
return true;
}
} else if (prim->GetAttr("axis")->isa<Int64Imm>()) {
int axis = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
axis < 0 ? axis_.push_back(axis + input_dim_length) : axis_.push_back(axis);
dynamic_axis_size_ = attr_axis.size();
} else {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute 'axis' type is invalid.";
attr_axis = GetAxisValue(kernel_node);
}
if (attr_axis.empty()) {
axis_.push_back(-1);
} else {
for (auto axis : attr_axis) {
axis < 0 ? axis_.push_back(axis + input_dim_length) : axis_.push_back(axis);
}
std::sort(axis_.begin(), axis_.end());
auto multiple_pos = std::unique(axis_.begin(), axis_.end());
axis_.erase(multiple_pos, axis_.end());
}

keep_dims_ = GetAttr<bool>(kernel_node, "keep_dims");

auto inputA_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
@@ -156,6 +163,9 @@ class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
dynamic_axis_size_ = 0;
is_dynamic_axis_ = false;
get_dynamic_axis_value_ = false;
}

void DestroyResource() noexcept override {
@@ -182,6 +192,10 @@ class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
"cudnnGetTensorSizeInBytes failed.");
input_size_list_.push_back(input_size_);

if (is_dynamic_axis_) {
input_size_list_.push_back(dynamic_axis_size_ * sizeof(S));
}

CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnGetTensorSizeInBytes(outputC_descriptor_, &output_size_),
"cudnnGetTensorSizeInBytes failed.");
output_size_list_.push_back(output_size_);
@@ -279,6 +293,21 @@ class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
return;
}

std::vector<int64_t> GetAxisValue(const CNodePtr &kernel_node) {
auto prim = common::AnfAlgo::GetCNodePrimitive(kernel_node);
MS_EXCEPTION_IF_NULL(prim);
std::vector<int64_t> attr_axis_me;
if (prim->GetAttr("axis")->isa<ValueTuple>() || prim->GetAttr("axis")->isa<ValueList>()) {
attr_axis_me = GetAttr<std::vector<int64_t>>(kernel_node, "axis");
} else if (prim->GetAttr("axis")->isa<Int64Imm>()) {
auto axis = GetAttr<int64_t>(kernel_node, "axis");
attr_axis_me.push_back(axis);
} else {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', attribute 'axis' type is invalid.";
}
return attr_axis_me;
}

cudnnHandle_t cudnn_handle_;
cudnnReduceTensorOp_t reduce_tensor_op_;
cudnnDataType_t data_type_;
@@ -295,7 +324,10 @@ class ArrayReduceGpuKernelMod : public NativeGpuKernelMod {
size_t input_size_;
size_t output_size_;
size_t workspace_size_;
std::string kernel_name_;
size_t dynamic_axis_size_;
bool is_dynamic_axis_;
bool get_dynamic_axis_value_;
static constexpr size_t kAxisIndex_{1};
};
} // namespace kernel
} // namespace mindspore

@@ -33,18 +33,18 @@ namespace kernel {

REG_SLICE_GPU_DTYPES(REG_SLICE_GPU)

#define REG_DYNAMIC_SLICE_GPU_ATTR(T0_MS_DTYPE, T0_DTYPE, T1_MS_DTYPE) \
MS_REG_GPU_KERNEL_ONE(Slice, \
KernelAttr() \
.AddInputAttr(T0_MS_DTYPE) \
.AddInputAttr(T1_MS_DTYPE) \
.AddInputAttr(T1_MS_DTYPE) \
.AddOutputAttr(T0_MS_DTYPE), \
SliceFwdGpuKernelMod, T0_DTYPE)
#define REG_DYNAMIC_SLICE_GPU_ATTR(T0_MS_DTYPE, T0_DTYPE, T1_MS_DTYPE, T1_DTYPE) \
MS_REG_GPU_KERNEL_TWO(Slice, \
KernelAttr() \
.AddInputAttr(T0_MS_DTYPE) \
.AddInputAttr(T1_MS_DTYPE) \
.AddInputAttr(T1_MS_DTYPE) \
.AddOutputAttr(T0_MS_DTYPE), \
SliceFwdGpuKernelMod, T0_DTYPE, T1_DTYPE)

#define REG_DYNAMIC_SLICE_GPU(MS_DTYPE, DTYPE) \
REG_DYNAMIC_SLICE_GPU_ATTR(MS_DTYPE, DTYPE, kNumberTypeInt32) \
REG_DYNAMIC_SLICE_GPU_ATTR(MS_DTYPE, DTYPE, kNumberTypeInt64)
#define REG_DYNAMIC_SLICE_GPU(MS_DTYPE, DTYPE) \
REG_DYNAMIC_SLICE_GPU_ATTR(MS_DTYPE, DTYPE, kNumberTypeInt32, int32_t) \
REG_DYNAMIC_SLICE_GPU_ATTR(MS_DTYPE, DTYPE, kNumberTypeInt64, int64_t)

REG_SLICE_GPU_DTYPES(REG_DYNAMIC_SLICE_GPU)
} // namespace kernel

@@ -40,7 +40,7 @@ constexpr auto kIdx4 = 4;
constexpr auto kIdx5 = 5;
constexpr auto kIdx6 = 6;

template <typename T>
template <typename T, typename S = int64_t>
class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
public:
SliceFwdGpuKernelMod() { kernel_name_ = "Slice"; }
@@ -106,7 +106,10 @@ class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
(void)CheckParam(kernel_node);

if (is_dynamic_attr_ && !get_dynamic_attr_value_) {
InitSizeLists();
return true;
}
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
auto out_shape = AnfAlgo::GetOutputDeviceShapeAdaptively(kernel_node, 0);
is_null_input_ =
@@ -118,27 +121,15 @@ class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
(void)std::transform(input_shape.begin(), input_shape.end(), std::back_inserter(input_shape_),
[](const int64_t &e) { return static_cast<int32_t>(e); });

size_t input_size = sizeof(T);
input_size_ = sizeof(T);
for (size_t x : input_shape) {
input_size *= x;
}
input_size_list_.push_back(input_size);
if (is_dynamic_attr_) {
std::vector<size_t> dynamic_attr_indexs = {kBeginIndex_, kSizeIndex_};
for (size_t index : dynamic_attr_indexs) {
input_size = sizeof(T);
for (size_t x : AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, index)) {
input_size *= x;
}
input_size_list_.push_back(input_size);
}
input_size_ *= x;
}

size_t output_size = sizeof(T);
output_size_ = sizeof(T);
for (size_t x : out_shape) {
output_size *= x;
output_size_ *= x;
}
output_size_list_.push_back(output_size);

// transpose begin and size for NHWC data
auto data_format = AnfAlgo::GetInputFormat(kernel_node, 0);
@@ -166,6 +157,8 @@ class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
begin_.clear();
size_.clear();
input_shape_.clear();
input_size_ = 0;
output_size_ = 0;
is_null_input_ = false;
kernel_name_ = "Slice";
is_dynamic_attr_ = false;
@@ -173,7 +166,29 @@ class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
}

protected:
void InitSizeLists() override {}
void InitSizeLists() override {
input_size_list_.push_back(input_size_);
output_size_list_.push_back(output_size_);
if (is_dynamic_attr_) {
InitDynamicAttrSizeLists(kernel_node_.lock());
}
}

inline void InitDynamicAttrSizeLists(const CNodePtr &kernel_node) {
if (get_dynamic_attr_value_) {
std::vector<size_t> dynamic_attr_indexs = {kBeginIndex_, kSizeIndex_};
for (size_t index : dynamic_attr_indexs) {
size_t input_size = sizeof(S);
for (size_t x : AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, index)) {
input_size *= x;
}
input_size_list_.push_back(input_size);
}
} else {
input_size_list_.push_back(0);
input_size_list_.push_back(0);
}
}

private:
void CheckParam(const CNodePtr &kernel_node) {
@@ -205,13 +220,13 @@ class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
size = GetAttr<std::vector<int64_t>>(kernel_node, "size");
begin = GetAttr<std::vector<int64_t>>(kernel_node, "begin");
} else {
// The value of dynamic attr can only be obtained after the InferShape() of dynamic kernel is executed
if (depend_tensor_map_.empty()) {
if (GetDynamicAttrIntValue(kernel_node, kBeginIndex_, &begin) &&
GetDynamicAttrIntValue(kernel_node, kSizeIndex_, &size)) {
get_dynamic_attr_value_ = true;
}
if (!get_dynamic_attr_value_) {
return;
}
begin = GetDynamicAttrIntValue(kernel_node, kBeginIndex_);
size = GetDynamicAttrIntValue(kernel_node, kSizeIndex_);
get_dynamic_attr_value_ = true;
}

if (size.size() != input_shape.size() || begin.size() != input_shape.size()) {
@@ -220,9 +235,8 @@ class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
<< "of size: " << size.size() << ", the dimension of begin: " << begin.size()
<< ", the dimension of input_x: " << input_shape.size();
}
const int64_t NEG_ONE = -1;
for (size_t i = 0; i < input_shape.size(); i++) {
if (size[i] == NEG_ONE) {
if (size[i] == -1) {
size[i] = input_shape[i] - begin[i];
}
if (input_shape[i] <= 0 || size[i] <= 0) {
@@ -245,6 +259,8 @@ class SliceFwdGpuKernelMod : public NativeGpuKernelMod {
std::vector<int32_t> size_;
std::vector<int32_t> input_shape_;

size_t input_size_{0};
size_t output_size_{0};
bool is_null_input_{false};
bool is_dynamic_attr_{false};
bool get_dynamic_attr_value_{false};

@@ -346,9 +346,12 @@ class NativeGpuKernelMod : public GpuKernelMod {
return std::equal(s1.begin(), s1.end(), s2_trans.begin(), s2_trans.end());
}

inline std::vector<int64_t> GetDynamicAttrIntValue(const CNodePtr &kernel_node, const size_t input_index) {
inline bool GetDynamicAttrIntValue(const CNodePtr &kernel_node, const size_t input_index,
std::vector<int64_t> *attr_value) {
// The value of dynamic attr can only be obtained after the InferShape() is executed
if (depend_tensor_map_.empty()) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the depend_tensor_map is empty!";
MS_LOG(DEBUG) << "For '" << kernel_name_ << "', the depend_tensor_map is currently empty";
return false;
}
auto depend_iter = depend_tensor_map_.find(input_index);
if (depend_iter == depend_tensor_map_.end()) {
@@ -366,7 +369,8 @@ class NativeGpuKernelMod : public GpuKernelMod {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the format of the " << input_index
<< "th input currently should be the default format and does not support " << data_format;
}
return GetTensorIntValue(input_tensor, input_index);
*attr_value = GetTensorIntValue(input_tensor, input_index);
return true;
}
};
} // namespace kernel

@@ -33,30 +33,7 @@ constexpr size_t NO_CUDNN_BATCHNORM_OPS_BN_INPUT_NUM = 8;
template <typename T>
class BatchNormGradGpuKernelMod : public NativeGpuKernelMod {
public:
BatchNormGradGpuKernelMod()
: batch_(0),
channel_(0),
height_(0),
width_(0),
x_size_(0),
para_size_(0),
workspace_size_(0),
reserve_size_(0),
mode_(CUDNN_BATCHNORM_SPATIAL),
bn_ops_(CUDNN_BATCHNORM_OPS_BN),
epsilon_(10e-5),
is_train_(false),
is_null_input_(false),
x_desc_(nullptr),
y_desc_(nullptr),
dy_desc_(nullptr),
dx_desc_(nullptr),
dz_desc_(nullptr),
scale_bias_diff_desc_(nullptr),
activation_desc_(nullptr),
handle_(nullptr),
cudnn_data_type_(CUDNN_DATA_FLOAT),
beta_data_diff_(0) {}
BatchNormGradGpuKernelMod() { ResetResource(); }
~BatchNormGradGpuKernelMod() override { DestroyResource(); }

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
@@ -171,6 +148,33 @@ class BatchNormGradGpuKernelMod : public NativeGpuKernelMod {
return true;
}

void ResetResource() noexcept override {
ResetSizeLists();
batch_ = 0;
channel_ = 0;
height_ = 0;
width_ = 0;
x_size_ = 0;
para_size_ = 0;
workspace_size_ = 0;
reserve_size_ = 0;
mode_ = CUDNN_BATCHNORM_SPATIAL;
bn_ops_ = CUDNN_BATCHNORM_OPS_BN;
epsilon_ = 10e-5;
is_train_ = false;
is_null_input_ = false;
x_desc_ = nullptr;
y_desc_ = nullptr;
dy_desc_ = nullptr;
dx_desc_ = nullptr;
dz_desc_ = nullptr;
scale_bias_diff_desc_ = nullptr;
activation_desc_ = nullptr;
handle_ = nullptr;
cudnn_data_type_ = CUDNN_DATA_FLOAT;
beta_data_diff_ = 0;
}

protected:
void InitResource() override {
handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();

@@ -18,6 +18,7 @@

namespace mindspore {
namespace kernel {
namespace {
// float64
MS_REG_GPU_KERNEL_ONE(Flatten, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
FlattenFwdGpuKernelMod, double)
@@ -25,6 +26,7 @@ MS_REG_GPU_KERNEL_ONE(Reshape, KernelAttr().AddInputAttr(kNumberTypeFloat64).Add
FlattenFwdGpuKernelMod, double)
MS_REG_GPU_KERNEL_ONE(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
FlattenFwdGpuKernelMod, double)

// float32
MS_REG_GPU_KERNEL_ONE(Flatten, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
FlattenFwdGpuKernelMod, float)
@@ -32,6 +34,7 @@ MS_REG_GPU_KERNEL_ONE(Reshape, KernelAttr().AddInputAttr(kNumberTypeFloat32).Add
FlattenFwdGpuKernelMod, float)
MS_REG_GPU_KERNEL_ONE(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
FlattenFwdGpuKernelMod, float)

// float16
MS_REG_GPU_KERNEL_ONE(Flatten, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
FlattenFwdGpuKernelMod, half)
@@ -39,6 +42,7 @@ MS_REG_GPU_KERNEL_ONE(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeFloat16).
FlattenFwdGpuKernelMod, half)
MS_REG_GPU_KERNEL_ONE(Reshape, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
FlattenFwdGpuKernelMod, half)

// int64
MS_REG_GPU_KERNEL_ONE(Flatten, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
FlattenFwdGpuKernelMod, int64_t)
@@ -46,13 +50,15 @@ MS_REG_GPU_KERNEL_ONE(Reshape, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOu
FlattenFwdGpuKernelMod, int64_t)
MS_REG_GPU_KERNEL_ONE(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
FlattenFwdGpuKernelMod, int64_t)

// int32
MS_REG_GPU_KERNEL_ONE(Flatten, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
FlattenFwdGpuKernelMod, int)
FlattenFwdGpuKernelMod, int32_t)
MS_REG_GPU_KERNEL_ONE(Reshape, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
FlattenFwdGpuKernelMod, int)
FlattenFwdGpuKernelMod, int32_t)
MS_REG_GPU_KERNEL_ONE(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
FlattenFwdGpuKernelMod, int)
FlattenFwdGpuKernelMod, int32_t)

// int16
MS_REG_GPU_KERNEL_ONE(Flatten, KernelAttr().AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
FlattenFwdGpuKernelMod, int16_t)
@@ -107,5 +113,71 @@ MS_REG_GPU_KERNEL_ONE(Reshape, KernelAttr().AddInputAttr(kNumberTypeBool).AddOut
FlattenFwdGpuKernelMod, bool)
MS_REG_GPU_KERNEL_ONE(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
FlattenFwdGpuKernelMod, bool)
} // namespace

// dynamic kernel:
namespace {
// float64
MS_REG_GPU_KERNEL_TWO(
Reshape,
KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat64),
FlattenFwdGpuKernelMod, double, int64_t)

// float32
MS_REG_GPU_KERNEL_TWO(
Reshape,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat32),
FlattenFwdGpuKernelMod, float, int64_t)

// float16
MS_REG_GPU_KERNEL_TWO(
Reshape,
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeFloat16),
FlattenFwdGpuKernelMod, half, int64_t)

// int64
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
FlattenFwdGpuKernelMod, int64_t, int64_t)

// int32
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32),
FlattenFwdGpuKernelMod, int32_t, int64_t)

// int16
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt16),
FlattenFwdGpuKernelMod, int16_t, int64_t)
// int8
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt8),
FlattenFwdGpuKernelMod, char, int64_t)

// uint64
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeUInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeUInt64),
FlattenFwdGpuKernelMod, uint64_t, int64_t)

// uint32
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeUInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeUInt32),
FlattenFwdGpuKernelMod, uint, int64_t)

// uint16
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeUInt16).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeUInt16),
FlattenFwdGpuKernelMod, uint16_t, int64_t)

// uint8
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeUInt8),
FlattenFwdGpuKernelMod, uchar, int64_t)

// bool
MS_REG_GPU_KERNEL_TWO(
Reshape, KernelAttr().AddInputAttr(kNumberTypeBool).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeBool),
FlattenFwdGpuKernelMod, bool, int64_t)
} // namespace
} // namespace kernel
} // namespace mindspore

@@ -25,7 +25,7 @@

namespace mindspore {
namespace kernel {
template <typename T>
template <typename T, typename S = int64_t>
class FlattenFwdGpuKernelMod : public NativeGpuKernelMod {
public:
FlattenFwdGpuKernelMod() : input_size_(0), is_null_input_(false) {}
@@ -48,6 +48,7 @@ class FlattenFwdGpuKernelMod : public NativeGpuKernelMod {
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
auto shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
kernel_node_ = kernel_node;
is_null_input_ = CHECK_SHAPE_NULL(shape, kernel_name_, "input");
@@ -75,6 +76,21 @@ class FlattenFwdGpuKernelMod : public NativeGpuKernelMod {
void InitSizeLists() override {
input_size_list_.push_back(input_size_);
output_size_list_.push_back(input_size_);
InitDynamicAttrSizeLists(kernel_node_.lock());
}

inline void InitDynamicAttrSizeLists(const CNodePtr &kernel_node) {
size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num == 1) {
return;
}
for (size_t index = 1; index < input_num; ++index) {
size_t input_size = sizeof(S);
for (size_t x : AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, index)) {
input_size *= x;
}
input_size_list_.push_back(input_size);
}
}

private:

@@ -49,6 +49,8 @@ class DynamicBroadcastGradientArgsGpuKernelMod : public NativeGpuKernelMod {
CHECK_CUDA_RET_WITH_EXCEPT(
kernel_node_, cudaMemcpyAsync(&x1_value[0], s1_addr, input_size_list_[1], cudaMemcpyDeviceToHost, cuda_stream),
"DynamicBroadcastGradientArgs copy s1 value failed");
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaStreamSynchronize(cuda_stream),
"DynamicBroadcastGradientArgs cudaStreamSynchronized failed");
auto grad_reduce_idx = CalOut({x0_value, x1_value});
r0_size_ = SetOuputValue(r0_addr, grad_reduce_idx[0], x0_value.size(), cuda_stream);
r1_size_ = SetOuputValue(r1_addr, grad_reduce_idx[1], x1_value.size(), cuda_stream);

@@ -84,7 +84,8 @@ std::set<int64_t> GetDependsFormMap(const CNodePtr &cnode) {
{kReshape, ShapeSet{1}},
{kSlice, ShapeSet{1, 2}},
{kSliceGrad, ShapeSet{2, 3}},
{kDynamicBroadcastTo, ShapeSet{1}}};
{kDynamicBroadcastTo, ShapeSet{1}},
{kReduceSum, ShapeSet{1}}};

MS_EXCEPTION_IF_NULL(cnode);
if (cnode->inputs().empty()) {
@@ -98,7 +99,7 @@ std::set<int64_t> GetDependsFormMap(const CNodePtr &cnode) {
MS_EXCEPTION_IF_NULL(ms_context);
auto device = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
// Special dynamic shape depends for Ascend.
if (device == kAscendDevice && (prim_name == kReduceSum || prim_name == kTranspose)) {
if (device == kAscendDevice && prim_name == kTranspose) {
return {1};
}

@@ -45,27 +45,34 @@ bool BatchNormGrad::get_is_training() const {
return GetValue<bool>(value_ptr);
}

constexpr auto kInputNum = 6;

abstract::TupleShapePtr BatchNormGradInferShape(const PrimitivePtr &primitive,
const std::vector<AbstractBasePtr> &input_args) {
MS_EXCEPTION_IF_NULL(primitive);
auto prim_name = primitive->name();
(void)CheckAndConvertUtils::CheckInputArgs(input_args, kGreaterEqual, kInputNum, prim_name);
auto x_shape_ptr = input_args[kInputIndex1]->BuildShape();
auto scale_shape_ptr = input_args[kInputIndex2]->BuildShape();
return std::make_shared<abstract::TupleShape>(
std::vector<abstract::BaseShapePtr>{x_shape_ptr, scale_shape_ptr, scale_shape_ptr});
}

TuplePtr BatchNormGradInferType(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
MS_EXCEPTION_IF_NULL(primitive);
auto prim_name = primitive->name();
(void)CheckAndConvertUtils::CheckInputArgs(input_args, kGreaterEqual, kInputNum, prim_name);
auto x_type_ptr = input_args[kInputIndex1]->BuildType();
auto scale_type_ptr = input_args[kInputIndex2]->BuildType();
return std::make_shared<Tuple>(std::vector<TypePtr>{x_type_ptr, scale_type_ptr, scale_type_ptr});
}

AbstractBasePtr BatchNormGradInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
const std::vector<AbstractBasePtr> &input_args) {
MS_EXCEPTION_IF_NULL(primitive);
for (auto item : input_args) {
MS_EXCEPTION_IF_NULL(item);
}
const int64_t input_num = 5;
(void)CheckAndConvertUtils::CheckInteger("BatchNormGrad infer", SizeToLong(input_args.size()), kGreaterEqual,
input_num, primitive->name());
auto y_backprop_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[1]->BuildShape())[kShape];
CheckAndConvertUtils::Check("BatchNorm y_backprop_shape", y_backprop_shape, kEqual, x_shape);

auto dx = input_args[kInputIndex1]->Broaden();
auto dscale = input_args[kInputIndex2]->Broaden();
auto reserve_1 = input_args[kInputIndex3]->Broaden();
auto reserve_2 = input_args[kInputIndex4]->Broaden();

AbstractBasePtrList rets = {dx, dscale, dscale, reserve_1, reserve_2};
return std::make_shared<abstract::AbstractTuple>(rets);
return abstract::MakeAbstract(BatchNormGradInferShape(primitive, input_args),
BatchNormGradInferType(primitive, input_args));
}
REGISTER_PRIMITIVE_C(kNameBatchNormGrad, BatchNormGrad);
REGISTER_PRIMITIVE_EVAL_IMPL(BatchNormGrad, prim::kPrimBatchNormGrad, BatchNormGradInfer, nullptr, true);
} // namespace ops
} // namespace mindspore

@@ -118,7 +118,7 @@ class SqrtGrad(PrimitiveWithInfer):
        return x_dtype


class BatchNormGrad(PrimitiveWithInfer):
class BatchNormGrad(Primitive):
    """Performs grad of BatchNorm operation."""

    @prim_attr_register
@@ -127,13 +127,6 @@ class BatchNormGrad(PrimitiveWithInfer):
        self.epsilon = validator.check_float_range(epsilon, 0, 1, Rel.INC_RIGHT, 'epsilon', self.name)
        self.data_format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)

    def infer_shape(self, y_backprop_shape, x_shape, scale_shape, save_mean_shape, save_variance_shape, reserve):
        validator.check("BatchNorm y_backprop_shape", y_backprop_shape, "BatchNorm x_shape", x_shape)
        return (x_shape, scale_shape, scale_shape)

    def infer_dtype(self, y_backprop_type, x_type, scale_type, save_mean_shape, save_variance_shape, reserve):
        return (x_type, scale_type, scale_type)


class SyncBatchNormGrad(PrimitiveWithInfer):
    """Performs grad of SyncBatchNorm operation."""

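BatchNormGrad's shape and dtype inference now lives in C++ (above), so the Python class only registers attributes. Gradients are still obtained the usual way by wrapping a network with ops.GradOperation. The tests below use a wrapper named GradNetWrtX whose definition sits outside the shown hunks; the following is an assumed, illustrative version of such a wrapper, not the one in the repository:

import mindspore.nn as nn
import mindspore.ops as ops


class GradNetWrtX(nn.Cell):
    """Returns gradients of `net` w.r.t. its inputs, given an output sensitivity."""
    def __init__(self, net):
        super(GradNetWrtX, self).__init__()
        self.net = net
        self.grad_op = ops.GradOperation(get_all=True, sens_param=True)

    def construct(self, x, sens):
        return self.grad_op(self.net)(x, sens)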
@@ -0,0 +1,52 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest

import mindspore.nn as nn
import mindspore.context as context

from mindspore.ops.operations import _inner_ops

context.set_context(mode=context.GRAPH_MODE, device_target="GPU")


class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.args = _inner_ops.DynamicBroadcastGradientArgs()

    def construct(self, s0, s1):
        return self.args(s0, s1)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_net():
    """
    Feature: DynamicBroadcastGradientArgs op.
    Description: test cases for DynamicBroadcastGradientArgs op.
    Expectation: the result match expected array.
    """
    shape0 = (4, 2, 1)
    shape1 = (2, 7)
    net = Net()
    r0, r1 = net(shape0, shape1)
    r0_expected = [2]
    r1_expected = [0]

    assert np.array_equal(r0_expected, r0.asnumpy())
    assert np.array_equal(r1_expected, r1.asnumpy())

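For reference, the expected values above (r0 = [2], r1 = [0] for shapes (4, 2, 1) and (2, 7)) follow from the usual broadcasting rule: each input must be reduced over every output axis along which it was broadcast. A small plain-Python sketch of that rule (not the operator's implementation) reproduces them:

def broadcast_gradient_args(s0, s1):
    # Right-align the shapes, padding the shorter one with leading 1s.
    rank = max(len(s0), len(s1))
    p0 = (1,) * (rank - len(s0)) + tuple(s0)
    p1 = (1,) * (rank - len(s1)) + tuple(s1)
    out = tuple(max(a, b) for a, b in zip(p0, p1))
    # An input is reduced over every axis where its size of 1 was broadcast up.
    r0 = [i for i, (a, o) in enumerate(zip(p0, out)) if a == 1 and o > 1]
    r1 = [i for i, (b, o) in enumerate(zip(p1, out)) if b == 1 and o > 1]
    return r0, r1


assert broadcast_gradient_args((4, 2, 1), (2, 7)) == ([2], [0])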
@@ -102,15 +102,7 @@ class ConcatNet(nn.Cell):
        return self.op((x1, x2))


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_dynamic_concat():
    """
    Feature: Test Concat and its backward.
    Description: The shape of inputs is dynamic.
    Expectation: Assert that results are consistent with fixed shape.
    """
def dynamic_concat_run(is_grad):
    axis = 1
    dtype = np.float32
    data_list = []
@@ -118,15 +110,151 @@ def test_dynamic_concat():
        data = []
        data.append(np.random.rand(i, 16).astype(dtype))
        data.append(np.random.rand(i, 32).astype(dtype))
        data.append(np.random.rand(i, 48).astype(dtype))
        if is_grad:
            data.append(np.random.rand(i, 48).astype(dtype))
        data_list.append(tuple(data))
    column_names = get_columns(len(data_list[0]))
    dataset = ds.GeneratorDataset(data_list, column_names, shuffle=False)
    dataset.set_dynamic_columns(
        columns={column_names[0]: [None, 16], column_names[1]: [None, 32], column_names[2]: [None, 48]})
    dynamic_columns = {column_names[0]: [
        None, 16], column_names[1]: [None, 32]}
    if is_grad:
        dynamic_columns[column_names[-1]] = [None, 48]
    dataset.set_dynamic_columns(columns=dynamic_columns)
    net = ConcatNet(axis)
    if is_grad:
        net = GradNetWrtX(net)
    output = dynamic_shape_sink_process(net, dataset)
    output_cmp = fixed_shape_process(net, dataset)
    assert compare(output, output_cmp)

    net = GradNetWrtX(ConcatNet(axis))


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_dynamic_concat_forward():
    """
    Feature: Test Concat.
    Description: The shape of inputs is dynamic.
    Expectation: Assert that results are consistent with fixed shape.
    """
    dynamic_concat_run(False)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_dynamic_concat_backward():
    """
    Feature: Test backward of Concat.
    Description: The shape of inputs is dynamic.
    Expectation: Assert that results are consistent with fixed shape.
    """
    dynamic_concat_run(True)


class BatchNormNet(nn.Cell):
    def __init__(self, c):
        super(BatchNormNet, self).__init__()
        self.bn = nn.BatchNorm1d(c)

    def construct(self, input_data):
        x = self.bn(input_data)
        return x


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_dynamic_bachnorm():
    """
    Feature: Test BatchNorm and its backward.
    Description: The shape of inputs is dynamic.
    Expectation: Assert that results are consistent with fixed shape.
    """
    c = 256
    dtype = np.float32
    data_list = []
    for i in [2, 64]:
        data = []
        data.append(np.random.rand(i, c).astype(dtype))
        data.append(np.random.rand(i, c).astype(dtype))
        data_list.append(tuple(data))
    column_names = get_columns(len(data_list[0]))
    dataset = ds.GeneratorDataset(data_list, column_names, shuffle=False)
    dynamic_columns = {column_names[0]: [None, c], column_names[1]: [None, c]}
    dataset.set_dynamic_columns(columns=dynamic_columns)
    net = GradNetWrtX(BatchNormNet(c))
    gradients = dynamic_shape_sink_process(net, dataset)
    gradients_cmp = fixed_shape_process(net, dataset)
    assert compare(gradients, gradients_cmp)


class ReshapeNet(nn.Cell):
    def construct(self, x, y):
        shape_of_y = ops.DynamicShape()(y)
        return ops.Reshape()(x, shape_of_y)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_dynamic_reshape():
    """
    Feature: Test Reshape.
    Description: The shape of inputs is dynamic.
    Expectation: Assert that results are consistent with fixed shape.
    """
    dtype = np.float32
    data_list = []
    for i in [2, 96]:
        data = []
        data.append(np.random.rand(i, 64, 1).astype(dtype))
        data.append(np.random.rand(i, 64).astype(dtype))
        data_list.append(tuple(data))
    column_names = get_columns(len(data_list[0]))
    dataset = ds.GeneratorDataset(data_list, column_names, shuffle=False)
    dynamic_columns = {column_names[0]: [
        None, 64, 1], column_names[1]: [None, 64]}
    dataset.set_dynamic_columns(columns=dynamic_columns)
    net = ReshapeNet()
    output = dynamic_shape_sink_process(net, dataset)
    output_cmp = fixed_shape_process(net, dataset)
    assert compare(output, output_cmp)


class ReduceSumNet(nn.Cell):
    def __init__(self):
        super(ReduceSumNet, self).__init__()
        self.reduce = ops.ReduceSum()

    def construct(self, x, y):
        return self.reduce(x, y)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_dynamic_reduce_sum():
    """
    Feature: Test ReduceSum.
    Description: The shape of inputs is dynamic.
    Expectation: Assert that results are consistent with the numpy result.
    """
    dtype = np.float32
    data_list = []
    for i in [2, 96]:
        data = []
        data.append(np.random.rand(i, 256).astype(dtype))
        data.append(np.array([1], dtype=np.int64))
        data_list.append(tuple(data))
    column_names = get_columns(len(data_list[0]))
    dataset = ds.GeneratorDataset(data_list, column_names, shuffle=False)
    dynamic_columns = {column_names[0]: [None, 256]}
    dataset.set_dynamic_columns(columns=dynamic_columns)
    net = ReduceSumNet()
    output = dynamic_shape_sink_process(net, dataset)
    # A tensor (dynamic) axis for ReduceSum is not yet supported in the fixed-shape path,
    # so the result is compared against numpy instead of fixed_shape_process.
    inputs = data_list[0]
    output_cmp = np.sum(inputs[0], inputs[1][0])
    assert np.allclose(output.asnumpy(), output_cmp, rtol=1.0e-4, atol=1.0e-4)