Convert the implementation of the BroadcastTo, ReluGrad, and ReLU6Grad CPU operators to nnacl

This commit is contained in:
buxue 2021-04-19 17:06:05 +08:00
parent 8487ce0c09
commit 3dc5f1a41d
20 changed files with 278 additions and 248 deletions

View File

@ -15,80 +15,42 @@
*/
#include "backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.h"
#include "nnacl/errorcode.h"
namespace mindspore {
namespace kernel {
template <typename T>
void BroadcastToCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
size_t input_shape_size = input_shape_.size();
size_t output_shape_size = output_shape_.size();
size_t offset = output_shape_.size() - input_shape_.size();
for (size_t i = 0; i < offset; ++i) {
input_shape_.insert(input_shape_.begin(), 1);
if (output_shape_size < input_shape_size) {
MS_LOG(EXCEPTION) << "Cannot broadcast input tensor with shape " << input_shape_
<< " to a smaller dimension shape " << output_shape_ << ".";
}
if (output_shape_size > MAX_SHAPE_SIZE) {
MS_LOG(EXCEPTION) << "Cannot broadcast input tensor with shape " << input_shape_ << " to a shape " << output_shape_
<< " more than 8-D.";
}
size_t offset = output_shape_size - input_shape_size;
for (size_t i = 0; i < input_shape_size; ++i) {
if (input_shape_[i] != output_shape_[i + offset] && input_shape_[i] != 1) {
MS_LOG(EXCEPTION) << "Cannot broadcast input tensor with shape " << input_shape_ << " to a shape "
<< output_shape_ << ".";
}
}
for (size_t i = 0; i < input_shape_.size(); ++i) {
if (output_shape_[i] < input_shape_[i] || output_shape_[i] % input_shape_[i] != 0) {
MS_LOG(EXCEPTION) << "Cannot broadcast input tensor with shape " << input_shape_ << " to "
<< "output tensor with shape " << output_shape_
<< ". Output shape must be the integer times of input shape at the " << i << " dim!";
for (size_t i = 0; i < input_shape_size; ++i) {
shape_info_.input_shape_[i] = SizeToInt(input_shape_[i]);
}
for (size_t i = 0; i < output_shape_size; ++i) {
shape_info_.output_shape_[i] = SizeToInt(output_shape_[i]);
}
for (size_t j = 0; j < output_shape_.size(); j++) {
nums_ *= output_shape_[j];
}
tmp_ptr_ = reinterpret_cast<T *>(malloc(nums_ * sizeof(T)));
}
// BroadcastTo
template <typename T>
void BroadcastToCPUKernel<T>::BroadcastToImpl(size_t dim) {
if (dim == output_shape_.size() - 1) {
size_t input_nums = 1;
for (size_t j = 0; j < input_shape_.size() - 1; ++j) {
input_nums *= input_shape_[j];
}
size_t rate = output_shape_[dim] / input_shape_[dim];
for (size_t j = 0; j < input_nums; ++j) {
T *in_ptr = input_ptr_ + input_shape_[dim] * j;
for (size_t i = 0; i < rate; ++i) {
T *out_ptr = tmp_ptr_ + (j * rate + i) * input_shape_[dim];
memcpy_s(out_ptr, input_shape_[dim] * sizeof(T), in_ptr, input_shape_[dim] * sizeof(T));
}
}
size_t elems = input_shape_[dim] * rate * input_nums;
memcpy_s(output_ptr_, elems * sizeof(T), tmp_ptr_, elems * sizeof(T));
return;
}
BroadcastToImpl(dim + 1);
size_t rate = output_shape_[dim] / input_shape_[dim];
if (rate > 1) {
size_t elems_nums = 1;
for (size_t j = output_shape_.size() - 1; j > dim; --j) {
elems_nums *= output_shape_[j];
}
size_t input_nums = 1;
for (size_t j = 0; j < dim; ++j) {
input_nums *= input_shape_[j];
}
for (size_t j = 0; j < input_nums; ++j) {
T *in_ptr = output_ptr_ + elems_nums * j;
for (size_t i = 0; i < rate; ++i) {
T *out_ptr = tmp_ptr_ + (j * rate + i) * elems_nums;
memcpy_s(out_ptr, elems_nums * sizeof(T), in_ptr, elems_nums * sizeof(T));
}
}
size_t elems = elems_nums * rate * input_nums;
memcpy_s(output_ptr_, elems * sizeof(T), tmp_ptr_, elems * sizeof(T));
}
shape_info_.input_shape_size_ = SizeToInt(input_shape_size);
shape_info_.output_shape_size_ = SizeToInt(output_shape_size);
}
template <typename T>
@ -96,26 +58,34 @@ bool BroadcastToCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, cons
const std::vector<AddressPtr> &outputs) {
if (inputs.size() != 1 || outputs.size() != 1) {
MS_LOG(EXCEPTION) << "Wrong number of inputs or outputs!";
return false;
}
if ((inputs[0] == nullptr) || (inputs[0]->size == 0)) {
MS_LOG(EXCEPTION) << "Input data is NULL!";
return false;
}
if ((outputs[0] == nullptr) || (outputs[0]->size == 0)) {
MS_LOG(EXCEPTION) << "Output data is NULL!";
return false;
}
input_ptr_ = reinterpret_cast<T *>(inputs[0]->addr);
output_ptr_ = reinterpret_cast<T *>(outputs[0]->addr);
BroadcastToImpl(0);
const auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
int ret = NNACL_ERR;
if constexpr (std::is_same_v<T, bool>) {
ret = BroadcastTo(bool, input_addr, &shape_info_, output_addr);
} else if constexpr (std::is_same_v<T, int>) {
ret = BroadcastTo(int, input_addr, &shape_info_, output_addr);
} else if constexpr (std::is_same_v<T, float>) {
ret = BroadcastTo(float, input_addr, &shape_info_, output_addr);
} else {
MS_LOG(EXCEPTION) << "Not supported data type for BroadcastTo.";
}
if (ret == NNACL_OK) {
return true;
}
MS_LOG(ERROR) << "Broadcast tensor with shape " << input_shape_ << " to shape " << output_shape_
<< " execute failed.";
return false;
}
} // namespace kernel
} // namespace mindspore

View File

@ -21,44 +21,32 @@
#include <memory>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
#include "nnacl/base/broadcast_to.h"
namespace mindspore {
namespace kernel {
template <typename T>
class BroadcastToCPUKernel : public CPUKernel {
public:
BroadcastToCPUKernel() = default;
~BroadcastToCPUKernel() override {
if (tmp_ptr_ != nullptr) {
free(tmp_ptr_);
tmp_ptr_ = nullptr;
}
};
~BroadcastToCPUKernel() = default;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs) override;
void InitKernel(const CNodePtr &kernel_node) override;
void BroadcastToImpl(size_t dim);
size_t Index(const size_t &index, const size_t &dim) { return dim == 1 ? 0 : index; }
private:
std::vector<size_t> input_shape_;
std::vector<size_t> output_shape_;
size_t nums_{1};
T *input_ptr_{nullptr};
T *output_ptr_{nullptr};
T *tmp_ptr_{nullptr};
BroadcastShapeInfo shape_info_;
};
MS_REG_CPU_KERNEL(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
BroadcastToCPUKernel<float>);
MS_REG_CPU_KERNEL(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
BroadcastToCPUKernel<int>);
MS_REG_CPU_KERNEL(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
BroadcastToCPUKernel<bool>);
MS_REG_CPU_KERNEL_T(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
BroadcastToCPUKernel, float);
MS_REG_CPU_KERNEL_T(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
BroadcastToCPUKernel, int);
MS_REG_CPU_KERNEL_T(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
BroadcastToCPUKernel, bool);
} // namespace kernel
} // namespace mindspore

View File

@ -18,28 +18,32 @@
#include "backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h"
#include "common/thread_pool.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "nnacl/fp32_grad/activation_grad.h"
#include "nnacl/errorcode.h"
namespace mindspore {
namespace kernel {
template <typename T>
void EltWiseGradCPUKernel<T>::ReluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
if (input2[i] > 0) {
out[i] = input1[i];
} else {
out[i] = 0;
if constexpr (std::is_same_v<T, float>) {
int ret = ::ReluGrad(input1 + start, input2 + start, end - start, out + start);
if (ret == NNACL_ERR) {
MS_LOG(EXCEPTION) << "ReLUGrad failed.";
}
} else {
MS_LOG(EXCEPTION) << "ReLUGrad only support float";
}
}
template <typename T>
void EltWiseGradCPUKernel<T>::ReLU6Grad(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
if (input2[i] > 0 && input2[i] <= 6) {
out[i] = input1[i];
} else {
out[i] = 0;
if constexpr (std::is_same_v<T, float>) {
int ret = ::Relu6Grad(input1 + start, input2 + start, end - start, out + start);
if (ret == NNACL_ERR) {
MS_LOG(EXCEPTION) << "ReLU6Grad failed.";
}
} else {
MS_LOG(EXCEPTION) << "ReLU6Grad only support float";
}
}

View File

@ -30,12 +30,9 @@ file(GLOB KERNEL_SRC
${NNACL_DIR}/int8/*.c
${NNACL_DIR}/infer/*.c
${NNACL_DIR}/base/*.c
${NNACL_DIR}/fp32_grad/*.c
)
if(SUPPORT_TRAIN)
file(GLOB TRAIN_SRC ${NNACL_DIR}/fp32_grad/*.c)
endif()
if(PLATFORM_ARM64)
file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm64/*.S)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)

View File

@ -0,0 +1,95 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/base/broadcast_to.h"
#include <string.h>
#include "nnacl/op_base.h"
#include "nnacl/errorcode.h"
// Multiplies the entries shape[start] .. shape[end] (both inclusive) and returns the product.
// An empty range (start > end) yields 1. Each entry is converted to size_t before multiplying.
size_t accumulate(const int *shape, int start, int end) {
  size_t result = 1;
  int idx = start;
  while (idx <= end) {
    result *= (size_t)shape[idx];
    ++idx;
  }
  return result;
}
// Right-aligns the first input_shape_len entries of input_shape inside a shape of
// output_shape_len entries and fills the vacated leading entries with 1.
// Does nothing when input_shape_len is not smaller than output_shape_len.
// The buffer behind input_shape must have room for output_shape_len entries.
void pad_input_shape(int *input_shape, int input_shape_len, int output_shape_len) {
  if (input_shape_len >= output_shape_len) {
    return;
  }
  const int lead = output_shape_len - input_shape_len;
  // Shift from the back so that no entry is overwritten before it has been moved.
  int src = input_shape_len;
  while (src-- > 0) {
    input_shape[src + lead] = input_shape[src];
  }
  for (int dst = lead - 1; dst >= 0; --dst) {
    input_shape[dst] = 1;
  }
}
// BROADCAST_TO(type) expands to the definition of
//   int broadcast_to_<type>(const type *input, BroadcastShapeInfo *shape_info, type *output)
// which replicates `input` into `output` according to the shapes stored in `shape_info`.
//
// How it works:
//   1. The input shape is left-padded with 1s up to the output rank. This rewrites
//      shape_info->input_shape_ and shape_info->input_shape_size_ in place.
//   2. The innermost dimension is expanded directly from `input` into `output`.
//   3. Every outer dimension whose broadcast rate is larger than 1 is expanded, innermost first,
//      by replicating the slices already present in `output` into a scratch buffer and copying
//      the scratch buffer back to `output`.
//
// Returns NNACL_OK on success and NNACL_ERR when
//   - input, shape_info or output is NULL,
//   - the output rank is not in [1, MAX_SHAPE_SIZE],
//   - the input rank is negative or larger than the output rank,
//   - an input dimension is not positive (it is used as a divisor),
//   - the scratch buffer cannot be allocated.
//
// The element size is taken from sizeof(type) so that it always matches the instantiated type.
// Whether each input dimension is actually compatible with the corresponding output dimension
// is not checked here; that is expected to be validated by the caller.
// No comments are placed inside the macro body because of the line continuations.
#define BROADCAST_TO(type) \
  int broadcast_to_##type(const type *input, BroadcastShapeInfo *shape_info, type *output) { \
    if (input == NULL || shape_info == NULL || output == NULL) { \
      return NNACL_ERR; \
    } \
    const int input_rank = shape_info->input_shape_size_; \
    const int output_rank = shape_info->output_shape_size_; \
    if (output_rank <= 0 || output_rank > MAX_SHAPE_SIZE || input_rank < 0 || input_rank > output_rank) { \
      return NNACL_ERR; \
    } \
    int *input_shape = shape_info->input_shape_; \
    const int *output_shape = shape_info->output_shape_; \
    for (int i = 0; i < input_rank; ++i) { \
      if (input_shape[i] <= 0) { \
        return NNACL_ERR; \
      } \
    } \
    const int dim_max = output_rank - 1; \
    const size_t data_length = sizeof(type); \
    const size_t temp_length = accumulate(output_shape, 0, dim_max); \
    type *data_temp = (type *)malloc(temp_length * data_length); \
    if (data_temp == NULL) { \
      return NNACL_ERR; \
    } \
    pad_input_shape(input_shape, input_rank, output_rank); \
    shape_info->input_shape_size_ = output_rank; \
    \
    size_t before_dim_elements_num = accumulate(input_shape, 0, dim_max - 1); \
    size_t after_dim_elements_num = (size_t)input_shape[dim_max]; \
    size_t dim_broadcast_rate = (size_t)(output_shape[dim_max] / input_shape[dim_max]); \
    for (size_t i = 0; i < before_dim_elements_num; ++i) { \
      const type *in_ptr = input + i * after_dim_elements_num; \
      for (size_t j = 0; j < dim_broadcast_rate; ++j) { \
        type *out_ptr = output + (i * dim_broadcast_rate + j) * after_dim_elements_num; \
        memcpy(out_ptr, in_ptr, after_dim_elements_num * data_length); \
      } \
    } \
    \
    int dim_index = dim_max - 1; \
    while (dim_index >= 0) { \
      dim_broadcast_rate = (size_t)(output_shape[dim_index] / input_shape[dim_index]); \
      if (dim_broadcast_rate > 1) { \
        before_dim_elements_num = accumulate(input_shape, 0, dim_index - 1); \
        after_dim_elements_num = accumulate(output_shape, dim_index + 1, dim_max); \
        for (size_t i = 0; i < before_dim_elements_num; ++i) { \
          const type *in_ptr = output + i * after_dim_elements_num; \
          for (size_t j = 0; j < dim_broadcast_rate; ++j) { \
            type *out_ptr = data_temp + (i * dim_broadcast_rate + j) * after_dim_elements_num; \
            memcpy(out_ptr, in_ptr, after_dim_elements_num * data_length); \
          } \
        } \
        size_t elements_total = before_dim_elements_num * dim_broadcast_rate * after_dim_elements_num; \
        memcpy(output, data_temp, elements_total * data_length); \
      } \
      --dim_index; \
    } \
    free(data_temp); \
    return NNACL_OK; \
  }

BROADCAST_TO(int)
BROADCAST_TO(float)
BROADCAST_TO(bool)

View File

@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -13,18 +13,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_NNACL_FP32_BROADCAST_TO_FP32_H_
#define MINDSPORE_NNACL_FP32_BROADCAST_TO_FP32_H_
#ifndef MINDSPORE_NNACL_FP32_BROADCAST_TO_H_
#define MINDSPORE_NNACL_FP32_BROADCAST_TO_H_
#include "nnacl/op_base.h"
#include "nnacl/broadcast_to_parameter.h"
#ifdef __cplusplus
extern "C" {
#endif
int BroadcastTo(const float *input, BroadcastShapeInfo *shape_info, float *output);
#define BroadcastTo(type, input, shape_info, output) broadcast_to_##type(input, shape_info, output)
int broadcast_to_int(const int *input, BroadcastShapeInfo *shape_info, int *output);
int broadcast_to_float(const float *input, BroadcastShapeInfo *shape_info, float *output);
int broadcast_to_bool(const bool *input, BroadcastShapeInfo *shape_info, bool *output);
#ifdef __cplusplus
}
#endif
#endif // MINDSPORE_NNACL_FP32_BROADCAST_TO_FP32_H_
#endif // MINDSPORE_NNACL_FP32_BROADCAST_TO_H_

View File

@ -20,14 +20,14 @@
typedef struct BroadcastToParameter {
OpParameter op_parameter_;
int shape_[COMM_SHAPE_SIZE];
int shape_[MAX_SHAPE_SIZE];
size_t shape_size_;
} BroadcastToParameter;
typedef struct BroadcastShapeInfo {
int input_shape_[COMM_SHAPE_SIZE];
int input_shape_[MAX_SHAPE_SIZE];
int input_shape_size_;
int output_shape_[COMM_SHAPE_SIZE];
int output_shape_[MAX_SHAPE_SIZE];
int output_shape_size_;
} BroadcastShapeInfo;

View File

@ -1,103 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "nnacl/fp32/broadcast_to_fp32.h"
#include <string.h>
#include "nnacl/op_base.h"
#include "nnacl/errorcode.h"
void PadBroadcastShapeInfo(BroadcastShapeInfo *shape_info) {
if (shape_info->input_shape_size_ < DIMENSION_4D) {
int input_shape_tmp[DIMENSION_4D];
for (int i = 0; i < shape_info->input_shape_size_; ++i) {
input_shape_tmp[i] = shape_info->input_shape_[i];
}
int input_shape_index = shape_info->input_shape_size_ - 1;
for (int i = DIMENSION_4D - 1; i >= 0; --i) {
if (input_shape_index >= 0) {
shape_info->input_shape_[i] = input_shape_tmp[input_shape_index--];
} else {
shape_info->input_shape_[i] = 1;
}
}
}
if (shape_info->output_shape_size_ < DIMENSION_4D) {
int output_shape_tmp[DIMENSION_4D];
for (int i = 0; i < shape_info->output_shape_size_; ++i) {
output_shape_tmp[i] = shape_info->output_shape_[i];
}
int output_shape_index = shape_info->output_shape_size_ - 1;
for (int i = DIMENSION_4D - 1; i >= 0; --i) {
if (output_shape_index >= 0) {
shape_info->output_shape_[i] = output_shape_tmp[output_shape_index--];
} else {
shape_info->output_shape_[i] = 1;
}
}
}
}
// Broadcasts `input` into `output` for shapes of rank at most DIMENSION_4D.
// Returns NNACL_ERR when the input or output rank exceeds DIMENSION_4D, NNACL_OK otherwise.
// shape_info is modified: PadBroadcastShapeInfo rewrites both shape arrays to four entries.
// All offsets below are byte offsets built with a hard-coded element size of 4
// (presumably sizeof(float) -- confirm before reusing for other types).
int BroadcastTo(const float *input, BroadcastShapeInfo *shape_info, float *output) {
if (shape_info->input_shape_size_ > DIMENSION_4D || shape_info->output_shape_size_ > DIMENSION_4D) {
return NNACL_ERR;
}
PadBroadcastShapeInfo(shape_info);
// Byte strides of input dimensions 0, 1 and 2 (index 2 is the size of one innermost row).
size_t input_dim_offset[DIMENSION_4D - 1];
input_dim_offset[2] = shape_info->input_shape_[3] * 4;
input_dim_offset[1] = input_dim_offset[2] * shape_info->input_shape_[2];
input_dim_offset[0] = input_dim_offset[1] * shape_info->input_shape_[1];
// Byte strides of output dimensions 0, 1 and 2.
size_t output_dim_offset[DIMENSION_4D - 1];
output_dim_offset[2] = shape_info->output_shape_[3] * 4;
output_dim_offset[1] = output_dim_offset[2] * shape_info->output_shape_[2];
output_dim_offset[0] = output_dim_offset[1] * shape_info->output_shape_[1];
// Address both buffers byte-wise so the strides above can be added directly.
uint8_t *in_base = (uint8_t *)input;
uint8_t *out_base = (uint8_t *)(output);
// The loops iterate over the INPUT extents; replication to the output extents happens after each loop level.
for (int32_t dim0 = 0; dim0 < shape_info->input_shape_[0]; ++dim0) {
for (int32_t dim1 = 0; dim1 < shape_info->input_shape_[1]; ++dim1) {
for (int32_t dim2 = 0; dim2 < shape_info->input_shape_[2]; ++dim2) {
if (shape_info->input_shape_[3] == shape_info->output_shape_[3]) {
// Innermost extents match: copy the whole input row to the matching output row.
memcpy(out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1 + output_dim_offset[2] * dim2,
in_base + input_dim_offset[0] * dim0 + input_dim_offset[1] * dim1 + input_dim_offset[2] * dim2,
input_dim_offset[2]);
} else {
// Innermost extents differ: the first 4 bytes of the input row are written to every output slot.
// NOTE(review): only the first element of the row is read, so this presumably expects
// input_shape_[3] == 1 -- confirm.
for (int32_t dim3 = 0; dim3 < shape_info->output_shape_[3]; ++dim3) {
memcpy(out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1 + output_dim_offset[2] * dim2 +
dim3 * 4,
in_base + input_dim_offset[0] * dim0 + input_dim_offset[1] * dim1 + input_dim_offset[2] * dim2, 4);
}
}
}
// Dimension 2 differs: copy the first output row of this (dim0, dim1) block to every dim2 index.
// For dim2 == 0 the source and destination addresses are identical.
if (shape_info->input_shape_[2] != shape_info->output_shape_[2]) {
for (int32_t dim2 = 0; dim2 < shape_info->output_shape_[2]; ++dim2) {
memcpy(out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1 + dim2 * output_dim_offset[2],
out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1, output_dim_offset[2]);
}
}
}
// Dimension 1 differs: copy the first dim1 slice of this dim0 block to every dim1 index
// (again including index 0 onto itself).
if (shape_info->input_shape_[1] != shape_info->output_shape_[1]) {
for (int32_t dim1 = 0; dim1 < shape_info->output_shape_[1]; ++dim1) {
memcpy(out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1,
out_base + output_dim_offset[0] * dim0, output_dim_offset[1]);
}
}
}
// Dimension 0 differs: copy the first dim0 slice of the output to every dim0 index.
if (shape_info->input_shape_[0] != shape_info->output_shape_[0]) {
for (int32_t dim0 = 0; dim0 < shape_info->output_shape_[0]; ++dim0) {
memcpy(out_base + output_dim_offset[0] * dim0, out_base, output_dim_offset[0]);
}
}
return NNACL_OK;
}

View File

@ -20,7 +20,7 @@
#include "nnacl/fp32_grad/activation_grad.h"
#include "nnacl/errorcode.h"
inline int ReluGrad(float *src0, float *src1, size_t length, float *dst) {
int ReluGrad(const float *src0, const float *src1, size_t length, float *dst) {
int i = 0;
#ifdef ENABLE_ARM
float32x4_t zero_4 = vdupq_n_f32(0.0f);
@ -38,7 +38,7 @@ inline int ReluGrad(float *src0, float *src1, size_t length, float *dst) {
return NNACL_OK;
}
int Relu6Grad(float *src0, float *src1, size_t length, float *dst) {
int Relu6Grad(const float *src0, const float *src1, size_t length, float *dst) {
int i = 0;
#ifdef ENABLE_ARM
float32x4_t zero_4 = vdupq_n_f32(0.0f);
@ -59,28 +59,28 @@ int Relu6Grad(float *src0, float *src1, size_t length, float *dst) {
return NNACL_OK;
}
int LReluGrad(float *src0, float *src1, size_t length, float *dst, float alpha) {
int LReluGrad(const float *src0, const float *src1, size_t length, float *dst, float alpha) {
for (size_t i = 0; i < length; ++i) {
dst[i] = src1[i] > 0.0f ? src0[i] : alpha * src0[i];
}
return NNACL_OK;
}
int SigmoidGrad(float *src0, float *src1, size_t length, float *dst) {
int SigmoidGrad(const float *src0, const float *src1, size_t length, float *dst) {
for (size_t i = 0; i < length; ++i) {
dst[i] = src0[i] * (src1[i] * (1.0f - src1[i]));
}
return NNACL_OK;
}
int TanhGrad(float *src0, float *src1, size_t length, float *dst) {
int TanhGrad(const float *src0, const float *src1, size_t length, float *dst) {
for (size_t i = 0; i < length; ++i) {
dst[i] = (1.0f - (src1[i] * src1[i])) * src0[i];
}
return NNACL_OK;
}
int HSwishGrad(float *src0, float *src1, size_t length, float *dst) {
int HSwishGrad(const float *src0, const float *src1, size_t length, float *dst) {
for (size_t i = 0; i < length; ++i) {
float tmp = (src1[i] > 3.0f ? 1.0f : (src1[i] < -3.0f ? 0.0f : (2.0f * src1[i] + 3.0f) / 6.0f));
dst[i] = tmp * src0[i];
@ -88,7 +88,7 @@ int HSwishGrad(float *src0, float *src1, size_t length, float *dst) {
return NNACL_OK;
}
int HSigmoidGrad(float *src0, float *src1, size_t length, float *dst) {
int HSigmoidGrad(const float *src0, const float *src1, size_t length, float *dst) {
for (size_t i = 0; i < length; ++i) {
float tmp = (src1[i] > 3.0f ? 0.0f : (src1[i] < -3.0f ? 0.0f : 1.0f / 6.0f));
dst[i] = tmp * src0[i];
@ -96,14 +96,14 @@ int HSigmoidGrad(float *src0, float *src1, size_t length, float *dst) {
return NNACL_OK;
}
int EluGrad(float *src0, float *src1, size_t length, float *dst, float alpha) {
int EluGrad(const float *src0, const float *src1, size_t length, float *dst, float alpha) {
for (size_t i = 0; i < length; ++i) {
dst[i] = (src1[i] > 0.0f ? src0[i] : alpha * expm1(src1[i]) * src0[i]);
}
return NNACL_OK;
}
int GeluGrad(float *src0, float *src1, size_t length, float *dst) {
int GeluGrad(const float *src0, const float *src1, size_t length, float *dst) {
for (size_t i = 0; i < length; ++i) {
dst[i] = src0[i] * ((0.5 * (1.0 + erf(src1[i] / 1.4142135623730951))) +
(src1[i] * exp(-0.5 * src1[i] * src1[i]) / 2.5066282746));

View File

@ -30,15 +30,15 @@ typedef struct ActivationGradParameter {
extern "C" {
#endif
int ReluGrad(float *src0, float *src1, size_t length, float *dst);
int Relu6Grad(float *src0, float *src1, size_t length, float *dst);
int LReluGrad(float *src0, float *src1, size_t length, float *dst, float alpha);
int SigmoidGrad(float *src0, float *src1, size_t length, float *dst);
int TanhGrad(float *src0, float *src1, size_t length, float *dst);
int HSwishGrad(float *src0, float *src1, size_t length, float *dst);
int HSigmoidGrad(float *src0, float *src1, size_t length, float *dst);
int EluGrad(float *src0, float *src1, size_t length, float *dst, float alpha);
int GeluGrad(float *src0, float *src1, size_t length, float *dst);
int ReluGrad(const float *src0, const float *src1, size_t length, float *dst);
int Relu6Grad(const float *src0, const float *src1, size_t length, float *dst);
int LReluGrad(const float *src0, const float *src1, size_t length, float *dst, float alpha);
int SigmoidGrad(const float *src0, const float *src1, size_t length, float *dst);
int TanhGrad(const float *src0, const float *src1, size_t length, float *dst);
int HSwishGrad(const float *src0, const float *src1, size_t length, float *dst);
int HSigmoidGrad(const float *src0, const float *src1, size_t length, float *dst);
int EluGrad(const float *src0, const float *src1, size_t length, float *dst, float alpha);
int GeluGrad(const float *src0, const float *src1, size_t length, float *dst);
#ifdef __cplusplus
}

View File

@ -17,7 +17,7 @@
#define MINDSPORE_NNACL_BROADCAST_TO_INFER_H
#include "nnacl/infer/common_infer.h"
#include "nnacl/fp32/broadcast_to_fp32.h"
#include "nnacl/base/broadcast_to.h"
#ifdef __cplusplus
extern "C" {

View File

@ -57,7 +57,7 @@ else()
endif()
if(ENABLE_CPU)
target_link_libraries(mindspore_shared_lib PRIVATE mindspore::dnnl mindspore::mkldnn)
target_link_libraries(mindspore_shared_lib PRIVATE mindspore::dnnl mindspore::mkldnn nnacl)
endif()
if(USE_GLOG)

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "src/ops/populate/populate_register.h"
#include "nnacl/fp32/broadcast_to_fp32.h"
#include "nnacl/base/broadcast_to.h"
using mindspore::schema::PrimitiveType_BroadcastTo;
namespace mindspore {

View File

@ -16,7 +16,7 @@
#include "schema/model_v0_generated.h"
#include "src/ops/populate/populate_register.h"
#include "nnacl/fp32/broadcast_to_fp32.h"
#include "nnacl/base/broadcast_to.h"
namespace mindspore {
namespace lite {

View File

@ -49,10 +49,10 @@ int BroadcastToCPUKernel::Init() {
}
int BroadcastToCPUKernel::Run() {
auto input_data = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
const auto input_data = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
auto output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
return BroadcastTo(input_data, &shape_info_, output_data);
return BroadcastTo(float, input_data, &shape_info_, output_data);
}
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_BroadcastTo, LiteKernelCreator<BroadcastToCPUKernel>)

View File

@ -19,7 +19,7 @@
#include <vector>
#include "src/lite_kernel.h"
#include "nnacl/fp32/broadcast_to_fp32.h"
#include "nnacl/base/broadcast_to.h"
namespace mindspore::kernel {
class BroadcastToCPUKernel : public LiteKernel {

View File

@ -45,8 +45,8 @@ int ActivationGradCPUKernel::Init() {
int ActivationGradCPUKernel::ReSize() { return RET_OK; }
int ActivationGradCPUKernel::DoActivation(int task_id) {
auto yt_addr = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
auto input_addr = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
const auto yt_addr = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
const auto input_addr = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
auto output_addr = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
int length = in_tensors_.at(0)->ElementsNum();

View File

@ -33,6 +33,24 @@ def test_broadcast():
expect = np.broadcast_to(x_np, shape)
assert np.allclose(output.asnumpy(), expect)
shape = (3, 5, 7, 4, 5, 6)
x_np = np.arange(20).reshape((4, 5, 1)).astype(np.int32)
output = P.BroadcastTo(shape)(Tensor(x_np))
expect = np.broadcast_to(x_np, shape)
assert np.allclose(output.asnumpy(), expect)
shape = (8, 5, 7, 4, 5, 6)
x_np = np.arange(24).reshape((1, 4, 1, 6)).astype(np.bool) + 0.2
output = P.BroadcastTo(shape)(Tensor(x_np))
expect = np.broadcast_to(x_np, shape)
assert np.allclose(output.asnumpy(), expect)
shape = (4, 5, 2, 3, 4, 5, 6)
x_np = np.random.rand(2, 3, 1, 5, 1).astype(np.float32)
output = P.BroadcastTo(shape)(Tensor(x_np))
expect = np.broadcast_to(x_np, shape)
assert np.allclose(output.asnumpy(), expect)
shape = (3, 4, 5, 6)
x_np = np.random.rand(3, 1, 5, 1).astype(np.float32)
output = P.BroadcastTo(shape)(Tensor(x_np))
@ -50,6 +68,12 @@ def test_broadcast():
expect = np.broadcast_to(x1_np, shape)
assert np.allclose(output.asnumpy(), expect)
shape = (4, 5)
x1_np = np.ones((1,)).astype(np.bool_)
output = P.BroadcastTo(shape)(Tensor(x1_np))
expect = np.broadcast_to(x1_np, shape)
assert np.allclose(output.asnumpy(), expect)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training

View File

@ -0,0 +1,53 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
from mindspore.ops.operations import _grad_ops as G
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
class NetReluGrad(nn.Cell):
    """Cell that applies ``G.ReLU6Grad`` to two fixed float32 parameters, ``dy`` and ``x``."""

    def __init__(self):
        super(NetReluGrad, self).__init__()
        self.relu6_grad = G.ReLU6Grad()
        # Forward input; contains values below 0, inside (0, 6], and above 6.
        x_value = np.array([[[[1, 0, 6],
                              [-2, 3, 6],
                              [-3, 1, 8]]]]).astype(np.float32)
        # Incoming gradient that is passed through or zeroed element-wise.
        dy_value = np.array([[[[1, 2, 3],
                               [4, 5, 6],
                               [7, 8, 9]]]]).astype(np.float32)
        self.x = Parameter(initializer(Tensor(x_value), [1, 1, 3, 3]), name='x')
        self.dy = Parameter(initializer(Tensor(dy_value), [1, 1, 3, 3]), name='dy')

    def construct(self):
        """Return the ReLU6 gradient computed from the stored ``dy`` and ``x``."""
        return self.relu6_grad(self.dy, self.x)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_relu_grad():
    """Run the ReLU6Grad cell on CPU and compare its output with the expected gradient."""
    net = NetReluGrad()
    actual = net().asnumpy()
    expected = np.array([[[[1, 0, 3], [0, 5, 6], [0, 8, 0]]]]).astype(np.float32)
    # Per-element absolute tolerance; the 3x3 array broadcasts against the 4-D difference.
    tolerance = np.ones(shape=[3, 3]) * 1.0e-6
    deviation = np.abs(actual - expected)
    assert np.all(deviation < tolerance)

View File

@ -29,7 +29,7 @@ context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
class NetReluGrad(nn.Cell):
def __init__(self):
super(NetReluGrad, self).__init__()
self.rekuGrad = G.ReluGrad()
self.relu_grad = G.ReluGrad()
self.x = Parameter(initializer(Tensor(np.array([[[[-1, 1, 1],
[1, -1, 1],
[1, 1, -1]]]]).astype(np.float32)), [1, 1, 3, 3]), name='x')
@ -38,7 +38,7 @@ class NetReluGrad(nn.Cell):
[1, 1, 1]]]]).astype(np.float32)), [1, 1, 3, 3]), name='dy')
def construct(self):
return self.rekuGrad(self.dy, self.x)
return self.relu_grad(self.dy, self.x)
@pytest.mark.level0
@ -47,7 +47,7 @@ class NetReluGrad(nn.Cell):
def test_relu_grad():
relu_grad = NetReluGrad()
output = relu_grad()
expect = np.array([[[[0, 0, 1,], [0, 0, 0,], [1, 1, 0.]]]]).astype(np.float32)
expect = np.array([[[[0, 0, 1], [0, 0, 0], [1, 1, 0]]]]).astype(np.float32)
error = np.ones(shape=[3, 3]) * 1.0e-6
diff = np.abs(output.asnumpy() - expect)
assert np.all(diff < error)