diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.cc index 6d7cb494f74..792e3a3360e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.cc @@ -15,80 +15,42 @@ */ #include "backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.h" +#include "nnacl/errorcode.h" namespace mindspore { namespace kernel { - template void BroadcastToCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); + size_t input_shape_size = input_shape_.size(); + size_t output_shape_size = output_shape_.size(); - size_t offset = output_shape_.size() - input_shape_.size(); - for (size_t i = 0; i < offset; ++i) { - input_shape_.insert(input_shape_.begin(), 1); + if (output_shape_size < input_shape_size) { + MS_LOG(EXCEPTION) << "Cannot broadcast input tensor with shape " << input_shape_ + << " to a smaller dimension shape " << output_shape_ << "."; + } + if (output_shape_size > MAX_SHAPE_SIZE) { + MS_LOG(EXCEPTION) << "Cannot broadcast input tensor with shape " << input_shape_ << " to a shape " << output_shape_ + << " more than 8-D."; + } + size_t offset = output_shape_size - input_shape_size; + for (size_t i = 0; i < input_shape_size; ++i) { + if (input_shape_[i] != output_shape_[i + offset] && input_shape_[i] != 1) { + MS_LOG(EXCEPTION) << "Cannot broadcast input tensor with shape " << input_shape_ << " to a shape " + << output_shape_ << "."; + } } - for (size_t i = 0; i < input_shape_.size(); ++i) { - if (output_shape_[i] < input_shape_[i] || output_shape_[i] % input_shape_[i] != 0) { - MS_LOG(EXCEPTION) << "Cannot broadcast input tensor with shape " << input_shape_ << " to " - << "output tensor with shape " << output_shape_ - << ". 
Output shape must be the integer times of input shape at the " << i << " dim!"; - } + for (size_t i = 0; i < input_shape_size; ++i) { + shape_info_.input_shape_[i] = SizeToInt(input_shape_[i]); } - for (size_t j = 0; j < output_shape_.size(); j++) { - nums_ *= output_shape_[j]; - } - - tmp_ptr_ = reinterpret_cast(malloc(nums_ * sizeof(T))); -} - -// BroadcastTo -template -void BroadcastToCPUKernel::BroadcastToImpl(size_t dim) { - if (dim == output_shape_.size() - 1) { - size_t input_nums = 1; - for (size_t j = 0; j < input_shape_.size() - 1; ++j) { - input_nums *= input_shape_[j]; - } - size_t rate = output_shape_[dim] / input_shape_[dim]; - - for (size_t j = 0; j < input_nums; ++j) { - T *in_ptr = input_ptr_ + input_shape_[dim] * j; - for (size_t i = 0; i < rate; ++i) { - T *out_ptr = tmp_ptr_ + (j * rate + i) * input_shape_[dim]; - memcpy_s(out_ptr, input_shape_[dim] * sizeof(T), in_ptr, input_shape_[dim] * sizeof(T)); - } - } - size_t elems = input_shape_[dim] * rate * input_nums; - memcpy_s(output_ptr_, elems * sizeof(T), tmp_ptr_, elems * sizeof(T)); - return; - } - - BroadcastToImpl(dim + 1); - - size_t rate = output_shape_[dim] / input_shape_[dim]; - if (rate > 1) { - size_t elems_nums = 1; - for (size_t j = output_shape_.size() - 1; j > dim; --j) { - elems_nums *= output_shape_[j]; - } - size_t input_nums = 1; - for (size_t j = 0; j < dim; ++j) { - input_nums *= input_shape_[j]; - } - - for (size_t j = 0; j < input_nums; ++j) { - T *in_ptr = output_ptr_ + elems_nums * j; - for (size_t i = 0; i < rate; ++i) { - T *out_ptr = tmp_ptr_ + (j * rate + i) * elems_nums; - memcpy_s(out_ptr, elems_nums * sizeof(T), in_ptr, elems_nums * sizeof(T)); - } - } - size_t elems = elems_nums * rate * input_nums; - memcpy_s(output_ptr_, elems * sizeof(T), tmp_ptr_, elems * sizeof(T)); + for (size_t i = 0; i < output_shape_size; ++i) { + shape_info_.output_shape_[i] = SizeToInt(output_shape_[i]); } + shape_info_.input_shape_size_ = SizeToInt(input_shape_size); + shape_info_.output_shape_size_ = SizeToInt(output_shape_size); } template @@ -96,25 +58,33 @@ bool BroadcastToCPUKernel::Launch(const std::vector &inputs, cons const std::vector &outputs) { if (inputs.size() != 1 || outputs.size() != 1) { MS_LOG(EXCEPTION) << "Wrong number of inputs or outputs!"; - return false; } - if ((inputs[0] == nullptr) || (inputs[0]->size == 0)) { MS_LOG(EXCEPTION) << "Input data is NULL!"; - return false; } - if ((outputs[0] == nullptr) || (outputs[0]->size == 0)) { MS_LOG(EXCEPTION) << "Output data is NULL!"; - return false; } - input_ptr_ = reinterpret_cast(inputs[0]->addr); - output_ptr_ = reinterpret_cast(outputs[0]->addr); + const auto input_addr = reinterpret_cast(inputs[0]->addr); + auto output_addr = reinterpret_cast(outputs[0]->addr); + int ret = NNACL_ERR; + if constexpr (std::is_same_v) { + ret = BroadcastTo(bool, input_addr, &shape_info_, output_addr); + } else if constexpr (std::is_same_v) { + ret = BroadcastTo(int, input_addr, &shape_info_, output_addr); + } else if constexpr (std::is_same_v) { + ret = BroadcastTo(float, input_addr, &shape_info_, output_addr); + } else { + MS_LOG(EXCEPTION) << "Not supported data type for BroadcastTo."; + } - BroadcastToImpl(0); - - return true; + if (ret == NNACL_OK) { + return true; + } + MS_LOG(ERROR) << "Broadcast tensor with shape " << input_shape_ << " to shape " << output_shape_ + << " execute failed."; + return false; } } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.h 
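For reference, the rewritten Launch resolves the element type at compile time and hands the work to one of the per-type nnacl entry points declared in nnacl/base/broadcast_to.h. A minimal standalone sketch of that dispatch, assuming only those three declarations (DispatchBroadcast itself is illustrative, not part of the patch):

#include <type_traits>

struct BroadcastShapeInfo;  // defined in nnacl/broadcast_to_parameter.h

extern "C" {
int broadcast_to_int(const int *input, BroadcastShapeInfo *shape_info, int *output);
int broadcast_to_float(const float *input, BroadcastShapeInfo *shape_info, float *output);
int broadcast_to_bool(const bool *input, BroadcastShapeInfo *shape_info, bool *output);
}

// Illustrative wrapper mirroring the "if constexpr" chain in BroadcastToCPUKernel::Launch:
// the element type picks one of the C entry points at compile time, no runtime branching on type.
template <typename T>
int DispatchBroadcast(const T *input, BroadcastShapeInfo *shape_info, T *output) {
  if constexpr (std::is_same_v<T, bool>) {
    return broadcast_to_bool(input, shape_info, output);
  } else if constexpr (std::is_same_v<T, int>) {
    return broadcast_to_int(input, shape_info, output);
  } else if constexpr (std::is_same_v<T, float>) {
    return broadcast_to_float(input, shape_info, output);
  } else {
    return -1;  // stand-in for NNACL_ERR; the kernel raises an exception for unsupported types
  }
}
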
b/mindspore/ccsrc/backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.h index b535c445b93..6d9c288787c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.h @@ -21,44 +21,32 @@ #include #include "backend/kernel_compiler/cpu/cpu_kernel.h" #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" +#include "nnacl/base/broadcast_to.h" namespace mindspore { namespace kernel { - template class BroadcastToCPUKernel : public CPUKernel { public: BroadcastToCPUKernel() = default; - ~BroadcastToCPUKernel() override { - if (tmp_ptr_ != nullptr) { - free(tmp_ptr_); - tmp_ptr_ = nullptr; - } - }; + ~BroadcastToCPUKernel() = default; bool Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs) override; void InitKernel(const CNodePtr &kernel_node) override; - void BroadcastToImpl(size_t dim); - - size_t Index(const size_t &index, const size_t &dim) { return dim == 1 ? 0 : index; } - private: std::vector input_shape_; std::vector output_shape_; - size_t nums_{1}; - T *input_ptr_{nullptr}; - T *output_ptr_{nullptr}; - T *tmp_ptr_{nullptr}; + BroadcastShapeInfo shape_info_; }; -MS_REG_CPU_KERNEL(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), - BroadcastToCPUKernel); -MS_REG_CPU_KERNEL(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), - BroadcastToCPUKernel); -MS_REG_CPU_KERNEL(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), - BroadcastToCPUKernel); +MS_REG_CPU_KERNEL_T(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + BroadcastToCPUKernel, float); +MS_REG_CPU_KERNEL_T(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + BroadcastToCPUKernel, int); +MS_REG_CPU_KERNEL_T(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), + BroadcastToCPUKernel, bool); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc index eeecddf5abe..fb9f1d88a09 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc @@ -18,28 +18,32 @@ #include "backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h" #include "common/thread_pool.h" #include "runtime/device/cpu/cpu_device_address.h" +#include "nnacl/fp32_grad/activation_grad.h" +#include "nnacl/errorcode.h" namespace mindspore { namespace kernel { template void EltWiseGradCPUKernel::ReluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) { - for (size_t i = start; i < end; i++) { - if (input2[i] > 0) { - out[i] = input1[i]; - } else { - out[i] = 0; + if constexpr (std::is_same_v) { + int ret = ::ReluGrad(input1 + start, input2 + start, end - start, out + start); + if (ret == NNACL_ERR) { + MS_LOG(EXCEPTION) << "ReLUGrad failed."; } + } else { + MS_LOG(EXCEPTION) << "ReLUGrad only support float"; } } template void EltWiseGradCPUKernel::ReLU6Grad(const T *input1, const T *input2, T *out, size_t start, size_t end) { - for (size_t i = start; i < end; i++) { - if (input2[i] > 0 && input2[i] <= 6) { - out[i] = input1[i]; - } else { - out[i] = 0; + if constexpr (std::is_same_v) { + int ret = ::Relu6Grad(input1 + start, input2 + start, end - start, 
out + start); + if (ret == NNACL_ERR) { + MS_LOG(EXCEPTION) << "ReLU6Grad failed."; } + } else { + MS_LOG(EXCEPTION) << "ReLU6Grad only support float"; } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/CMakeLists.txt b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/CMakeLists.txt index 7263e16d793..a9b70536809 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/CMakeLists.txt +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/CMakeLists.txt @@ -30,12 +30,9 @@ file(GLOB KERNEL_SRC ${NNACL_DIR}/int8/*.c ${NNACL_DIR}/infer/*.c ${NNACL_DIR}/base/*.c + ${NNACL_DIR}/fp32_grad/*.c ) -if(SUPPORT_TRAIN) - file(GLOB TRAIN_SRC ${NNACL_DIR}/fp32_grad/*.c) -endif() - if(PLATFORM_ARM64) file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm64/*.S) set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.c new file mode 100644 index 00000000000..cd6eff53856 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.c @@ -0,0 +1,95 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "nnacl/base/broadcast_to.h" +#include +#include "nnacl/op_base.h" +#include "nnacl/errorcode.h" + +size_t accumulate(const int *shape, int start, int end) { + size_t product = 1; + for (int i = start; i <= end; ++i) { + product *= (size_t)shape[i]; + } + return product; +} + +void pad_input_shape(int *input_shape, int input_shape_len, int output_shape_len) { + if (input_shape_len < output_shape_len) { + const int shape_gap = output_shape_len - input_shape_len; + for (int i = input_shape_len - 1; i >= 0; --i) { + input_shape[i + shape_gap] = input_shape[i]; + } + for (int i = 0; i < shape_gap; ++i) { + input_shape[i] = 1; + } + } +} + +#define BROADCAST_TO(type) \ + int broadcast_to_##type(const type *input, BroadcastShapeInfo *shape_info, type *output) { \ + if (shape_info->output_shape_size_ > MAX_SHAPE_SIZE) { \ + return NNACL_ERR; \ + } \ + int *input_shape = shape_info->input_shape_; \ + const int *output_shape = shape_info->output_shape_; \ + const int dim_max = shape_info->output_shape_size_ - 1; \ + const size_t bool_length = 1, number_length = 4; \ + const size_t data_length = strcmp(#type, "bool") ? 
number_length : bool_length; \ + const size_t temp_length = accumulate(output_shape, 0, dim_max); \ + type *data_temp = (type *)malloc(temp_length * data_length); \ + if (data_temp == NULL) { \ + return NNACL_ERR; \ + } \ + pad_input_shape(input_shape, shape_info->input_shape_size_, dim_max + 1); \ + shape_info->input_shape_size_ = dim_max + 1; \ + \ + size_t before_dim_elements_num = accumulate(input_shape, 0, dim_max - 1); \ + size_t after_dim_elements_num = input_shape[dim_max]; \ + size_t dim_broadcast_rate = (size_t)(output_shape[dim_max] / input_shape[dim_max]); \ + for (size_t i = 0; i < before_dim_elements_num; ++i) { \ + const type *in_ptr = input + i * after_dim_elements_num; \ + for (size_t j = 0; j < dim_broadcast_rate; ++j) { \ + type *out_ptr = output + (i * dim_broadcast_rate + j) * after_dim_elements_num; \ + memcpy(out_ptr, in_ptr, after_dim_elements_num *data_length); \ + } \ + } \ + \ + int dim_index = dim_max - 1; \ + while (dim_index >= 0) { \ + dim_broadcast_rate = (size_t)(output_shape[dim_index] / input_shape[dim_index]); \ + if (dim_broadcast_rate > 1) { \ + before_dim_elements_num = accumulate(input_shape, 0, dim_index - 1); \ + after_dim_elements_num = accumulate(output_shape, dim_index + 1, dim_max); \ + for (size_t i = 0; i < before_dim_elements_num; ++i) { \ + type *in_ptr = output + i * after_dim_elements_num; \ + for (size_t j = 0; j < dim_broadcast_rate; ++j) { \ + type *out_ptr = data_temp + (i * dim_broadcast_rate + j) * after_dim_elements_num; \ + memcpy(out_ptr, in_ptr, after_dim_elements_num *data_length); \ + } \ + } \ + size_t elements_total = before_dim_elements_num * dim_broadcast_rate * after_dim_elements_num; \ + memcpy(output, data_temp, elements_total *data_length); \ + } \ + --dim_index; \ + } \ + free(data_temp); \ + return NNACL_OK; \ + } + +BROADCAST_TO(int) +BROADCAST_TO(float) +BROADCAST_TO(bool) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/broadcast_to_fp32.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.h similarity index 55% rename from mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/broadcast_to_fp32.h rename to mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.h index e59c0158c9d..4092bec85f3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/broadcast_to_fp32.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,18 +13,20 @@ * See the License for the specific language governing permissions and * limitations under the License. 
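The BROADCAST_TO(type) macro above expands to one function per element type, all sharing the same strategy: replicate the innermost dimension directly into the output buffer, then walk the remaining dimensions from innermost to outermost, replicating the already-expanded trailing block through a scratch buffer. A minimal C++ sketch of that strategy for the float case, assuming the input shape has already been right-aligned with leading 1s as pad_input_shape does (BroadcastToFloat is illustrative, not part of the patch):

#include <cstring>
#include <vector>

// Expands `input` (shape in_shape, same rank as out_shape, padded with leading 1s) into
// `output` (shape out_shape). Shapes are assumed valid: each in_shape[i] is 1 or equals
// out_shape[i].
void BroadcastToFloat(const float *input, const std::vector<int> &in_shape,
                      const std::vector<int> &out_shape, float *output) {
  const int dim_max = static_cast<int>(out_shape.size()) - 1;
  auto prod = [](const std::vector<int> &s, int start, int end) {
    size_t p = 1;
    for (int i = start; i <= end; ++i) p *= static_cast<size_t>(s[i]);
    return p;
  };
  // Step 1: replicate the innermost dimension straight into the output buffer.
  size_t before = prod(in_shape, 0, dim_max - 1);
  size_t after = static_cast<size_t>(in_shape[dim_max]);
  size_t rate = static_cast<size_t>(out_shape[dim_max]) / in_shape[dim_max];
  for (size_t i = 0; i < before; ++i) {
    for (size_t j = 0; j < rate; ++j) {
      std::memcpy(output + (i * rate + j) * after, input + i * after, after * sizeof(float));
    }
  }
  // Step 2: walk outward; each broadcast dimension replicates the already-expanded trailing
  // block into a scratch buffer, then copies the result back into the output.
  std::vector<float> temp(prod(out_shape, 0, dim_max));
  for (int dim = dim_max - 1; dim >= 0; --dim) {
    rate = static_cast<size_t>(out_shape[dim]) / in_shape[dim];
    if (rate <= 1) continue;
    before = prod(in_shape, 0, dim - 1);
    after = prod(out_shape, dim + 1, dim_max);
    for (size_t i = 0; i < before; ++i) {
      for (size_t j = 0; j < rate; ++j) {
        std::memcpy(temp.data() + (i * rate + j) * after, output + i * after, after * sizeof(float));
      }
    }
    std::memcpy(output, temp.data(), before * rate * after * sizeof(float));
  }
}

The generated C functions perform the same in-place expansion on the raw output buffer, using a malloc'd scratch buffer that is freed on every call and returning NNACL_ERR if the allocation fails.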
*/ -#ifndef MINDSPORE_NNACL_FP32_BROADCAST_TO_FP32_H_ -#define MINDSPORE_NNACL_FP32_BROADCAST_TO_FP32_H_ +#ifndef MINDSPORE_NNACL_FP32_BROADCAST_TO_H_ +#define MINDSPORE_NNACL_FP32_BROADCAST_TO_H_ -#include "nnacl/op_base.h" #include "nnacl/broadcast_to_parameter.h" #ifdef __cplusplus extern "C" { #endif -int BroadcastTo(const float *input, BroadcastShapeInfo *shape_info, float *output); +#define BroadcastTo(type, input, shape_info, output) broadcast_to_##type(input, shape_info, output) +int broadcast_to_int(const int *input, BroadcastShapeInfo *shape_info, int *output); +int broadcast_to_float(const float *input, BroadcastShapeInfo *shape_info, float *output); +int broadcast_to_bool(const bool *input, BroadcastShapeInfo *shape_info, bool *output); #ifdef __cplusplus } #endif -#endif // MINDSPORE_NNACL_FP32_BROADCAST_TO_FP32_H_ +#endif // MINDSPORE_NNACL_FP32_BROADCAST_TO_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/broadcast_to_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/broadcast_to_parameter.h index 074dbb9111c..874c246b212 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/broadcast_to_parameter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/broadcast_to_parameter.h @@ -20,14 +20,14 @@ typedef struct BroadcastToParameter { OpParameter op_parameter_; - int shape_[COMM_SHAPE_SIZE]; + int shape_[MAX_SHAPE_SIZE]; size_t shape_size_; } BroadcastToParameter; typedef struct BroadcastShapeInfo { - int input_shape_[COMM_SHAPE_SIZE]; + int input_shape_[MAX_SHAPE_SIZE]; int input_shape_size_; - int output_shape_[COMM_SHAPE_SIZE]; + int output_shape_[MAX_SHAPE_SIZE]; int output_shape_size_; } BroadcastShapeInfo; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/broadcast_to_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/broadcast_to_fp32.c deleted file mode 100644 index 73202f663fe..00000000000 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/broadcast_to_fp32.c +++ /dev/null @@ -1,103 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "nnacl/fp32/broadcast_to_fp32.h" -#include -#include "nnacl/op_base.h" -#include "nnacl/errorcode.h" - -void PadBroadcastShapeInfo(BroadcastShapeInfo *shape_info) { - if (shape_info->input_shape_size_ < DIMENSION_4D) { - int input_shape_tmp[DIMENSION_4D]; - for (int i = 0; i < shape_info->input_shape_size_; ++i) { - input_shape_tmp[i] = shape_info->input_shape_[i]; - } - int input_shape_index = shape_info->input_shape_size_ - 1; - for (int i = DIMENSION_4D - 1; i >= 0; --i) { - if (input_shape_index >= 0) { - shape_info->input_shape_[i] = input_shape_tmp[input_shape_index--]; - } else { - shape_info->input_shape_[i] = 1; - } - } - } - if (shape_info->output_shape_size_ < DIMENSION_4D) { - int output_shape_tmp[DIMENSION_4D]; - for (int i = 0; i < shape_info->output_shape_size_; ++i) { - output_shape_tmp[i] = shape_info->output_shape_[i]; - } - int output_shape_index = shape_info->output_shape_size_ - 1; - for (int i = DIMENSION_4D - 1; i >= 0; --i) { - if (output_shape_index >= 0) { - shape_info->output_shape_[i] = output_shape_tmp[output_shape_index--]; - } else { - shape_info->output_shape_[i] = 1; - } - } - } -} - -int BroadcastTo(const float *input, BroadcastShapeInfo *shape_info, float *output) { - if (shape_info->input_shape_size_ > DIMENSION_4D || shape_info->output_shape_size_ > DIMENSION_4D) { - return NNACL_ERR; - } - PadBroadcastShapeInfo(shape_info); - size_t input_dim_offset[DIMENSION_4D - 1]; - input_dim_offset[2] = shape_info->input_shape_[3] * 4; - input_dim_offset[1] = input_dim_offset[2] * shape_info->input_shape_[2]; - input_dim_offset[0] = input_dim_offset[1] * shape_info->input_shape_[1]; - size_t output_dim_offset[DIMENSION_4D - 1]; - output_dim_offset[2] = shape_info->output_shape_[3] * 4; - output_dim_offset[1] = output_dim_offset[2] * shape_info->output_shape_[2]; - output_dim_offset[0] = output_dim_offset[1] * shape_info->output_shape_[1]; - uint8_t *in_base = (uint8_t *)input; - uint8_t *out_base = (uint8_t *)(output); - for (int32_t dim0 = 0; dim0 < shape_info->input_shape_[0]; ++dim0) { - for (int32_t dim1 = 0; dim1 < shape_info->input_shape_[1]; ++dim1) { - for (int32_t dim2 = 0; dim2 < shape_info->input_shape_[2]; ++dim2) { - if (shape_info->input_shape_[3] == shape_info->output_shape_[3]) { - memcpy(out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1 + output_dim_offset[2] * dim2, - in_base + input_dim_offset[0] * dim0 + input_dim_offset[1] * dim1 + input_dim_offset[2] * dim2, - input_dim_offset[2]); - } else { - for (int32_t dim3 = 0; dim3 < shape_info->output_shape_[3]; ++dim3) { - memcpy(out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1 + output_dim_offset[2] * dim2 + - dim3 * 4, - in_base + input_dim_offset[0] * dim0 + input_dim_offset[1] * dim1 + input_dim_offset[2] * dim2, 4); - } - } - } - if (shape_info->input_shape_[2] != shape_info->output_shape_[2]) { - for (int32_t dim2 = 0; dim2 < shape_info->output_shape_[2]; ++dim2) { - memcpy(out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1 + dim2 * output_dim_offset[2], - out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1, output_dim_offset[2]); - } - } - } - if (shape_info->input_shape_[1] != shape_info->output_shape_[1]) { - for (int32_t dim1 = 0; dim1 < shape_info->output_shape_[1]; ++dim1) { - memcpy(out_base + output_dim_offset[0] * dim0 + output_dim_offset[1] * dim1, - out_base + output_dim_offset[0] * dim0, output_dim_offset[1]); - } - } - } - if (shape_info->input_shape_[0] != shape_info->output_shape_[0]) { - 
for (int32_t dim0 = 0; dim0 < shape_info->output_shape_[0]; ++dim0) { - memcpy(out_base + output_dim_offset[0] * dim0, out_base, output_dim_offset[0]); - } - } - return NNACL_OK; -} diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.c index ff507f917b2..488d413727b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.c @@ -20,7 +20,7 @@ #include "nnacl/fp32_grad/activation_grad.h" #include "nnacl/errorcode.h" -inline int ReluGrad(float *src0, float *src1, size_t length, float *dst) { +int ReluGrad(const float *src0, const float *src1, size_t length, float *dst) { int i = 0; #ifdef ENABLE_ARM float32x4_t zero_4 = vdupq_n_f32(0.0f); @@ -38,7 +38,7 @@ inline int ReluGrad(float *src0, float *src1, size_t length, float *dst) { return NNACL_OK; } -int Relu6Grad(float *src0, float *src1, size_t length, float *dst) { +int Relu6Grad(const float *src0, const float *src1, size_t length, float *dst) { int i = 0; #ifdef ENABLE_ARM float32x4_t zero_4 = vdupq_n_f32(0.0f); @@ -59,28 +59,28 @@ int Relu6Grad(float *src0, float *src1, size_t length, float *dst) { return NNACL_OK; } -int LReluGrad(float *src0, float *src1, size_t length, float *dst, float alpha) { +int LReluGrad(const float *src0, const float *src1, size_t length, float *dst, float alpha) { for (size_t i = 0; i < length; ++i) { dst[i] = src1[i] > 0.0f ? src0[i] : alpha * src0[i]; } return NNACL_OK; } -int SigmoidGrad(float *src0, float *src1, size_t length, float *dst) { +int SigmoidGrad(const float *src0, const float *src1, size_t length, float *dst) { for (size_t i = 0; i < length; ++i) { dst[i] = src0[i] * (src1[i] * (1.0f - src1[i])); } return NNACL_OK; } -int TanhGrad(float *src0, float *src1, size_t length, float *dst) { +int TanhGrad(const float *src0, const float *src1, size_t length, float *dst) { for (size_t i = 0; i < length; ++i) { dst[i] = (1.0f - (src1[i] * src1[i])) * src0[i]; } return NNACL_OK; } -int HSwishGrad(float *src0, float *src1, size_t length, float *dst) { +int HSwishGrad(const float *src0, const float *src1, size_t length, float *dst) { for (size_t i = 0; i < length; ++i) { float tmp = (src1[i] > 3.0f ? 1.0f : (src1[i] < -3.0f ? 0.0f : (2.0f * src1[i] + 3.0f) / 6.0f)); dst[i] = tmp * src0[i]; @@ -88,7 +88,7 @@ int HSwishGrad(float *src0, float *src1, size_t length, float *dst) { return NNACL_OK; } -int HSigmoidGrad(float *src0, float *src1, size_t length, float *dst) { +int HSigmoidGrad(const float *src0, const float *src1, size_t length, float *dst) { for (size_t i = 0; i < length; ++i) { float tmp = (src1[i] > 3.0f ? 0.0f : (src1[i] < -3.0f ? 0.0f : 1.0f / 6.0f)); dst[i] = tmp * src0[i]; @@ -96,14 +96,14 @@ int HSigmoidGrad(float *src0, float *src1, size_t length, float *dst) { return NNACL_OK; } -int EluGrad(float *src0, float *src1, size_t length, float *dst, float alpha) { +int EluGrad(const float *src0, const float *src1, size_t length, float *dst, float alpha) { for (size_t i = 0; i < length; ++i) { dst[i] = (src1[i] > 0.0f ? 
src0[i] : alpha * expm1(src1[i]) * src0[i]); } return NNACL_OK; } -int GeluGrad(float *src0, float *src1, size_t length, float *dst) { +int GeluGrad(const float *src0, const float *src1, size_t length, float *dst) { for (size_t i = 0; i < length; ++i) { dst[i] = src0[i] * ((0.5 * (1.0 + erf(src1[i] / 1.4142135623730951))) + (src1[i] * exp(-0.5 * src1[i] * src1[i]) / 2.5066282746)); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.h index 8317571386a..e88b27addb5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.h @@ -30,15 +30,15 @@ typedef struct ActivationGradParameter { extern "C" { #endif -int ReluGrad(float *src0, float *src1, size_t length, float *dst); -int Relu6Grad(float *src0, float *src1, size_t length, float *dst); -int LReluGrad(float *src0, float *src1, size_t length, float *dst, float alpha); -int SigmoidGrad(float *src0, float *src1, size_t length, float *dst); -int TanhGrad(float *src0, float *src1, size_t length, float *dst); -int HSwishGrad(float *src0, float *src1, size_t length, float *dst); -int HSigmoidGrad(float *src0, float *src1, size_t length, float *dst); -int EluGrad(float *src0, float *src1, size_t length, float *dst, float alpha); -int GeluGrad(float *src0, float *src1, size_t length, float *dst); +int ReluGrad(const float *src0, const float *src1, size_t length, float *dst); +int Relu6Grad(const float *src0, const float *src1, size_t length, float *dst); +int LReluGrad(const float *src0, const float *src1, size_t length, float *dst, float alpha); +int SigmoidGrad(const float *src0, const float *src1, size_t length, float *dst); +int TanhGrad(const float *src0, const float *src1, size_t length, float *dst); +int HSwishGrad(const float *src0, const float *src1, size_t length, float *dst); +int HSigmoidGrad(const float *src0, const float *src1, size_t length, float *dst); +int EluGrad(const float *src0, const float *src1, size_t length, float *dst, float alpha); +int GeluGrad(const float *src0, const float *src1, size_t length, float *dst); #ifdef __cplusplus } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/broadcast_to_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/broadcast_to_infer.h index 90d818efea7..5688984a969 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/broadcast_to_infer.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/broadcast_to_infer.h @@ -17,7 +17,7 @@ #define MINDSPORE_NNACL_BROADCAST_TO_INFER_H #include "nnacl/infer/common_infer.h" -#include "nnacl/fp32/broadcast_to_fp32.h" +#include "nnacl/base/broadcast_to.h" #ifdef __cplusplus extern "C" { diff --git a/mindspore/ccsrc/cxx_api/CMakeLists.txt b/mindspore/ccsrc/cxx_api/CMakeLists.txt index bfb4ad08c97..b8913c76a03 100644 --- a/mindspore/ccsrc/cxx_api/CMakeLists.txt +++ b/mindspore/ccsrc/cxx_api/CMakeLists.txt @@ -57,7 +57,7 @@ else() endif() if(ENABLE_CPU) - target_link_libraries(mindspore_shared_lib PRIVATE mindspore::dnnl mindspore::mkldnn) + target_link_libraries(mindspore_shared_lib PRIVATE mindspore::dnnl mindspore::mkldnn nnacl) endif() if(USE_GLOG) diff --git a/mindspore/lite/src/ops/populate/broadcast_to_populate.cc b/mindspore/lite/src/ops/populate/broadcast_to_populate.cc index 33d8817d5ba..427c74d1847 100644 --- 
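The const-qualified grad kernels keep their original element-wise behaviour; only the pointer qualifiers change. For reference, a scalar sketch of the two kernels now called from EltWiseGradCPUKernel (the nnacl versions add a NEON fast path under ENABLE_ARM; the *Ref names are illustrative, not part of the patch):

#include <cstddef>

// dy: incoming gradient, x: forward-pass input, dx: resulting gradient.
void ReluGradRef(const float *dy, const float *x, size_t length, float *dx) {
  for (size_t i = 0; i < length; ++i) {
    dx[i] = x[i] > 0.0f ? dy[i] : 0.0f;
  }
}

void Relu6GradRef(const float *dy, const float *x, size_t length, float *dx) {
  for (size_t i = 0; i < length; ++i) {
    dx[i] = (x[i] > 0.0f && x[i] <= 6.0f) ? dy[i] : 0.0f;
  }
}

This matches the expectation in the new test_relu6_grad_op.py case below: entries where x lies in (0, 6] (including the 6s) pass the incoming gradient through, while 0, negative values and 8 produce 0.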
a/mindspore/lite/src/ops/populate/broadcast_to_populate.cc +++ b/mindspore/lite/src/ops/populate/broadcast_to_populate.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "src/ops/populate/populate_register.h" -#include "nnacl/fp32/broadcast_to_fp32.h" +#include "nnacl/base/broadcast_to.h" using mindspore::schema::PrimitiveType_BroadcastTo; namespace mindspore { diff --git a/mindspore/lite/src/ops/populate/v0/broadcast_to_populate_v0.cc b/mindspore/lite/src/ops/populate/v0/broadcast_to_populate_v0.cc index 2d9a37bad62..fa78b3af3d1 100644 --- a/mindspore/lite/src/ops/populate/v0/broadcast_to_populate_v0.cc +++ b/mindspore/lite/src/ops/populate/v0/broadcast_to_populate_v0.cc @@ -16,7 +16,7 @@ #include "schema/model_v0_generated.h" #include "src/ops/populate/populate_register.h" -#include "nnacl/fp32/broadcast_to_fp32.h" +#include "nnacl/base/broadcast_to.h" namespace mindspore { namespace lite { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc index dbd664bbe2b..f1ce54d2959 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc @@ -49,10 +49,10 @@ int BroadcastToCPUKernel::Init() { } int BroadcastToCPUKernel::Run() { - auto input_data = reinterpret_cast(in_tensors_.at(0)->MutableData()); + const auto input_data = reinterpret_cast(in_tensors_.at(0)->MutableData()); auto output_data = reinterpret_cast(out_tensors_.at(0)->MutableData()); - return BroadcastTo(input_data, &shape_info_, output_data); + return BroadcastTo(float, input_data, &shape_info_, output_data); } REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_BroadcastTo, LiteKernelCreator) diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.h index 9415079d532..c54dc4407de 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.h @@ -19,7 +19,7 @@ #include #include "src/lite_kernel.h" -#include "nnacl/fp32/broadcast_to_fp32.h" +#include "nnacl/base/broadcast_to.h" namespace mindspore::kernel { class BroadcastToCPUKernel : public LiteKernel { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.cc index 9619ea4f77d..62caf197e98 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.cc @@ -45,8 +45,8 @@ int ActivationGradCPUKernel::Init() { int ActivationGradCPUKernel::ReSize() { return RET_OK; } int ActivationGradCPUKernel::DoActivation(int task_id) { - auto yt_addr = reinterpret_cast(in_tensors_.at(0)->MutableData()); - auto input_addr = reinterpret_cast(in_tensors_.at(1)->MutableData()); + const auto yt_addr = reinterpret_cast(in_tensors_.at(0)->MutableData()); + const auto input_addr = reinterpret_cast(in_tensors_.at(1)->MutableData()); auto output_addr = reinterpret_cast(out_tensors_.at(0)->MutableData()); int length = in_tensors_.at(0)->ElementsNum(); diff --git a/tests/st/ops/cpu/test_broadcast_to_op.py b/tests/st/ops/cpu/test_broadcast_to_op.py index 05b0969c0d6..bb749b6e4b7 100644 --- a/tests/st/ops/cpu/test_broadcast_to_op.py +++ b/tests/st/ops/cpu/test_broadcast_to_op.py @@ -33,6 +33,24 @@ def test_broadcast(): expect = np.broadcast_to(x_np, shape) assert 
np.allclose(output.asnumpy(), expect) + shape = (3, 5, 7, 4, 5, 6) + x_np = np.arange(20).reshape((4, 5, 1)).astype(np.int32) + output = P.BroadcastTo(shape)(Tensor(x_np)) + expect = np.broadcast_to(x_np, shape) + assert np.allclose(output.asnumpy(), expect) + + shape = (8, 5, 7, 4, 5, 6) + x_np = np.arange(24).reshape((1, 4, 1, 6)).astype(np.bool) + 0.2 + output = P.BroadcastTo(shape)(Tensor(x_np)) + expect = np.broadcast_to(x_np, shape) + assert np.allclose(output.asnumpy(), expect) + + shape = (4, 5, 2, 3, 4, 5, 6) + x_np = np.random.rand(2, 3, 1, 5, 1).astype(np.float32) + output = P.BroadcastTo(shape)(Tensor(x_np)) + expect = np.broadcast_to(x_np, shape) + assert np.allclose(output.asnumpy(), expect) + shape = (3, 4, 5, 6) x_np = np.random.rand(3, 1, 5, 1).astype(np.float32) output = P.BroadcastTo(shape)(Tensor(x_np)) @@ -50,6 +68,12 @@ def test_broadcast(): expect = np.broadcast_to(x1_np, shape) assert np.allclose(output.asnumpy(), expect) + shape = (4, 5) + x1_np = np.ones((1,)).astype(np.bool_) + output = P.BroadcastTo(shape)(Tensor(x1_np)) + expect = np.broadcast_to(x1_np, shape) + assert np.allclose(output.asnumpy(), expect) + @pytest.mark.level0 @pytest.mark.platform_x86_gpu_training diff --git a/tests/st/ops/cpu/test_relu6_grad_op.py b/tests/st/ops/cpu/test_relu6_grad_op.py new file mode 100644 index 00000000000..b5e2725319a --- /dev/null +++ b/tests/st/ops/cpu/test_relu6_grad_op.py @@ -0,0 +1,53 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.common.initializer import initializer +from mindspore.common.parameter import Parameter +from mindspore.ops.operations import _grad_ops as G + +context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + + +class NetReluGrad(nn.Cell): + def __init__(self): + super(NetReluGrad, self).__init__() + self.relu6_grad = G.ReLU6Grad() + self.x = Parameter(initializer(Tensor(np.array([[[[1, 0, 6], + [-2, 3, 6], + [-3, 1, 8]]]]).astype(np.float32)), [1, 1, 3, 3]), name='x') + self.dy = Parameter(initializer(Tensor(np.array([[[[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]]]).astype(np.float32)), [1, 1, 3, 3]), name='dy') + + def construct(self): + return self.relu6_grad(self.dy, self.x) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_relu_grad(): + relu_grad = NetReluGrad() + output = relu_grad() + expect = np.array([[[[1, 0, 3], [0, 5, 6], [0, 8, 0]]]]).astype(np.float32) + error = np.ones(shape=[3, 3]) * 1.0e-6 + diff = np.abs(output.asnumpy() - expect) + assert np.all(diff < error) diff --git a/tests/st/ops/cpu/test_relu_grad_op.py b/tests/st/ops/cpu/test_relu_grad_op.py index e76eaae87df..82c821351c6 100644 --- a/tests/st/ops/cpu/test_relu_grad_op.py +++ b/tests/st/ops/cpu/test_relu_grad_op.py @@ -29,7 +29,7 @@ context.set_context(mode=context.GRAPH_MODE, device_target='CPU') class NetReluGrad(nn.Cell): def __init__(self): super(NetReluGrad, self).__init__() - self.rekuGrad = G.ReluGrad() + self.relu_grad = G.ReluGrad() self.x = Parameter(initializer(Tensor(np.array([[[[-1, 1, 1], [1, -1, 1], [1, 1, -1]]]]).astype(np.float32)), [1, 1, 3, 3]), name='x') @@ -38,7 +38,7 @@ class NetReluGrad(nn.Cell): [1, 1, 1]]]]).astype(np.float32)), [1, 1, 3, 3]), name='dy') def construct(self): - return self.rekuGrad(self.dy, self.x) + return self.relu_grad(self.dy, self.x) @pytest.mark.level0 @@ -47,7 +47,7 @@ class NetReluGrad(nn.Cell): def test_relu_grad(): relu_grad = NetReluGrad() output = relu_grad() - expect = np.array([[[[0, 0, 1,], [0, 0, 0,], [1, 1, 0.]]]]).astype(np.float32) + expect = np.array([[[[0, 0, 1], [0, 0, 0], [1, 1, 0]]]]).astype(np.float32) error = np.ones(shape=[3, 3]) * 1.0e-6 diff = np.abs(output.asnumpy() - expect) assert np.all(diff < error)
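Taken together, the InitKernel changes earlier in the patch and the added BroadcastTo test cases pin down the broadcast rule the CPU kernel now enforces up front: shapes are right-aligned, each input dimension must equal the corresponding output dimension or be 1, and the output rank is capped at 8 (MAX_SHAPE_SIZE). A compact C++ sketch of that check (IsBroadcastable is illustrative, not part of the patch):

#include <cstddef>
#include <vector>

bool IsBroadcastable(const std::vector<size_t> &in_shape, const std::vector<size_t> &out_shape) {
  // Output rank must not shrink and must stay within MAX_SHAPE_SIZE (8).
  if (out_shape.size() < in_shape.size() || out_shape.size() > 8) {
    return false;
  }
  // Right-align the input shape against the output shape, then compare dimension by dimension.
  const size_t offset = out_shape.size() - in_shape.size();
  for (size_t i = 0; i < in_shape.size(); ++i) {
    if (in_shape[i] != out_shape[i + offset] && in_shape[i] != 1) {
      return false;
    }
  }
  return true;
}

Under this rule the added (4, 5, 1) -> (3, 5, 7, 4, 5, 6) and (2, 3, 1, 5, 1) -> (4, 5, 2, 3, 4, 5, 6) cases pass, whereas broadcasting to a smaller rank or to a mismatched non-1 dimension now fails in InitKernel rather than at copy time.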