From f2e9d9cfc7534de7a376ef54185cba11fc1de2e2 Mon Sep 17 00:00:00 2001 From: zhaoting Date: Wed, 14 Oct 2020 16:39:40 +0800 Subject: [PATCH] add some cpu operator --- .../cpu/arithmetic_cpu_kernel.cc | 88 +++++++-- .../cpu/arithmetic_cpu_kernel.h | 39 +++- .../cpu/arithmetic_self_cpu_kernel.cc | 23 +-- .../cpu/arithmetic_self_cpu_kernel.h | 8 +- .../kernel_compiler/cpu/cast_cpu_kernel.cc | 82 ++++++++ .../kernel_compiler/cpu/cast_cpu_kernel.h | 54 ++++++ .../backend/kernel_compiler/cpu/cpu_kernel.h | 30 ++- .../cpu/eltwise_grad_cpu_kernel.cc | 177 ++++++++++++++++++ .../cpu/eltwise_grad_cpu_kernel.h | 87 +++++++++ .../cpu/mkldnn/eltwise_cpu_kernel.cc | 76 ++++++++ .../cpu/mkldnn/eltwise_cpu_kernel.h | 60 ++++++ .../cpu/mkldnn/fused_batch_norm_cpu_kernel.cc | 3 +- .../cpu/mkldnn/fused_batch_norm_cpu_kernel.h | 5 +- .../fused_batch_norm_gard_cpu_kernel.cc | 110 +++++++++++ .../mkldnn/fused_batch_norm_gard_cpu_kernel.h | 61 ++++++ .../cpu/mkldnn/mul_cpu_kernel.cc | 66 +++++-- .../cpu/mkldnn/mul_cpu_kernel.h | 3 + .../cpu/mkldnn/relu_cpu_kernel.cc | 59 ------ .../cpu/mkldnn/relu_cpu_kernel.h | 42 ----- .../cpu/mkldnn/relu_grad_cpu_kernel.cc | 69 ------- .../cpu/mkldnn/relu_grad_cpu_kernel.h | 43 ----- .../cpu/mkldnn/tensoradd_cpu_kernel.cc | 55 ++++-- .../cpu/mkldnn/tensoradd_cpu_kernel.h | 3 + .../kernel_compiler/cpu/reshape_cpu_kernel.h | 4 + mindspore/ops/_grad/grad_nn_ops.py | 10 +- mindspore/ops/operations/_grad_ops.py | 16 ++ mindspore/ops/operations/nn_ops.py | 1 + tests/st/ops/cpu/test_abs_op.py | 60 ++++++ tests/st/ops/cpu/test_batchnorm_op.py | 36 ++++ tests/st/ops/cpu/test_cast_op.py | 76 ++++++++ tests/st/ops/cpu/test_exp_op.py | 56 ++++++ tests/st/ops/cpu/test_less_op.py | 83 ++++++++ tests/st/ops/cpu/test_log_op.py | 56 ++++++ tests/st/ops/cpu/test_mul_op.py | 49 +++-- tests/st/ops/cpu/test_neg_op.py | 60 ++++++ tests/st/ops/cpu/test_pow_op.py | 58 ++++++ tests/st/ops/cpu/test_realdiv_op.py | 95 ++++++++++ tests/st/ops/cpu/test_relu6_op.py | 30 ++- 
tests/st/ops/cpu/test_relu_grad_op.py | 2 +- tests/st/ops/cpu/test_sigmoid_op.py | 78 ++++++++ tests/st/ops/cpu/test_sqrt_op.py | 75 ++++++++ tests/st/ops/cpu/test_square_op.py | 63 +++++++ tests/st/ops/cpu/test_tanh_op.py | 63 +++++++ tests/st/ops/cpu/test_tensoradd.py | 43 ++++- 44 files changed, 1946 insertions(+), 311 deletions(-) create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/cast_cpu_kernel.cc create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/cast_cpu_kernel.h create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_gard_cpu_kernel.cc create mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_gard_cpu_kernel.h delete mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.cc delete mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h delete mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.cc delete mode 100644 mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h create mode 100644 tests/st/ops/cpu/test_abs_op.py create mode 100644 tests/st/ops/cpu/test_cast_op.py create mode 100644 tests/st/ops/cpu/test_exp_op.py create mode 100644 tests/st/ops/cpu/test_less_op.py create mode 100644 tests/st/ops/cpu/test_log_op.py create mode 100644 tests/st/ops/cpu/test_neg_op.py create mode 100644 tests/st/ops/cpu/test_pow_op.py create mode 100644 tests/st/ops/cpu/test_realdiv_op.py create mode 100644 tests/st/ops/cpu/test_sigmoid_op.py create mode 100644 tests/st/ops/cpu/test_sqrt_op.py create mode 100644 
tests/st/ops/cpu/test_square_op.py create mode 100644 tests/st/ops/cpu/test_tanh_op.py diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.cc index 6792b815d9b..011cf16abdb 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.cc @@ -13,9 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h" -#include +#include #include +#include +#include "backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { @@ -52,13 +53,35 @@ void ArithmeticCPUKernel::Mul(const T *input1, const T *input2, T *out, size_t s } template -void ArithmeticCPUKernel::Div(const T *input1, const T *input2, T *out, size_t start, size_t end) { +void ArithmeticCPUKernel::RealDiv(const T *input1, const T *input2, T *out, size_t start, size_t end) { for (size_t i = start; i < end; i++) { - auto div_number = input2[i]; + std::vector idx; + GenIndex(i, &idx); + auto div_number = input2[idx[1]]; if (div_number == 0) { MS_LOG(EXCEPTION) << "Cannot divided by 0!"; } - out[i] = input1[i] / div_number; + out[i] = input1[idx[0]] / div_number; + } +} + +template +void ArithmeticCPUKernel::Pow(const T *input1, const T *input2, T *out, size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + std::vector idx; + GenIndex(i, &idx); + auto x = static_cast(input1[idx[0]]); + auto y = static_cast(input2[idx[1]]); + out[i] = static_cast(std::pow(x, y)); + } +} + +template +void ArithmeticCPUKernel::Less(const T *input1, const T *input2, bool *out, size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + std::vector idx; + GenIndex(i, &idx); + out[i] = input1[idx[0]] < input2[idx[1]]; } } @@ -71,10 +94,16 @@ 
void ArithmeticCPUKernel::InitKernel(const CNodePtr &kernel_node) { operate_type_ = SUB; } else if (kernel_name == prim::kPrimMul->name()) { operate_type_ = MUL; - } else if (kernel_name == "Div") { - operate_type_ = DIV; + } else if (kernel_name == prim::kPrimRealDiv->name()) { + operate_type_ = REALDIV; + } else if (kernel_name == prim::kPrimPow->name()) { + operate_type_ = POW; + } else if (kernel_name == prim::kPrimLess->name()) { + operate_type_ = LESS; } else if (kernel_name == prim::kPrimAssignAdd->name()) { operate_type_ = ASSIGNADD; + } else { + MS_LOG(EXCEPTION) << "Not support " << kernel_name; } input_shape0_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); @@ -145,14 +174,45 @@ void ArithmeticCPUKernel::GenIndex(size_t num, std::vector *idx) { idx->push_back(idx0); idx->push_back(idx1); } + +template +void ArithmeticCPUKernel::LaunchLess(const std::vector &inputs, const std::vector &outputs) { + T *input1 = reinterpret_cast(inputs[0]->addr); + T *input2 = reinterpret_cast(inputs[1]->addr); + bool *output = reinterpret_cast(outputs[0]->addr); + + size_t lens = outputs[0]->size > 0 ? static_cast(outputs[0]->size / sizeof(T)) : 1; + auto max_thread_num = std::thread::hardware_concurrency(); + size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; + MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; + std::vector threads; + threads.reserve(thread_num); + size_t start = 0; + size_t once_compute_size = (lens + thread_num - 1) / thread_num; + while (start < lens) { + size_t end = (start + once_compute_size) > lens ? 
lens : (start + once_compute_size); + threads.emplace_back(std::thread(&ArithmeticCPUKernel::Less, this, input1, input2, output, start, end)); + start += once_compute_size; + } + for (size_t i = 0; i < threads.size(); ++i) { + threads[i].join(); + } +} + template void ArithmeticCPUKernel::LaunchKernel(const std::vector &inputs, const std::vector &outputs) { + if (operate_type_ == LESS) { + LaunchLess(inputs, outputs); + return; + } T *input1 = reinterpret_cast(inputs[0]->addr); T *input2 = reinterpret_cast(inputs[1]->addr); T *output = reinterpret_cast(outputs[0]->addr); - auto lens = outputs[0]->size / sizeof(T); - size_t thread_num = lens < 128 * 24 ? std::ceil(lens / 128.0) : 24; - MS_LOG(INFO) << "lens=" << lens << "; use thread_num=" << thread_num; + + size_t lens = outputs[0]->size > 0 ? static_cast(outputs[0]->size / sizeof(T)) : 1; + auto max_thread_num = std::thread::hardware_concurrency(); + size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; + MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; std::vector threads; threads.reserve(thread_num); size_t start = 0; @@ -165,10 +225,14 @@ void ArithmeticCPUKernel::LaunchKernel(const std::vector &inputs, co threads.emplace_back(std::thread(&ArithmeticCPUKernel::Sub, this, input1, input2, output, start, end)); } else if (operate_type_ == MUL) { threads.emplace_back(std::thread(&ArithmeticCPUKernel::Mul, this, input1, input2, output, start, end)); - } else if (operate_type_ == DIV) { - threads.emplace_back(std::thread(&ArithmeticCPUKernel::Div, this, input1, input2, output, start, end)); + } else if (operate_type_ == REALDIV) { + threads.emplace_back(std::thread(&ArithmeticCPUKernel::RealDiv, this, input1, input2, output, start, end)); + } else if (operate_type_ == POW) { + threads.emplace_back(std::thread(&ArithmeticCPUKernel::Pow, this, input1, input2, output, start, end)); } else if (operate_type_ == ASSIGNADD) 
{ threads.emplace_back(std::thread(&ArithmeticCPUKernel::AssignAdd, this, input1, input2, output, start, end)); + } else { + MS_LOG(EXCEPTION) << "Not support " << operate_type_; } start += once_compute_size; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h index 3de579b5502..2f5ce298a00 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h @@ -15,8 +15,8 @@ */ #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARITHMETIC_CPU_KERNEL_H_ #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARITHMETIC_CPU_KERNEL_H_ -#include #include +#include #include "backend/kernel_compiler/cpu/cpu_kernel.h" #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" @@ -31,7 +31,8 @@ class ArithmeticCPUKernel : public CPUKernel { bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) override; - + template + void LaunchLess(const std::vector &inputs, const std::vector &outputs); template void LaunchKernel(const std::vector &inputs, const std::vector &outputs); @@ -44,9 +45,13 @@ class ArithmeticCPUKernel : public CPUKernel { template void Mul(const T *input1, const T *input2, T *out, size_t start, size_t end); template - void Div(const T *input1, const T *input2, T *out, size_t start, size_t end); + void RealDiv(const T *input1, const T *input2, T *out, size_t start, size_t end); + template + void Pow(const T *input1, const T *input2, T *out, size_t start, size_t end); template void AssignAdd(T *input1, const T *input2, T *out, size_t start, size_t end); + template + void Less(const T *input1, const T *input2, bool *out, size_t start, size_t end); std::vector input_shape0_; std::vector input_shape1_; std::vector input_element_num0_; @@ -66,6 +71,34 @@ MS_REG_CPU_KERNEL( MS_REG_CPU_KERNEL( Sub, 
KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), ArithmeticCPUKernel); +MS_REG_CPU_KERNEL( + Pow, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + ArithmeticCPUKernel); +MS_REG_CPU_KERNEL( + Pow, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ArithmeticCPUKernel); +MS_REG_CPU_KERNEL( + Pow, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), + ArithmeticCPUKernel); +MS_REG_CPU_KERNEL( + RealDiv, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + ArithmeticCPUKernel); +MS_REG_CPU_KERNEL( + RealDiv, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ArithmeticCPUKernel); +MS_REG_CPU_KERNEL( + RealDiv, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), + ArithmeticCPUKernel); +MS_REG_CPU_KERNEL( + Less, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool), + ArithmeticCPUKernel); +MS_REG_CPU_KERNEL( + Less, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeBool), + ArithmeticCPUKernel); +MS_REG_CPU_KERNEL( + Less, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeBool), + ArithmeticCPUKernel); MS_REG_CPU_KERNEL( AssignAdd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), ArithmeticCPUKernel); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.cc index b46ad1d5ab2..fdfb48481f2 100644 --- 
a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.cc @@ -13,10 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h" #include -#include #include +#include +#include "backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { @@ -30,9 +30,9 @@ void Square(const T *in, T *out, size_t start, size_t end) { } template -void Sqrt(const T *in, T *out, size_t start, size_t end) { +void Neg(const T *in, T *out, size_t start, size_t end) { for (size_t i = start; i < end; i++) { - out[i] = sqrtf(in[i]); + out[i] = -in[i]; } } } // namespace @@ -42,8 +42,8 @@ void ArithmeticSelfCPUKernel::InitKernel(const CNodePtr &kernel_node) { std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); if (kernel_name == prim::kPrimSquare->name()) { operate_type_ = SQUARE; - } else if (kernel_name == prim::kPrimSqrt->name()) { - operate_type_ = SQRT; + } else if (kernel_name == prim::kPrimNeg->name()) { + operate_type_ = NEG; } dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); } @@ -66,10 +66,11 @@ void ArithmeticSelfCPUKernel::LaunchKernel(const std::vector &inputs const std::vector &outputs) { T *input = reinterpret_cast(inputs[0]->addr); T *output = reinterpret_cast(outputs[0]->addr); - auto lens = inputs[0]->size / sizeof(T); - MS_LOG(INFO) << "lens=" << lens; + size_t lens = outputs[0]->size > 0 ? static_cast(outputs[0]->size / sizeof(T)) : 1; - const size_t thread_num = 24; + auto max_thread_num = std::thread::hardware_concurrency(); + size_t thread_num = lens < 128 * max_thread_num ? 
std::ceil(lens / 128.0) : max_thread_num; + MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; std::vector threads; threads.reserve(thread_num); size_t start = 0; @@ -78,8 +79,8 @@ void ArithmeticSelfCPUKernel::LaunchKernel(const std::vector &inputs size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size); if (operate_type_ == SQUARE) { threads.emplace_back(std::thread(Square, input, output, start, end)); - } else if (operate_type_ == SQRT) { - threads.emplace_back(std::thread(Sqrt, input, output, start, end)); + } else if (operate_type_ == NEG) { + threads.emplace_back(std::thread(Neg, input, output, start, end)); } start += once_compute_size; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h index 3d3981f1792..ebfeb8411fc 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h @@ -15,8 +15,8 @@ */ #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARITHMETIC_SELF_CPU_KERNEL_H_ #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARITHMETIC_SELF_CPU_KERNEL_H_ -#include #include +#include #include "backend/kernel_compiler/cpu/cpu_kernel.h" #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" @@ -40,10 +40,12 @@ class ArithmeticSelfCPUKernel : public CPUKernel { TypeId dtype_{kTypeUnknown}; }; -MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), - ArithmeticSelfCPUKernel); MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), ArithmeticSelfCPUKernel); +MS_REG_CPU_KERNEL(Neg, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ArithmeticSelfCPUKernel); +MS_REG_CPU_KERNEL(Neg, 
KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + ArithmeticSelfCPUKernel); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/cast_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/cast_cpu_kernel.cc new file mode 100644 index 00000000000..0f77a362a13 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cast_cpu_kernel.cc @@ -0,0 +1,82 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include "backend/kernel_compiler/cpu/cast_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { + +template +void Cast(const S *in, T *out, size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + out[i] = static_cast(in[i]); + } +} + +template +void LaunchCast(const std::vector &inputs, const std::vector &outputs) { + S *input = reinterpret_cast(inputs[0]->addr); + T *output = reinterpret_cast(outputs[0]->addr); + MS_LOG(DEBUG) << "Type source: " << typeid(S).name() << "; target: " << typeid(T).name(); + + size_t lens = outputs[0]->size > 0 ? static_cast(outputs[0]->size / sizeof(T)) : 1; + auto max_thread_num = std::thread::hardware_concurrency(); + size_t thread_num = lens < 128 * max_thread_num ? 
std::ceil(lens / 128.0) : max_thread_num; + MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; + std::vector threads; + threads.reserve(thread_num); + size_t start = 0; + size_t once_compute_size = (lens + thread_num - 1) / thread_num; + while (start < lens) { + size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size); + threads.emplace_back(std::thread(Cast, input, output, start, end)); + start += once_compute_size; + } + for (size_t i = 0; i < threads.size(); ++i) { + threads[i].join(); + } +} + +void CastCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + source_dtype = AnfAlgo::GetPrevNodeOutputDeviceDataType(kernel_node, 0); + target_dtype = AnfAlgo::GetOutputInferDataType(kernel_node, 0); +} + +bool CastCPUKernel::Launch(const std::vector &inputs, + const std::vector & /*workspace*/, + const std::vector &outputs) { + using TypePair = + std::function &, const std::vector &)>; + std::map> mode_map; + mode_map[kNumberTypeFloat32][kNumberTypeFloat32] = LaunchCast; + mode_map[kNumberTypeFloat32][kNumberTypeInt32] = LaunchCast; + mode_map[kNumberTypeFloat32][kNumberTypeBool] = LaunchCast; + mode_map[kNumberTypeInt32][kNumberTypeFloat32] = LaunchCast; + mode_map[kNumberTypeInt32][kNumberTypeInt32] = LaunchCast; + mode_map[kNumberTypeInt32][kNumberTypeBool] = LaunchCast; + mode_map[kNumberTypeBool][kNumberTypeFloat32] = LaunchCast; + mode_map[kNumberTypeBool][kNumberTypeBool] = LaunchCast; + mode_map[kNumberTypeBool][kNumberTypeInt32] = LaunchCast; + mode_map[source_dtype][target_dtype](inputs, outputs); + return true; +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/cast_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/cast_cpu_kernel.h new file mode 100644 index 00000000000..5de831af457 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cast_cpu_kernel.h @@ 
-0,0 +1,54 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CAST_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CAST_CPU_KERNEL_H_ +#include +#include +#include +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { + +class CastCPUKernel : public CPUKernel { + public: + CastCPUKernel() = default; + ~CastCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + private: + TypeId source_dtype{kTypeUnknown}; + TypeId target_dtype{kTypeUnknown}; +}; + +MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), CastCPUKernel); +MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32), CastCPUKernel); +MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeBool), CastCPUKernel); +MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), CastCPUKernel); +MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), CastCPUKernel); +MS_REG_CPU_KERNEL(Cast, 
KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool), CastCPUKernel); +MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), CastCPUKernel); +MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeInt32), CastCPUKernel); +MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeFloat32), CastCPUKernel); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CAST_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h index da746f39d17..ed157648e86 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h @@ -15,15 +15,14 @@ */ #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CPU_KERNEL_H_ #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CPU_KERNEL_H_ - -#include -#include +#include #include #include -#include +#include +#include #include "backend/kernel_compiler/kernel.h" -#include "ir/anf.h" #include "backend/session/anf_runtime_algorithm.h" +#include "ir/anf.h" using mindspore::kernel::Address; using mindspore::kernel::AddressPtr; @@ -52,7 +51,26 @@ const char END[] = "end"; const char SIZE[] = "size"; const char USE_NESTEROV[] = "use_nesterov"; const char GROUP[] = "group"; -enum OperateType { ADD = 0, SUB, MUL, DIV, SQUARE, SQRT, ASSIGNADD }; + +enum OperateType { + ADD = 0, + SUB, + MUL, + DIV, + SQUARE, + SQRT, + POW, + REALDIV, + NEG, + LESS, + ASSIGNADD, + RELUGRAD, + RELU6GRAD, + ABSGRAD, + TANHGRAD, + SQRTGRAD, + SIGMOIDGRAD +}; class CPUKernel : public kernel::KernelMod { public: diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc new file mode 100644 index 00000000000..f47c5c2a59e --- /dev/null +++ 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc @@ -0,0 +1,177 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include "backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { +template +void EltWiseGradCPUKernel::ReluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + if (input2[i] > 0) { + out[i] = input1[i]; + } else { + out[i] = 0; + } + } +} + +template +void EltWiseGradCPUKernel::ReLU6Grad(const T *input1, const T *input2, T *out, size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + if (input2[i] > 0 && input2[i] <= 6) { + out[i] = input1[i]; + } else { + out[i] = 0; + } + } +} + +template +void EltWiseGradCPUKernel::AbsGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + if (input1[i] > 0) { + out[i] = input2[i]; + } else if (input1[i] < 0) { + out[i] = -input2[i]; + } else { + out[i] = 0; + } + } +} + +template +void EltWiseGradCPUKernel::SigmoidGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + out[i] = input2[i] * input1[i] * (1 - input1[i]); + } +} + +template +void EltWiseGradCPUKernel::SqrtGrad(const T *input1, const T 
*input2, T *out, size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + out[i] = input2[i] / (input1[i] * 2); + } +} + +template +void EltWiseGradCPUKernel::TanhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + T tmp = (1 - input1[i] * input1[i]); + out[i] = input2[i] * tmp; + } +} + +void EltWiseGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); + if (kernel_name == "ReluGrad") { + operate_type_ = RELUGRAD; + } else if (kernel_name == "ReLU6Grad") { + operate_type_ = RELU6GRAD; + } else if (kernel_name == "SigmoidGrad") { + operate_type_ = SIGMOIDGRAD; + } else if (kernel_name == "AbsGrad") { + operate_type_ = ABSGRAD; + } else if (kernel_name == "TanhGrad") { + operate_type_ = TANHGRAD; + } else if (kernel_name == "SqrtGrad") { + operate_type_ = SQRTGRAD; + } else { + MS_LOG(EXCEPTION) << "Not support " << kernel_name; + } + + input_shape0_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + input_shape1_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); + if (output_shape_.size() == 0) { + output_shape_.insert(output_shape_.begin(), 1); + } + size_t l = input_shape0_.size(); + for (size_t i = 0; i < output_shape_.size() - l; ++i) { + input_shape0_.insert(input_shape0_.begin(), 1); + } + l = input_shape1_.size(); + for (size_t i = 0; i < output_shape_.size() - l; ++i) { + input_shape1_.insert(input_shape1_.begin(), 1); + } + CPUKernelUtils::GetElementNumEveryDim(input_shape0_, &input_element_num0_); + CPUKernelUtils::GetElementNumEveryDim(input_shape1_, &input_element_num1_); + CPUKernelUtils::GetElementNumEveryDim(output_shape_, &output_element_num_); + dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); + if (dtype_ != AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 1)) { + 
MS_LOG(EXCEPTION) << "Input0 and input1 must have the same data type"; + } +} + +bool EltWiseGradCPUKernel::Launch(const std::vector &inputs, + const std::vector & /*workspace*/, + const std::vector &outputs) { + if (dtype_ == kNumberTypeInt32) { + LaunchKernel(inputs, outputs); + } else if (dtype_ == kNumberTypeFloat32) { + LaunchKernel(inputs, outputs); + } else if (dtype_ == kNumberTypeInt64) { + LaunchKernel(inputs, outputs); + } else { + MS_LOG(EXCEPTION) << "Only support int32, int64, float32, but actual data type is " << TypeIdLabel(dtype_); + } + return true; +} + +template +void EltWiseGradCPUKernel::LaunchKernel(const std::vector &inputs, const std::vector &outputs) { + T *input1 = reinterpret_cast(inputs[0]->addr); + T *input2 = reinterpret_cast(inputs[1]->addr); + T *output = reinterpret_cast(outputs[0]->addr); + + size_t lens = outputs[0]->size > 0 ? static_cast(outputs[0]->size / sizeof(T)) : 1; + auto max_thread_num = std::thread::hardware_concurrency(); + size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num; + MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num; + std::vector threads; + threads.reserve(thread_num); + size_t start = 0; + size_t once_compute_size = (lens + thread_num - 1) / thread_num; + while (start < lens) { + size_t end = (start + once_compute_size) > lens ? 
lens : (start + once_compute_size); + if (operate_type_ == RELUGRAD) { + threads.emplace_back(std::thread(&EltWiseGradCPUKernel::ReluGrad, this, input1, input2, output, start, end)); + } else if (operate_type_ == RELU6GRAD) { + threads.emplace_back(std::thread(&EltWiseGradCPUKernel::ReLU6Grad, this, input1, input2, output, start, end)); + } else if (operate_type_ == ABSGRAD) { + threads.emplace_back(std::thread(&EltWiseGradCPUKernel::AbsGrad, this, input1, input2, output, start, end)); + } else if (operate_type_ == SIGMOIDGRAD) { + threads.emplace_back( + std::thread(&EltWiseGradCPUKernel::SigmoidGrad, this, input1, input2, output, start, end)); + } else if (operate_type_ == TANHGRAD) { + threads.emplace_back(std::thread(&EltWiseGradCPUKernel::TanhGrad, this, input1, input2, output, start, end)); + } else if (operate_type_ == SQRTGRAD) { + threads.emplace_back(std::thread(&EltWiseGradCPUKernel::SqrtGrad, this, input1, input2, output, start, end)); + } else { + MS_LOG(EXCEPTION) << "Not support " << operate_type_; + } + start += once_compute_size; + } + for (size_t i = 0; i < threads.size(); ++i) { + threads[i].join(); + } +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h new file mode 100644 index 00000000000..dcfe0050daf --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h @@ -0,0 +1,87 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_GRAD_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_GRAD_CPU_KERNEL_H_ +#include +#include +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +class EltWiseGradCPUKernel : public CPUKernel { + public: + EltWiseGradCPUKernel() = default; + ~EltWiseGradCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + template + void LaunchKernel(const std::vector &inputs, const std::vector &outputs); + + private: + template + void ReluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end); + template + void ReLU6Grad(const T *input1, const T *input2, T *out, size_t start, size_t end); + template + void AbsGrad(const T *input1, const T *input2, T *out, size_t start, size_t end); + template + void SigmoidGrad(const T *input1, const T *input2, T *out, size_t start, size_t end); + template + void SqrtGrad(const T *input1, const T *input2, T *out, size_t start, size_t end); + template + void TanhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end); + std::vector input_shape0_; + std::vector input_shape1_; + std::vector input_element_num0_; + std::vector input_element_num1_; + std::vector output_shape_; + std::vector output_element_num_; + OperateType operate_type_{RELUGRAD}; + 
TypeId dtype_{kTypeUnknown}; +}; + +MS_REG_CPU_KERNEL( + ReluGrad, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseGradCPUKernel); +MS_REG_CPU_KERNEL( + ReLU6Grad, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseGradCPUKernel); +MS_REG_CPU_KERNEL( + AbsGrad, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseGradCPUKernel); +MS_REG_CPU_KERNEL( + SigmoidGrad, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseGradCPUKernel); +MS_REG_CPU_KERNEL( + SqrtGrad, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseGradCPUKernel); +MS_REG_CPU_KERNEL( + TanhGrad, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseGradCPUKernel); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_GRAD_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc new file mode 100644 index 00000000000..6d909d8dc5e --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc @@ -0,0 +1,76 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h" + +#include +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" +#include "utils/ms_utils.h" + +namespace mindspore { +namespace kernel { +dnnl::eltwise_forward::desc EltWiseCPUKernel::GetForwardEltwiseDesc(const CNodePtr &kernel_node, + dnnl::memory::desc src_desc) { + std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); + if (kernel_name == "ReLU") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_relu, src_desc, 0.0); + } else if (kernel_name == "ReLU6") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_clip, src_desc, 0.0, 6.0); + } else if (kernel_name == "Abs") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_abs, src_desc); + } else if (kernel_name == "Exp") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_exp, src_desc); + } else if (kernel_name == "Log") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_log, src_desc); + } else if (kernel_name == "Sigmoid") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_logistic, src_desc); + } else if (kernel_name == "Sqrt") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_sqrt, src_desc); + } else if (kernel_name == "Square") { + return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_square, src_desc); + } else if (kernel_name == "Tanh") { + return 
dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_tanh, src_desc); + } else { + MS_LOG(EXCEPTION) << "Eltwise operators don't support " << kernel_name; + } +} + +void EltWiseCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + std::vector src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); + + auto desc = GetForwardEltwiseDesc(kernel_node, src_desc); + auto prim_desc = dnnl::eltwise_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); + primitive_ = std::make_shared(prim_desc); + + AddArgument(DNNL_ARG_SRC, src_desc); + AddArgument(DNNL_ARG_DST, src_desc); +} + +bool EltWiseCPUKernel::Launch(const std::vector &inputs, + const std::vector & /*workspace*/, + const std::vector &outputs) { + if (inputs.empty() || outputs.empty()) { + MS_LOG(EXCEPTION) << "error input output size!"; + } + SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); + SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); + ExecutePrimitive(); + return true; +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h new file mode 100644 index 00000000000..d9d0f49d31c --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h @@ -0,0 +1,60 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_CPU_KERNEL_H_ +#include +#include +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" + +namespace mindspore { +namespace kernel { +class EltWiseCPUKernel : public MKLCPUKernel { + public: + EltWiseCPUKernel() = default; + ~EltWiseCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + private: + dnnl::eltwise_forward::desc GetForwardEltwiseDesc(const CNodePtr &kernel_node, dnnl::memory::desc src_desc); + dnnl::prop_kind DnnlForward = dnnl::prop_kind::forward_training; +}; + +MS_REG_CPU_KERNEL(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseCPUKernel); +MS_REG_CPU_KERNEL(ReLU6, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseCPUKernel); +MS_REG_CPU_KERNEL(Abs, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseCPUKernel); +MS_REG_CPU_KERNEL(Exp, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseCPUKernel); +MS_REG_CPU_KERNEL(Log, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseCPUKernel); +MS_REG_CPU_KERNEL(Sigmoid, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseCPUKernel); +MS_REG_CPU_KERNEL(Sqrt, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseCPUKernel); +MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseCPUKernel); +MS_REG_CPU_KERNEL(Tanh, 
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseCPUKernel); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_cpu_kernel.cc index a33886e92d8..ffefe32513f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_cpu_kernel.cc @@ -13,12 +13,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #include #include "backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_cpu_kernel.h" -#include "utils/ms_utils.h" #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" #include "runtime/device/cpu/cpu_device_address.h" +#include "utils/ms_utils.h" namespace mindspore { namespace kernel { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_cpu_kernel.h index 8571857f901..4c1d99a9085 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_cpu_kernel.h @@ -15,9 +15,8 @@ */ #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_CPU_KERNEL_H_ #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_CPU_KERNEL_H_ - -#include #include +#include #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" namespace mindspore { @@ -74,4 +73,4 @@ MS_REG_CPU_KERNEL(BatchNorm, } // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONV2D_CPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_CPU_KERNEL_H_ 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_gard_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_gard_cpu_kernel.cc new file mode 100644 index 00000000000..bddf7c5892a --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_gard_cpu_kernel.cc @@ -0,0 +1,110 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_gard_cpu_kernel.h" + +#include +#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" +#include "runtime/device/cpu/cpu_device_address.h" +#include "utils/ms_utils.h" + +namespace mindspore { +namespace kernel { +void FusedBatchNormGradCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { + CPUKernel::InitInputOutputSize(kernel_node); + MS_EXCEPTION_IF_NULL(kernel_node); + size_t type_size = sizeof(float); + std::vector shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + size_t tensor_size = shape[1] * 2 * type_size; + // [2, c] to store scale and bias + workspace_size_list_.emplace_back(tensor_size); + // [2, c] to store diff_scale and diff_bias + workspace_size_list_.emplace_back(tensor_size); +} + +void FusedBatchNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + std::vector x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + if (x_shape.size() != 4) { 
+ MS_LOG(EXCEPTION) << "Fused batchnorm only supports NCHW input!"; + } + batch_size = x_shape[0]; + channel = x_shape[1]; + hw_size = x_shape[2] * x_shape[3]; + nhw_size = x_shape[0] * hw_size; + dnnl::memory::desc x_desc = GetDefaultMemDesc(x_shape); + dnnl::memory::desc scale_bias_desc = GetDefaultMemDesc({2, channel}); + auto epsilon = AnfAlgo::GetNodeAttr(kernel_node, "epsilon"); + auto prop_kind = dnnl::prop_kind::forward_training; + auto normalization_flags = dnnl::normalization_flags::use_scale_shift; + + // fused batch normalization forward description + dnnl::batch_normalization_forward::desc desc = + dnnl::batch_normalization_forward::desc(prop_kind, x_desc, epsilon, normalization_flags); + auto forward_prim_desc = dnnl::batch_normalization_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); + + // fused batch normalization backward description + dnnl::batch_normalization_backward::desc backward_desc = + dnnl::batch_normalization_backward::desc(dnnl::prop_kind::backward, x_desc, x_desc, epsilon, normalization_flags); + auto backward_prim_desc = dnnl::batch_normalization_backward::primitive_desc( + backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc); + primitive_ = std::make_shared(backward_prim_desc); + AddArgument(DNNL_ARG_SRC, x_desc); + AddArgument(DNNL_ARG_MEAN, forward_prim_desc.mean_desc()); + AddArgument(DNNL_ARG_VARIANCE, forward_prim_desc.variance_desc()); + AddArgument(DNNL_ARG_SCALE_SHIFT, scale_bias_desc); + AddArgument(DNNL_ARG_WORKSPACE, forward_prim_desc.workspace_desc()); + AddArgument(DNNL_ARG_DST, x_desc); + AddArgument(DNNL_ARG_DIFF_DST, x_desc); + AddArgument(DNNL_ARG_DIFF_SRC, x_desc); + AddArgument(DNNL_ARG_DIFF_SCALE_SHIFT, scale_bias_desc); +} + +bool FusedBatchNormGradCPUKernel::Launch(const std::vector &inputs, + const std::vector &workspace, + const std::vector &outputs) { + if (inputs.size() < 6 || outputs.empty()) { + MS_LOG(EXCEPTION) << "Error input output size!"; + } + auto wksp_in =
reinterpret_cast(workspace[0]->addr); + auto scale_ret = memcpy_s(wksp_in, workspace[0]->size, inputs[2]->addr, inputs[2]->size); + auto max_size = workspace[0]->size - inputs[2]->size; + auto bias_ret = memcpy_s(wksp_in + (inputs[2]->size / sizeof(float)), max_size, inputs[3]->addr, inputs[3]->size); + if (scale_ret != 0 || bias_ret != 0) { + MS_LOG(EXCEPTION) << "Memcpy_s error."; + return false; + } + + SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr); + SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr); + SetArgumentHandle(DNNL_ARG_MEAN, inputs[4]->addr); + SetArgumentHandle(DNNL_ARG_VARIANCE, inputs[5]->addr); + SetArgumentHandle(DNNL_ARG_SCALE_SHIFT, workspace[0]->addr); + SetArgumentHandle(DNNL_ARG_DIFF_SRC, outputs[0]->addr); + SetArgumentHandle(DNNL_ARG_DIFF_SCALE_SHIFT, workspace[1]->addr); + ExecutePrimitive(); + + auto wksp_out = reinterpret_cast(workspace[1]->addr); + auto diff_scale_ret = memcpy_s(outputs[1]->addr, outputs[1]->size, wksp_out, inputs[2]->size); + auto diff_bias_ret = + memcpy_s(outputs[2]->addr, outputs[2]->size, wksp_out + (outputs[1]->size / sizeof(float)), inputs[3]->size); + if (diff_scale_ret != 0 || diff_bias_ret != 0) { + MS_LOG(EXCEPTION) << "Memcpy_s error."; + return false; + } + return true; +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_gard_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_gard_cpu_kernel.h new file mode 100644 index 00000000000..2688191d345 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_gard_cpu_kernel.h @@ -0,0 +1,61 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_GRAD_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_GRAD_CPU_KERNEL_H_ +#include +#include +#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" + +namespace mindspore { +namespace kernel { +class FusedBatchNormGradCPUKernel : public MKLCPUKernel { + public: + FusedBatchNormGradCPUKernel() = default; + ~FusedBatchNormGradCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + protected: + void InitInputOutputSize(const CNodePtr &kernel_node) override; + + private: + float momentum{0.9}; + size_t batch_size{0}; + size_t channel{0}; + size_t hw_size{0}; + size_t nhw_size{0}; +}; + +MS_REG_CPU_KERNEL(FusedBatchNormGradCPU, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + FusedBatchNormGradCPUKernel) + +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_GRAD_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.cc 
index a59cc3c4b69..6c2af2f87db 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.cc @@ -25,24 +25,53 @@ void MulCPUKernel::InitKernel(const CNodePtr &kernel_node) { std::vector src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); std::vector src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); std::vector dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); - if (src0_shape.size() != src1_shape.size() && src1_shape.size() > 1) { - MS_LOG(EXCEPTION) << "mul only support same dim input or tensor * scalar " << src0_shape.size() << " vs " - << src1_shape.size(); - } - if (src1_shape.size() < src0_shape.size()) { - for (size_t i = src1_shape.size(); i < src0_shape.size(); ++i) { - src1_shape.emplace_back(1); + if (src1_shape.size() != src0_shape.size()) { + if (src0_shape.size() == 0) { + need_swap_ = true; + for (size_t i = 0; i < src1_shape.size(); ++i) { + src0_shape.emplace_back(1); + } + } else if (src1_shape.size() == 0) { + for (size_t i = 0; i < src0_shape.size(); ++i) { + src1_shape.emplace_back(1); + } + } else { + MS_LOG(EXCEPTION) << "Invalid broadcast! " << src0_shape << " vs " << src1_shape; + } + } else { + bool visit_src0 = false; + bool visit_src1 = false; + for (size_t i = 0; i < src0_shape.size(); ++i) { + if (src0_shape[i] != src1_shape[i]) { + if (src0_shape[i] == 1 && !visit_src1) { + need_swap_ = true; + visit_src0 = true; + } else if (src1_shape[i] == 1 && !visit_src0) { + need_swap_ = false; + visit_src1 = true; + } else { + MS_LOG(EXCEPTION) << "Invalid broadcast! 
" << src0_shape << " vs " << src1_shape; + } + } } } - dnnl::memory::desc src0_mem_desc = GetDefaultMemDesc(src0_shape); - dnnl::memory::desc src1_mem_desc = GetDefaultMemDesc(src1_shape); - dnnl::memory::desc dst_mem_desc = GetDefaultMemDesc(dst_shape); - dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_mul, src0_mem_desc, src1_mem_desc, dst_mem_desc); + dnnl::memory::desc src0_desc; + dnnl::memory::desc src1_desc; + if (need_swap_) { + src0_desc = GetDefaultMemDesc(src1_shape); + src1_desc = GetDefaultMemDesc(src0_shape); + } else { + src0_desc = GetDefaultMemDesc(src0_shape); + src1_desc = GetDefaultMemDesc(src1_shape); + } + dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); + dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_mul, src0_desc, src1_desc, dst_desc); auto prim_desc = dnnl::binary::primitive_desc(desc, MKLKernelEngine::Get().engine()); primitive_ = std::make_shared(prim_desc); - AddArgument(DNNL_ARG_SRC_0, src0_mem_desc); - AddArgument(DNNL_ARG_SRC_1, src1_mem_desc); - AddArgument(DNNL_ARG_DST, dst_mem_desc); + + AddArgument(DNNL_ARG_SRC_0, src0_desc); + AddArgument(DNNL_ARG_SRC_1, src1_desc); + AddArgument(DNNL_ARG_DST, dst_desc); } bool MulCPUKernel::Launch(const std::vector &inputs, @@ -51,8 +80,13 @@ bool MulCPUKernel::Launch(const std::vector &inputs, if (inputs.size() < 2 || outputs.empty()) { MS_LOG(EXCEPTION) << "mul error input output size!"; } - SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr); - SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr); + if (need_swap_) { + SetArgumentHandle(DNNL_ARG_SRC_0, inputs[1]->addr); + SetArgumentHandle(DNNL_ARG_SRC_1, inputs[0]->addr); + } else { + SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr); + SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr); + } SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); ExecutePrimitive(); return true; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h index d67626deca3..19cf70a0d5e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mul_cpu_kernel.h @@ -31,6 +31,9 @@ class MulCPUKernel : public MKLCPUKernel { bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) override; + + private: + bool need_swap_{false}; }; MS_REG_CPU_KERNEL( diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.cc deleted file mode 100644 index 2dce57d11c8..00000000000 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.cc +++ /dev/null @@ -1,59 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include "backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h" -#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" -#include "runtime/device/cpu/cpu_device_address.h" -#include "utils/ms_utils.h" - -namespace mindspore { -namespace kernel { -void ReluCPUKernel::InitKernel(const CNodePtr &kernel_node) { - MS_EXCEPTION_IF_NULL(kernel_node); - std::vector src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); - if (src_shape.size() != 4 && src_shape.size() != 2) { - MS_LOG(EXCEPTION) << "relu kernel dims invalid " << src_shape.size(); - } - dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); - - dnnl::eltwise_forward::desc desc = - dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_relu, src_desc, 0.0); - std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); - if (kernel_name == "ReLU6") { - desc = - dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_clip, src_desc, 0.0, 6.0); - } - - auto prim_desc = dnnl::eltwise_forward::primitive_desc(desc, MKLKernelEngine::Get().engine()); - primitive_ = std::make_shared(prim_desc); - - AddArgument(DNNL_ARG_SRC, src_desc); - AddArgument(DNNL_ARG_DST, src_desc); -} - -bool ReluCPUKernel::Launch(const std::vector &inputs, - const std::vector & /*workspace*/, - const std::vector &outputs) { - if (inputs.empty() || outputs.empty()) { - MS_LOG(EXCEPTION) << "error input output size!"; - } - SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr); - SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); - ExecutePrimitive(); - return true; -} -} // namespace kernel -} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h deleted file mode 100644 index 6e90f930878..00000000000 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h +++ /dev/null @@ -1,42 +0,0 @@ -/** - * Copyright 2019 
Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_CPU_KERNEL_H_ - -#include -#include -#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" - -namespace mindspore { -namespace kernel { -class ReluCPUKernel : public MKLCPUKernel { - public: - ReluCPUKernel() = default; - ~ReluCPUKernel() override = default; - - void InitKernel(const CNodePtr &kernel_node) override; - - bool Launch(const std::vector &inputs, const std::vector &workspace, - const std::vector &outputs) override; -}; - -MS_REG_CPU_KERNEL(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), ReluCPUKernel); -MS_REG_CPU_KERNEL(ReLU6, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), - ReluCPUKernel); -} // namespace kernel -} // namespace mindspore - -#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.cc deleted file mode 100644 index 3188b0622ca..00000000000 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.cc +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the 
"License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h" -#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" -#include "runtime/device/cpu/cpu_device_address.h" -#include "utils/ms_utils.h" - -namespace mindspore { -namespace kernel { -void ReluGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { - MS_EXCEPTION_IF_NULL(kernel_node); - std::vector src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); - if (src_shape.size() != 4 && src_shape.size() != 2) { - MS_LOG(EXCEPTION) << "relu grad kernel dims invalid " << src_shape.size(); - } - dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); - - dnnl::eltwise_forward::desc forward_desc = - dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_relu, src_desc, 0.0); - auto forward_prim_desc = dnnl::eltwise_forward::primitive_desc(forward_desc, MKLKernelEngine::Get().engine()); - - dnnl::eltwise_backward::desc backward_desc = - dnnl::eltwise_backward::desc(dnnl::algorithm::eltwise_relu, src_desc, src_desc, 0.0, 0.0); - auto backward_prim_desc = - dnnl::eltwise_backward::primitive_desc(backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc); - primitive_ = std::make_shared(backward_prim_desc); - - AddArgument(DNNL_ARG_SRC, src_desc); - AddArgument(DNNL_ARG_DIFF_SRC, src_desc); - AddArgument(DNNL_ARG_DIFF_DST, src_desc); -} - -bool ReluGradCPUKernel::Launch(const std::vector &inputs, - const std::vector & /*workspace*/, - const 
std::vector &outputs) { - if (inputs.size() < 2 || outputs.empty()) { - MS_LOG(EXCEPTION) << "relu grad error input output size!"; - } - if (inputs[0]->size != outputs[0]->size) { - MS_LOG(EXCEPTION) << "relu grad error input output data size!"; - } - - SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr); - SetArgumentHandle(DNNL_ARG_DIFF_SRC, inputs[0]->addr); - SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr); - ExecutePrimitive(); - size_t mem_bits = outputs[0]->size; - auto ret = memcpy_s(outputs[0]->addr, mem_bits, inputs[0]->addr, mem_bits); - if (ret != 0) { - MS_LOG(EXCEPTION) << "memcpy_s error, errorno " << ret; - return false; - } - return true; -} -} // namespace kernel -} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h deleted file mode 100644 index ff418ae3169..00000000000 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h +++ /dev/null @@ -1,43 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_GRAD_CPU_KERNEL_H_ -#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_GRAD_CPU_KERNEL_H_ - -#include -#include -#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h" - -namespace mindspore { -namespace kernel { -class ReluGradCPUKernel : public MKLCPUKernel { - public: - ReluGradCPUKernel() = default; - ~ReluGradCPUKernel() override = default; - - void InitKernel(const CNodePtr &kernel_node) override; - - bool Launch(const std::vector &inputs, const std::vector &workspace, - const std::vector &outputs) override; -}; - -MS_REG_CPU_KERNEL( - ReluGrad, - KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), - ReluGradCPUKernel); -} // namespace kernel -} // namespace mindspore - -#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_GRAD_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/tensoradd_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/tensoradd_cpu_kernel.cc index aa65a281046..85e74e27d65 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/tensoradd_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/tensoradd_cpu_kernel.cc @@ -25,17 +25,45 @@ void TensorAddCPUKernel::InitKernel(const CNodePtr &kernel_node) { std::vector src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); std::vector src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); std::vector dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); - if (src0_shape.size() != src1_shape.size() && src1_shape.size() > 1) { - MS_LOG(EXCEPTION) << "TensorAdd only support same dim input or tensor * scalar " << src0_shape.size() << " vs " - << src1_shape.size(); - } - if (src1_shape.size() < src0_shape.size()) { - for (size_t i = src1_shape.size(); i < src0_shape.size(); ++i) { - src1_shape.emplace_back(1); + if (src1_shape.size() != src0_shape.size()) { + if (src0_shape.size() 
== 0) { + need_swap_ = true; + for (size_t i = 0; i < src1_shape.size(); ++i) { + src0_shape.emplace_back(1); + } + } else if (src1_shape.size() == 0) { + for (size_t i = 0; i < src0_shape.size(); ++i) { + src1_shape.emplace_back(1); + } + } else { + MS_LOG(EXCEPTION) << "Invalid broadcast! " << src0_shape << " vs " << src1_shape; + } + } else { + bool visit_src0 = false; + bool visit_src1 = false; + for (size_t i = 0; i < src0_shape.size(); ++i) { + if (src0_shape[i] != src1_shape[i]) { + if (src0_shape[i] == 1 && !visit_src1) { + need_swap_ = true; + visit_src0 = true; + } else if (src1_shape[i] == 1 && !visit_src0) { + need_swap_ = false; + visit_src1 = true; + } else { + MS_LOG(EXCEPTION) << "Invalid broadcast! " << src0_shape << " vs " << src1_shape; + } + } } } - dnnl::memory::desc src0_desc = GetDefaultMemDesc(src0_shape); - dnnl::memory::desc src1_desc = GetDefaultMemDesc(src1_shape); + dnnl::memory::desc src0_desc; + dnnl::memory::desc src1_desc; + if (need_swap_) { + src0_desc = GetDefaultMemDesc(src1_shape); + src1_desc = GetDefaultMemDesc(src0_shape); + } else { + src0_desc = GetDefaultMemDesc(src0_shape); + src1_desc = GetDefaultMemDesc(src1_shape); + } dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_add, src0_desc, src1_desc, dst_desc); auto prim_desc = dnnl::binary::primitive_desc(desc, MKLKernelEngine::Get().engine()); @@ -51,8 +79,13 @@ bool TensorAddCPUKernel::Launch(const std::vector &inputs, if (inputs.size() < 2 || outputs.empty()) { MS_LOG(EXCEPTION) << "TensorAdd error input output size!"; } - SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr); - SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr); + if (need_swap_) { + SetArgumentHandle(DNNL_ARG_SRC_0, inputs[1]->addr); + SetArgumentHandle(DNNL_ARG_SRC_1, inputs[0]->addr); + } else { + SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr); + SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr); + } 
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr); ExecutePrimitive(); return true; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/tensoradd_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/tensoradd_cpu_kernel.h index 347e70ebb4e..0ede516da99 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/tensoradd_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/tensoradd_cpu_kernel.h @@ -31,6 +31,9 @@ class TensorAddCPUKernel : public MKLCPUKernel { bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) override; + + private: + bool need_swap_{false}; }; MS_REG_CPU_KERNEL( diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.h index 282c55133f2..e7a875b1c0f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/reshape_cpu_kernel.h @@ -39,6 +39,7 @@ MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutput ReshapeCPUKernel); MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), ReshapeCPUKernel); +MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), ReshapeCPUKernel); MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), ReshapeCPUKernel); @@ -46,6 +47,7 @@ MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutput ReshapeCPUKernel); MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), ReshapeCPUKernel); +MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), ReshapeCPUKernel); MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), ReshapeCPUKernel); @@ -53,6 
+55,8 @@ MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOut ReshapeCPUKernel); MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), ReshapeCPUKernel); +MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), + ReshapeCPUKernel); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ops/_grad/grad_nn_ops.py b/mindspore/ops/_grad/grad_nn_ops.py index 3853afeaaa5..b1654c3cfc3 100755 --- a/mindspore/ops/_grad/grad_nn_ops.py +++ b/mindspore/ops/_grad/grad_nn_ops.py @@ -560,11 +560,17 @@ def get_bprop_gelu(self): def get_bprop_fused_batch_norm(self): """Grad definition for `FusedBatchNorm` operation.""" input_grad = G.FusedBatchNormGrad(self.epsilon, self.momentum) - + target_cpu = False + if self.target == "CPU": + input_grad = G.FusedBatchNormGradCPU(self.epsilon, self.momentum) + target_cpu = True def bprop(x, scale, b, mean, variance, out, dout): saved_mean = out[3] saved_variance = out[4] - out = input_grad(dout[0], x, scale, saved_mean, saved_variance) + if target_cpu: + out = input_grad(dout[0], x, scale, b, saved_mean, saved_variance) + else: + out = input_grad(dout[0], x, scale, saved_mean, saved_variance) dx = out[0] dscale = out[1] dbias = out[2] diff --git a/mindspore/ops/operations/_grad_ops.py b/mindspore/ops/operations/_grad_ops.py index cc61c9c4c77..ddbd2beb183 100644 --- a/mindspore/ops/operations/_grad_ops.py +++ b/mindspore/ops/operations/_grad_ops.py @@ -540,6 +540,22 @@ class FusedBatchNormGrad(Primitive): raise NotImplementedError +class FusedBatchNormGradCPU(PrimitiveWithInfer): + """Gradients of FusedBatchNorm operation for CPU.""" + + @prim_attr_register + def __init__(self, epsilon=0.0, momentum=0.1): + self.init_prim_io_names(inputs=['dy', 'x', 'scale', 'bias', 'save_mean', 'save_inv_variance'], + outputs=['dx', 'bn_scale', 'bn_bias']) + self.add_prim_attr('data_format', "NCHW") + + def 
infer_shape(self, dy_shape, x_shape, scale_shape, bias_shape, save_mean_shape, save_inv_variance_shape): + return (x_shape, scale_shape, bias_shape) + + def infer_dtype(self, dy_type, x_type, scale_type, bias_type, save_mean_type, save_inv_variance_type): + return (x_type, scale_type, bias_type) + + class FusedBatchNormGradEx(PrimitiveWithInfer): """Gradients of FusedBatchNormEx operation.""" diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index 4bf5663c0f5..7d34825f27a 100644 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -640,6 +640,7 @@ class FusedBatchNorm(Primitive): self.epsilon = validator.check_float_range(epsilon, 0, 1, Rel.INC_RIGHT, 'epsilon', self.name) self.momentum = validator.check_float_range(momentum, 0, 1, Rel.INC_BOTH, 'momentum', self.name) self._update_parameter = True + self.target = context.get_context("device_target") class FusedBatchNormEx(PrimitiveWithInfer): diff --git a/tests/st/ops/cpu/test_abs_op.py b/tests/st/ops/cpu/test_abs_op.py new file mode 100644 index 00000000000..d5e8d7a5392 --- /dev/null +++ b/tests/st/ops/cpu/test_abs_op.py @@ -0,0 +1,60 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.common.api import ms_function +from mindspore.ops import operations as P +from mindspore.ops.composite import GradOperation + +context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + + +class Grad(nn.Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.grad = GradOperation(get_all=True, sens_param=True) + self.network = network + + @ms_function + def construct(self, input_, output_grad): + return self.grad(self.network)(input_, output_grad) + + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.ops = P.Abs() + + def construct(self, x): + return self.ops(x) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_net(): + x = np.random.randn(2, 3, 3, 4).astype(np.float32) + y_expect = np.abs(x) + net = Net() + out = net(Tensor(x)) + assert (out.asnumpy() == y_expect).all() + sens = np.random.randn(2, 3, 3, 4).astype(np.float32) + backword_net = Grad(Net()) + output = backword_net(Tensor(x), Tensor(sens)) + print(len(output)) + print(output[0].asnumpy()) diff --git a/tests/st/ops/cpu/test_batchnorm_op.py b/tests/st/ops/cpu/test_batchnorm_op.py index e020354f8b3..1fd232358ea 100644 --- a/tests/st/ops/cpu/test_batchnorm_op.py +++ b/tests/st/ops/cpu/test_batchnorm_op.py @@ -80,3 +80,39 @@ def test_train_forward(): bn_net = Batchnorm_Net(2, Tensor(weight), Tensor(bias), Tensor(moving_mean), Tensor(moving_var_init)) bn_net.set_train(False) output = bn_net(Tensor(x)) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_train_backward(): + x = np.array([[ + [[1, 3, 3, 5], [2, 4, 6, 8], [3, 6, 7, 7], [4, 3, 8, 2]], + [[5, 7, 6, 3], [3, 5, 6, 7], [9, 4, 2, 5], [7, 5, 8, 1]]]]).astype(np.float32) + grad = np.array([[ + 
[[1, 2, 7, 1], [4, 2, 1, 3], [1, 6, 5, 2], [2, 4, 3, 2]], + [[9, 4, 3, 5], [1, 3, 7, 6], [5, 7, 9, 9], [1, 4, 6, 8]]]]).astype(np.float32) + expect_output = np.array([[[[-0.69126546, -0.32903028, 1.9651246, -0.88445705], + [0.6369296, -0.37732816, -0.93275493, -0.11168876], + [-0.7878612, 1.3614, 0.8542711, -0.52222186], + [-0.37732816, 0.5886317, -0.11168876, -0.28073236]], + + [[1.6447213, -0.38968924, -1.0174079, -0.55067265], + [-2.4305856, -1.1751484, 0.86250514, 0.5502673], + [0.39576983, 0.5470243, 1.1715001, 1.6447213], + [-1.7996241, -0.7051701, 0.7080077, 0.5437813]]]]).astype(np.float32) + + weight = Tensor(np.ones(2).astype(np.float32)) + bias = Tensor(np.ones(2).astype(np.float32)) + moving_mean = Tensor(np.ones(2).astype(np.float32)) + moving_var_init = Tensor(np.ones(2).astype(np.float32)) + error = np.ones(shape=[1, 2, 4, 4]) * 1.0e-6 + + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + bn_net = Batchnorm_Net(2, weight, bias, moving_mean, moving_var_init) + bn_net.set_train() + bn_grad = Grad(bn_net) + output = bn_grad(Tensor(x), Tensor(grad)) + diff = output[0].asnumpy() - expect_output + assert np.all(diff < error) + assert np.all(-diff < error) diff --git a/tests/st/ops/cpu/test_cast_op.py b/tests/st/ops/cpu/test_cast_op.py new file mode 100644 index 00000000000..b75110b2639 --- /dev/null +++ b/tests/st/ops/cpu/test_cast_op.py @@ -0,0 +1,76 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore.common.dtype as mstype +import mindspore.context as context +from mindspore.common.tensor import Tensor +from mindspore.nn import Cell +from mindspore.ops import operations as P + + +class Net(Cell): + def __init__(self, dtype): + super(Net, self).__init__() + self.Cast = P.Cast() + self.dtype = dtype + + def construct(self, x): + return self.Cast(x, self.dtype) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_cast_int32(): + x0 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.float32)) + x1 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.int32)) + x2 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.bool)) + t = mstype.int32 + + context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + net = Net(t) + output = net(x0) + type0 = output.asnumpy().dtype + assert type0 == 'int32' + output = net(x1) + type1 = output.asnumpy().dtype + assert type1 == 'int32' + output = net(x2) + type2 = output.asnumpy().dtype + assert type2 == 'int32' + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_cast_float32(): + x0 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.float32)) + x1 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.int32)) + x2 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.bool)) + t = mstype.float32 + + context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + net = Net(t) + output = net(x0) + type0 = output.asnumpy().dtype + assert type0 == 'float32' + output = net(x1) + type1 = output.asnumpy().dtype + assert type1 == 'float32' + output = net(x2) + type2 = output.asnumpy().dtype + assert type2 == 'float32' diff --git a/tests/st/ops/cpu/test_exp_op.py b/tests/st/ops/cpu/test_exp_op.py new file mode 100644 
index 00000000000..fc85ae92670 --- /dev/null +++ b/tests/st/ops/cpu/test_exp_op.py @@ -0,0 +1,56 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P + + +class NetExp(nn.Cell): + def __init__(self): + super(NetExp, self).__init__() + self.exp = P.Exp() + + def construct(self, x): + return self.exp(x) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_exp(): + x0_np = np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32) + x1_np = np.random.uniform(-2, 2, 1).astype(np.float32) + x0 = Tensor(x0_np) + x1 = Tensor(x1_np) + expect0 = np.exp(x0_np) + expect1 = np.exp(x1_np) + error0 = np.ones(shape=expect0.shape) * 1.0e-5 + error1 = np.ones(shape=expect1.shape) * 1.0e-5 + + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + exp = NetExp() + output0 = exp(x0) + diff0 = output0.asnumpy() - expect0 + assert np.all(diff0 < error0) + assert output0.shape == expect0.shape + output1 = exp(x1) + diff1 = output1.asnumpy() - expect1 + assert np.all(diff1 < error1) + assert output1.shape == expect1.shape diff --git a/tests/st/ops/cpu/test_less_op.py b/tests/st/ops/cpu/test_less_op.py new file mode 100644 index 
00000000000..4195bd4ab20 --- /dev/null +++ b/tests/st/ops/cpu/test_less_op.py @@ -0,0 +1,83 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import numpy as np +import pytest +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P + + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.ops = P.Less() + + def construct(self, x, y): + return self.ops(x, y) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu_training +@pytest.mark.env_onecard +def test_net(): + x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) + y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) + x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) + y1_np = np.random.randint(1, 5, (2, 1, 4, 4)).astype(np.float32) + x2_np = np.random.randint(1, 5, (2, 1, 1, 4)).astype(np.float32) + y2_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) + x3_np = np.random.randint(1, 5, 1).astype(np.float32) + y3_np = np.random.randint(1, 5, 1).astype(np.float32) + x4_np = np.array(768).astype(np.float32) + y4_np = np.array(3072.5).astype(np.float32) + + x0 = Tensor(x0_np) + y0 = Tensor(y0_np) + x1 = Tensor(x1_np) + y1 = Tensor(y1_np) + x2 = Tensor(x2_np) + y2 = Tensor(y2_np) + x3 = Tensor(x3_np) + y3 = Tensor(y3_np) + x4 = 
Tensor(x4_np) + y4 = Tensor(y4_np) + + context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + net = Net() + out = net(x0, y0).asnumpy() + expect = x0_np < y0_np + assert np.all(out == expect) + assert out.shape == expect.shape + + out = net(x1, y1).asnumpy() + expect = x1_np < y1_np + assert np.all(out == expect) + assert out.shape == expect.shape + + out = net(x2, y2).asnumpy() + expect = x2_np < y2_np + assert np.all(out == expect) + assert out.shape == expect.shape + + out = net(x3, y3).asnumpy() + expect = x3_np < y3_np + assert np.all(out == expect) + assert out.shape == expect.shape + + out = net(x4, y4).asnumpy() + expect = x4_np < y4_np + assert np.all(out == expect) + assert out.shape == expect.shape diff --git a/tests/st/ops/cpu/test_log_op.py b/tests/st/ops/cpu/test_log_op.py new file mode 100644 index 00000000000..bca7a83d6b4 --- /dev/null +++ b/tests/st/ops/cpu/test_log_op.py @@ -0,0 +1,56 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P + + +class NetLog(nn.Cell): + def __init__(self): + super(NetLog, self).__init__() + self.log = P.Log() + + def construct(self, x): + return self.log(x) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_log(): + x0_np = np.random.uniform(1, 2, (2, 3, 4, 4)).astype(np.float32) + x1_np = np.random.uniform(1, 2, 1).astype(np.float32) + x0 = Tensor(x0_np) + x1 = Tensor(x1_np) + expect0 = np.log(x0_np) + expect1 = np.log(x1_np) + error0 = np.ones(shape=expect0.shape) * 1.0e-5 + error1 = np.ones(shape=expect1.shape) * 1.0e-5 + + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + log = NetLog() + output0 = log(x0) + output1 = log(x1) + diff0 = output0.asnumpy() - expect0 + assert np.all(diff0 < error0) + assert output0.shape == expect0.shape + diff1 = output1.asnumpy() - expect1 + assert np.all(diff1 < error1) + assert output1.shape == expect1.shape diff --git a/tests/st/ops/cpu/test_mul_op.py b/tests/st/ops/cpu/test_mul_op.py index ee9120505a1..88d1e71eefd 100644 --- a/tests/st/ops/cpu/test_mul_op.py +++ b/tests/st/ops/cpu/test_mul_op.py @@ -16,38 +16,53 @@ import numpy as np import pytest -import mindspore.context as context +import mindspore.common.dtype as mstype import mindspore.nn as nn -from mindspore import Tensor +from mindspore import Tensor, context from mindspore.common.api import ms_function -from mindspore.common.initializer import initializer -from mindspore.common.parameter import Parameter from mindspore.ops import operations as P -x = np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32) -y = np.random.uniform(-2, 2, (1, 1, 1, 1)).astype(np.float32) - -context.set_context(device_target='CPU') +context.set_context(mode=context.GRAPH_MODE, 
device_target='CPU') class Net(nn.Cell): def __init__(self): super(Net, self).__init__() self.mul = P.Mul() - self.x = Parameter(initializer(Tensor(x), x.shape), name='x3') - self.y = Parameter(initializer(Tensor(y), y.shape), name='y3') @ms_function - def construct(self): - return self.mul(self.x, self.y) + def construct(self, x, y): + return self.mul(x, y) @pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard -def test_Mul(): +def test_mul(): + x0 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)) + y0 = Tensor(np.random.uniform(-2, 2, (1, 1, 1, 1)).astype(np.float32)) + x1 = Tensor(np.random.uniform(-2, 2, (1, 3, 1, 4)).astype(np.float32)) + y1 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)) + x2 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)) + y2 = Tensor(2, mstype.float32) mul = Net() - output = mul() - print(x) - print(y) - print(output) + out = mul(x0, y0).asnumpy() + exp = x0.asnumpy() * y0.asnumpy() + diff = np.abs(out - exp) + err = np.ones(shape=exp.shape) * 1.0e-5 + assert np.all(diff < err) + assert out.shape == exp.shape + + out = mul(x1, y1).asnumpy() + exp = x1.asnumpy() * y1.asnumpy() + diff = np.abs(out - exp) + err = np.ones(shape=exp.shape) * 1.0e-5 + assert np.all(diff < err) + assert out.shape == exp.shape + + out = mul(x2, y2).asnumpy() + exp = x2.asnumpy() * y2.asnumpy() + diff = np.abs(out - exp) + err = np.ones(shape=exp.shape) * 1.0e-5 + assert np.all(diff < err) + assert out.shape == exp.shape diff --git a/tests/st/ops/cpu/test_neg_op.py b/tests/st/ops/cpu/test_neg_op.py new file mode 100644 index 00000000000..6c3c06f6aa1 --- /dev/null +++ b/tests/st/ops/cpu/test_neg_op.py @@ -0,0 +1,60 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.common.api import ms_function +from mindspore.ops import operations as P +from mindspore.ops.composite import GradOperation + +context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + + +class Grad(nn.Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.grad = GradOperation(get_all=True, sens_param=True) + self.network = network + + @ms_function + def construct(self, input_, output_grad): + return self.grad(self.network)(input_, output_grad) + + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.ops = P.Neg() + + def construct(self, x): + return self.ops(x) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_net(): + x = np.random.randn(2, 3, 3, 4).astype(np.float32) + y_expect = -x + net = Net() + out = net(Tensor(x)) + assert (out.asnumpy() == y_expect).all() + sens = np.random.randn(2, 3, 3, 4).astype(np.float32) + backword_net = Grad(Net()) + output = backword_net(Tensor(x), Tensor(sens)) + print(len(output)) + print(output[0].asnumpy()) diff --git a/tests/st/ops/cpu/test_pow_op.py b/tests/st/ops/cpu/test_pow_op.py new file mode 100644 index 00000000000..998b254f5d3 --- /dev/null +++ b/tests/st/ops/cpu/test_pow_op.py @@ -0,0 +1,58 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 
(the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P + + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.ops = P.Pow() + + def construct(self, x, y): + return self.ops(x, y) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu_training +@pytest.mark.env_onecard +def test_net(): + x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) + y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) + x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) + y1_np = np.array(3).astype(np.float32) + + x0 = Tensor(x0_np) + y0 = Tensor(y0_np) + x1 = Tensor(x1_np) + y1 = Tensor(y1_np) + + context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + net = Net() + out = net(x0, y0).asnumpy() + expect = np.power(x0_np, y0_np) + assert np.all(out == expect) + assert out.shape == expect.shape + + out = net(x1, y1).asnumpy() + expect = np.power(x1_np, y1_np) + assert np.all(out == expect) + assert out.shape == expect.shape diff --git a/tests/st/ops/cpu/test_realdiv_op.py b/tests/st/ops/cpu/test_realdiv_op.py new file mode 100644 index 00000000000..52c5d651e65 --- /dev/null +++ b/tests/st/ops/cpu/test_realdiv_op.py @@ -0,0 +1,95 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 
2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P + + +class NetRealDiv(nn.Cell): + def __init__(self): + super(NetRealDiv, self).__init__() + self.divide = P.RealDiv() + + def construct(self, x, y): + return self.divide(x, y) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu_training +@pytest.mark.env_onecard +def test_real_div(): + x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) + y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) + x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) + y1_np = np.random.randint(1, 5, (2, 1, 4, 4)).astype(np.float32) + x2_np = np.random.randint(1, 5, (2, 1, 1, 4)).astype(np.float32) + y2_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32) + x3_np = np.random.randint(1, 5, 1).astype(np.float32) + y3_np = np.random.randint(1, 5, 1).astype(np.float32) + x4_np = np.array(768).astype(np.float32) + y4_np = np.array(3072.5).astype(np.float32) + + x0 = Tensor(x0_np) + y0 = Tensor(y0_np) + x1 = Tensor(x1_np) + y1 = Tensor(y1_np) + x2 = Tensor(x2_np) + y2 = Tensor(y2_np) + x3 = Tensor(x3_np) + y3 = Tensor(y3_np) + x4 = Tensor(x4_np) + y4 = Tensor(y4_np) + + context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + real_div = NetRealDiv() + output0 = real_div(x0, y0) + 
expect0 = np.divide(x0_np, y0_np) + diff0 = output0.asnumpy() - expect0 + error0 = np.ones(shape=expect0.shape) * 1.0e-5 + assert np.all(diff0 < error0) + assert output0.shape == expect0.shape + + output1 = real_div(x1, y1) + expect1 = np.divide(x1_np, y1_np) + diff1 = output1.asnumpy() - expect1 + error1 = np.ones(shape=expect1.shape) * 1.0e-5 + assert np.all(diff1 < error1) + assert output1.shape == expect1.shape + + output2 = real_div(x2, y2) + expect2 = np.divide(x2_np, y2_np) + diff2 = output2.asnumpy() - expect2 + error2 = np.ones(shape=expect2.shape) * 1.0e-5 + assert np.all(diff2 < error2) + assert output2.shape == expect2.shape + + output3 = real_div(x3, y3) + expect3 = np.divide(x3_np, y3_np) + diff3 = output3.asnumpy() - expect3 + error3 = np.ones(shape=expect3.shape) * 1.0e-5 + assert np.all(diff3 < error3) + assert output3.shape == expect3.shape + + output4 = real_div(x4, y4) + expect4 = np.divide(x4_np, y4_np) + diff4 = output4.asnumpy() - expect4 + error4 = np.ones(shape=expect4.shape) * 1.0e-5 + assert np.all(diff4 < error4) + assert output4.shape == expect4.shape diff --git a/tests/st/ops/cpu/test_relu6_op.py b/tests/st/ops/cpu/test_relu6_op.py index b9530961887..e2f8b0cb4b2 100644 --- a/tests/st/ops/cpu/test_relu6_op.py +++ b/tests/st/ops/cpu/test_relu6_op.py @@ -20,7 +20,9 @@ import mindspore.context as context import mindspore.nn as nn from mindspore import Tensor from mindspore.ops import operations as P +from mindspore.ops.operations import _grad_ops as G +context.set_context(mode=context.GRAPH_MODE, device_target="CPU") class NetReLU6(nn.Cell): def __init__(self): @@ -30,6 +32,13 @@ class NetReLU6(nn.Cell): def construct(self, x): return self.relu6(x) +class NetReLU6Grad(nn.Cell): + def __init__(self): + super(NetReLU6Grad, self).__init__() + self.relu6_grad = G.ReLU6Grad() + + def construct(self, x, dy): + return self.relu6_grad(dy, x) @pytest.mark.level0 @pytest.mark.platform_x86_cpu @@ -42,7 +51,26 @@ def test_relu6(): [5.9, 6, 6,], [6, 1, 
0.]]]]).astype(np.float32) - context.set_context(mode=context.GRAPH_MODE, device_target="CPU") relu6 = NetReLU6() output = relu6(x) assert (output.asnumpy() == expect).all() + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_relu6_grad(): + x = Tensor(np.array([[[[-1, 1, 10], + [5.9, 6.1, 6], + [10, 1, -1]]]]).astype(np.float32)) + dy = Tensor(np.array([[[[1, 1, 1], + [1, 1, 1], + [1, 1, 1]]]]).astype(np.float32)) + expect = np.array([[[[0, 1, 0,], + [1, 0, 1,], + [0, 1, 0,]]]]).astype(np.float32) + error = np.ones(shape=[3, 3]) * 1.0e-6 + + relu6_grad = NetReLU6Grad() + output = relu6_grad(x, dy) + diff = np.abs(output.asnumpy() - expect) + assert np.all(np.abs(diff) < error) diff --git a/tests/st/ops/cpu/test_relu_grad_op.py b/tests/st/ops/cpu/test_relu_grad_op.py index b1336b0ffe4..e76eaae87df 100644 --- a/tests/st/ops/cpu/test_relu_grad_op.py +++ b/tests/st/ops/cpu/test_relu_grad_op.py @@ -49,5 +49,5 @@ def test_relu_grad(): output = relu_grad() expect = np.array([[[[0, 0, 1,], [0, 0, 0,], [1, 1, 0.]]]]).astype(np.float32) error = np.ones(shape=[3, 3]) * 1.0e-6 - diff = output.asnumpy() - expect + diff = np.abs(output.asnumpy() - expect) assert np.all(diff < error) diff --git a/tests/st/ops/cpu/test_sigmoid_op.py b/tests/st/ops/cpu/test_sigmoid_op.py new file mode 100644 index 00000000000..1bdc2da46ca --- /dev/null +++ b/tests/st/ops/cpu/test_sigmoid_op.py @@ -0,0 +1,78 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P +from mindspore.ops.operations import _grad_ops as G + +context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + + +class NetSigmoidGrad(nn.Cell): + def __init__(self): + super(NetSigmoidGrad, self).__init__() + self.sigmoid_grad = G.SigmoidGrad() + + def construct(self, y, dy): + return self.sigmoid_grad(y, dy) + + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.ops = P.Sigmoid() + + def construct(self, x): + return self.ops(x) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_net(): + x = np.random.randn(2, 3, 3, 4).astype(np.float32) + y_expect = 1 / (1 + np.exp(-x)) + net = Net() + out = net(Tensor(x)) + diff = np.abs(out.asnumpy() - y_expect) + err = np.ones(shape=y_expect.shape) * 1.0e-5 + assert np.all(diff < err) + assert out.shape == y_expect.shape + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_sigmoid_grad(): + y = Tensor(np.array([[[[-1, 1, 2], + [1, -1, 1], + [2, 1, -1]]]]).astype(np.float32)) + dy = Tensor(np.array([[[[-11, 2, 4], + [-1, 1, -1], + [-4, 4, -4]]]]).astype(np.float32)) + + expect = np.array([[[[22, 0, -8], + [0, -2, 0], + [8, 0, 8]]]]).astype(np.float32) + + error = np.ones(shape=[1, 1, 3, 3]) * 1.0e-6 + + sigmoid_grad = NetSigmoidGrad() + output = sigmoid_grad(y, dy) + diff = np.abs(output.asnumpy() - expect) + assert np.all(abs(diff) < error) diff --git a/tests/st/ops/cpu/test_sqrt_op.py b/tests/st/ops/cpu/test_sqrt_op.py new file mode 100644 index 00000000000..56d63f76d4e --- /dev/null +++ b/tests/st/ops/cpu/test_sqrt_op.py @@ -0,0 +1,75 @@ +# Copyright 2020 Huawei
Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P +from mindspore.ops.operations import _grad_ops as G + +context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + +class NetSqrtGrad(nn.Cell): + def __init__(self): + super(NetSqrtGrad, self).__init__() + self.sqrt_grad = G.SqrtGrad() + + def construct(self, x, dx): + return self.sqrt_grad(x, dx) + + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.ops = P.Sqrt() + + def construct(self, x): + return self.ops(x) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_net(): + x = np.abs(np.random.randn(2, 3, 3, 4)).astype(np.float32) + y_expect = np.sqrt(x) + net = Net() + out = net(Tensor(x)) + diff = np.abs(out.asnumpy() - y_expect) + err = np.ones(shape=y_expect.shape) * 1.0e-5 + assert np.all(diff < err) + assert out.shape == y_expect.shape + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_sqrt_grad(): + x = Tensor(np.array([[[[-1, 1, 10], + [5.9, 6.1, 6], + [10, 1, -1]]]]).astype(np.float32)) + dx = Tensor(np.array([[[[1, 1, 1], + [2, 2, 2], + [3, 3, 3]]]]).astype(np.float32)) + expect = np.array([[[[-0.5, 0.5, 0.05,], + [0.16949153,
0.16393442, 0.16666667,], + [0.15, 1.5, -1.5,]]]]).astype(np.float32) + error = np.ones(shape=[3, 3]) * 1.0e-6 + + sqrt_grad = NetSqrtGrad() + output = sqrt_grad(x, dx) + diff = np.abs(output.asnumpy() - expect) + assert np.all(np.abs(diff) < error) diff --git a/tests/st/ops/cpu/test_square_op.py b/tests/st/ops/cpu/test_square_op.py new file mode 100644 index 00000000000..c726ad56762 --- /dev/null +++ b/tests/st/ops/cpu/test_square_op.py @@ -0,0 +1,63 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.common.api import ms_function +from mindspore.ops import operations as P +from mindspore.ops.composite import GradOperation + +context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + + +class Grad(nn.Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.grad = GradOperation(get_all=True, sens_param=True) + self.network = network + + @ms_function + def construct(self, input_, output_grad): + return self.grad(self.network)(input_, output_grad) + + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.ops = P.Square() + + def construct(self, x): + return self.ops(x) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_net(): + x = np.random.randn(2, 3, 3, 4).astype(np.float32) + y_expect = x * x + net = Net() + out = net(Tensor(x)) + diff = np.abs(out.asnumpy() - y_expect) + err = np.ones(shape=y_expect.shape) * 1.0e-5 + assert np.all(diff < err) + assert out.shape == y_expect.shape + sens = np.random.randn(2, 3, 3, 4).astype(np.float32) + backword_net = Grad(Net()) + output = backword_net(Tensor(x), Tensor(sens)) + print(len(output)) + print(output[0].asnumpy()) diff --git a/tests/st/ops/cpu/test_tanh_op.py b/tests/st/ops/cpu/test_tanh_op.py new file mode 100644 index 00000000000..056be73699e --- /dev/null +++ b/tests/st/ops/cpu/test_tanh_op.py @@ -0,0 +1,63 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.common.api import ms_function +from mindspore.ops import operations as P +from mindspore.ops.composite import GradOperation + +context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + + +class Grad(nn.Cell): + def __init__(self, network): + super(Grad, self).__init__() + self.grad = GradOperation(get_all=True, sens_param=True) + self.network = network + + @ms_function + def construct(self, input_, output_grad): + return self.grad(self.network)(input_, output_grad) + + +class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + self.ops = P.Tanh() + + def construct(self, x): + return self.ops(x) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_net(): + x = np.random.randn(2, 3, 3, 4).astype(np.float32) + y_expect = np.tanh(x) + net = Net() + out = net(Tensor(x)) + diff = np.abs(out.asnumpy() - y_expect) + err = np.ones(shape=y_expect.shape) * 1.0e-5 + assert np.all(diff < err) + assert out.shape == y_expect.shape + sens = np.random.randn(2, 3, 3, 4).astype(np.float32) + backword_net = Grad(Net()) + output = backword_net(Tensor(x), Tensor(sens)) + print(len(output)) + print(output[0].asnumpy()) diff --git a/tests/st/ops/cpu/test_tensoradd.py b/tests/st/ops/cpu/test_tensoradd.py index 19f3b7aa8b5..6284c0dbbc2 100644 --- a/tests/st/ops/cpu/test_tensoradd.py +++
b/tests/st/ops/cpu/test_tensoradd.py @@ -13,12 +13,15 @@ # limitations under the License. # ============================================================================ -import pytest import numpy as np -from mindspore import Tensor -from mindspore.ops import operations as P +import pytest + +import mindspore.common.dtype as mstype import mindspore.nn as nn -import mindspore.context as context +from mindspore import Tensor, context +from mindspore.ops import operations as P + +context.set_context(mode=context.GRAPH_MODE, device_target='CPU') class TensorAdd(nn.Cell): def __init__(self): @@ -34,10 +37,30 @@ class TensorAdd(nn.Cell): @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard def test_tensor_add(): - x = np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32) - y = np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32) - - context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + x0 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)) + y0 = Tensor(np.random.uniform(-2, 2, (1, 1, 1, 1)).astype(np.float32)) + x1 = Tensor(np.random.uniform(-2, 2, (1, 3, 1, 4)).astype(np.float32)) + y1 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)) + x2 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)) + y2 = Tensor(2, mstype.float32) add = TensorAdd() - output = add(Tensor(x), Tensor(y)) - assert (output.asnumpy() == x + y).all() + out = add(x0, y0).asnumpy() + exp = x0.asnumpy() + y0.asnumpy() + diff = np.abs(out - exp) + err = np.ones(shape=exp.shape) * 1.0e-5 + assert np.all(diff < err) + assert out.shape == exp.shape + + out = add(x1, y1).asnumpy() + exp = x1.asnumpy() + y1.asnumpy() + diff = np.abs(out - exp) + err = np.ones(shape=exp.shape) * 1.0e-5 + assert np.all(diff < err) + assert out.shape == exp.shape + + out = add(x2, y2).asnumpy() + exp = x2.asnumpy() + y2.asnumpy() + diff = np.abs(out - exp) + err = np.ones(shape=exp.shape) * 1.0e-5 + assert np.all(diff < err) + assert out.shape 
== exp.shape