Add some CPU operators

zhaoting 2020-10-14 16:39:40 +08:00
parent af78c12a73
commit f2e9d9cfc7
44 changed files with 1946 additions and 311 deletions

View File

@ -13,9 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h"
#include <thread>
#include <cmath>
#include <string>
#include <thread>
#include "backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
@ -52,13 +53,35 @@ void ArithmeticCPUKernel::Mul(const T *input1, const T *input2, T *out, size_t s
}
template <typename T>
void ArithmeticCPUKernel::Div(const T *input1, const T *input2, T *out, size_t start, size_t end) {
void ArithmeticCPUKernel::RealDiv(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
auto div_number = input2[i];
std::vector<size_t> idx;
GenIndex(i, &idx);
auto div_number = input2[idx[1]];
if (div_number == 0) {
MS_LOG(EXCEPTION) << "Cannot divided by 0!";
}
out[i] = input1[i] / div_number;
out[i] = input1[idx[0]] / div_number;
}
}
template <typename T>
void ArithmeticCPUKernel::Pow(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
std::vector<size_t> idx;
GenIndex(i, &idx);
auto x = static_cast<double>(input1[idx[0]]);
auto y = static_cast<double>(input2[idx[1]]);
out[i] = static_cast<T>(std::pow(x, y));
}
}
template <typename T>
void ArithmeticCPUKernel::Less(const T *input1, const T *input2, bool *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
std::vector<size_t> idx;
GenIndex(i, &idx);
out[i] = input1[idx[0]] < input2[idx[1]];
}
}
@ -71,10 +94,16 @@ void ArithmeticCPUKernel::InitKernel(const CNodePtr &kernel_node) {
operate_type_ = SUB;
} else if (kernel_name == prim::kPrimMul->name()) {
operate_type_ = MUL;
} else if (kernel_name == "Div") {
operate_type_ = DIV;
} else if (kernel_name == prim::kPrimRealDiv->name()) {
operate_type_ = REALDIV;
} else if (kernel_name == prim::kPrimPow->name()) {
operate_type_ = POW;
} else if (kernel_name == prim::kPrimLess->name()) {
operate_type_ = LESS;
} else if (kernel_name == prim::kPrimAssignAdd->name()) {
operate_type_ = ASSIGNADD;
} else {
MS_LOG(EXCEPTION) << "Not support " << kernel_name;
}
input_shape0_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
@ -145,14 +174,45 @@ void ArithmeticCPUKernel::GenIndex(size_t num, std::vector<size_t> *idx) {
idx->push_back(idx0);
idx->push_back(idx1);
}
template <typename T>
void ArithmeticCPUKernel::LaunchLess(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
T *input1 = reinterpret_cast<T *>(inputs[0]->addr);
T *input2 = reinterpret_cast<T *>(inputs[1]->addr);
bool *output = reinterpret_cast<bool *>(outputs[0]->addr);
size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(bool)) : 1;
auto max_thread_num = std::thread::hardware_concurrency();
size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num;
MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num;
std::vector<std::thread> threads;
threads.reserve(thread_num);
size_t start = 0;
size_t once_compute_size = (lens + thread_num - 1) / thread_num;
while (start < lens) {
size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size);
threads.emplace_back(std::thread(&ArithmeticCPUKernel::Less<T>, this, input1, input2, output, start, end));
start += once_compute_size;
}
for (size_t i = 0; i < threads.size(); ++i) {
threads[i].join();
}
}
template <typename T>
void ArithmeticCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
if (operate_type_ == LESS) {
LaunchLess<T>(inputs, outputs);
return;
}
T *input1 = reinterpret_cast<T *>(inputs[0]->addr);
T *input2 = reinterpret_cast<T *>(inputs[1]->addr);
T *output = reinterpret_cast<T *>(outputs[0]->addr);
auto lens = outputs[0]->size / sizeof(T);
size_t thread_num = lens < 128 * 24 ? std::ceil(lens / 128.0) : 24;
MS_LOG(INFO) << "lens=" << lens << "; use thread_num=" << thread_num;
size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1;
auto max_thread_num = std::thread::hardware_concurrency();
size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num;
MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num;
std::vector<std::thread> threads;
threads.reserve(thread_num);
size_t start = 0;
@ -165,10 +225,14 @@ void ArithmeticCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, co
threads.emplace_back(std::thread(&ArithmeticCPUKernel::Sub<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == MUL) {
threads.emplace_back(std::thread(&ArithmeticCPUKernel::Mul<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == DIV) {
threads.emplace_back(std::thread(&ArithmeticCPUKernel::Div<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == REALDIV) {
threads.emplace_back(std::thread(&ArithmeticCPUKernel::RealDiv<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == POW) {
threads.emplace_back(std::thread(&ArithmeticCPUKernel::Pow<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == ASSIGNADD) {
threads.emplace_back(std::thread(&ArithmeticCPUKernel::AssignAdd<T>, this, input1, input2, output, start, end));
} else {
MS_LOG(EXCEPTION) << "Not support " << operate_type_;
}
start += once_compute_size;
}
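Note: LaunchLess and LaunchKernel above both split the flat output range into chunks of roughly 128 elements and hand each chunk to its own std::thread, capped at std::thread::hardware_concurrency(). A minimal standalone sketch of that chunking scheme (ParallelFor and the surrounding names are illustrative, not part of this commit):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <thread>
#include <vector>

// Illustrative helper: run task(start, end) over [0, lens) in ~128-element chunks.
template <typename Func>
void ParallelFor(size_t lens, Func task) {
  size_t max_thread_num = std::thread::hardware_concurrency();
  size_t thread_num =
      lens < 128 * max_thread_num ? static_cast<size_t>(std::ceil(lens / 128.0)) : max_thread_num;
  thread_num = std::max<size_t>(thread_num, 1);
  std::vector<std::thread> threads;
  threads.reserve(thread_num);
  size_t once_compute_size = (lens + thread_num - 1) / thread_num;
  for (size_t start = 0; start < lens; start += once_compute_size) {
    size_t end = std::min(start + once_compute_size, lens);
    threads.emplace_back(task, start, end);  // each thread owns [start, end)
  }
  for (auto &t : threads) {
    t.join();
  }
}

int main() {
  std::vector<float> in(1000, 3.0f), out(1000);
  ParallelFor(in.size(), [&](size_t start, size_t end) {
    for (size_t i = start; i < end; ++i) out[i] = in[i] * in[i];
  });
  return 0;
}

With the 128-element heuristic, lens = 300 on an 8-core machine uses ceil(300 / 128) = 3 threads, while lens = 4096 (>= 128 * 8) saturates all 8.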

View File

@ -15,8 +15,8 @@
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARITHMETIC_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARITHMETIC_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
@ -31,7 +31,8 @@ class ArithmeticCPUKernel : public CPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
template <typename T>
void LaunchLess(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
@ -44,9 +45,13 @@ class ArithmeticCPUKernel : public CPUKernel {
template <typename T>
void Mul(const T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void Div(const T *input1, const T *input2, T *out, size_t start, size_t end);
void RealDiv(const T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void Pow(const T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void AssignAdd(T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void Less(const T *input1, const T *input2, bool *out, size_t start, size_t end);
std::vector<size_t> input_shape0_;
std::vector<size_t> input_shape1_;
std::vector<size_t> input_element_num0_;
@ -66,6 +71,34 @@ MS_REG_CPU_KERNEL(
MS_REG_CPU_KERNEL(
Sub, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
Pow, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
Pow, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
Pow, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
RealDiv, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
RealDiv,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
RealDiv, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
Less, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
Less, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeBool),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
Less, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeBool),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
AssignAdd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
ArithmeticCPUKernel);

View File

@ -13,10 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h"
#include <cmath>
#include <thread>
#include <string>
#include <thread>
#include "backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
@ -30,9 +30,9 @@ void Square(const T *in, T *out, size_t start, size_t end) {
}
template <typename T>
void Sqrt(const T *in, T *out, size_t start, size_t end) {
void Neg(const T *in, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
out[i] = sqrtf(in[i]);
out[i] = -in[i];
}
}
} // namespace
@ -42,8 +42,8 @@ void ArithmeticSelfCPUKernel::InitKernel(const CNodePtr &kernel_node) {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == prim::kPrimSquare->name()) {
operate_type_ = SQUARE;
} else if (kernel_name == prim::kPrimSqrt->name()) {
operate_type_ = SQRT;
} else if (kernel_name == prim::kPrimNeg->name()) {
operate_type_ = NEG;
}
dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
}
@ -66,10 +66,11 @@ void ArithmeticSelfCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs
const std::vector<AddressPtr> &outputs) {
T *input = reinterpret_cast<T *>(inputs[0]->addr);
T *output = reinterpret_cast<T *>(outputs[0]->addr);
auto lens = inputs[0]->size / sizeof(T);
MS_LOG(INFO) << "lens=" << lens;
size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1;
const size_t thread_num = 24;
auto max_thread_num = std::thread::hardware_concurrency();
size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num;
MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num;
std::vector<std::thread> threads;
threads.reserve(thread_num);
size_t start = 0;
@ -78,8 +79,8 @@ void ArithmeticSelfCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs
size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size);
if (operate_type_ == SQUARE) {
threads.emplace_back(std::thread(Square<T>, input, output, start, end));
} else if (operate_type_ == SQRT) {
threads.emplace_back(std::thread(Sqrt<T>, input, output, start, end));
} else if (operate_type_ == NEG) {
threads.emplace_back(std::thread(Neg<T>, input, output, start, end));
}
start += once_compute_size;
}

View File

@ -15,8 +15,8 @@
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARITHMETIC_SELF_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARITHMETIC_SELF_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
@ -40,10 +40,12 @@ class ArithmeticSelfCPUKernel : public CPUKernel {
TypeId dtype_{kTypeUnknown};
};
MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ArithmeticSelfCPUKernel);
MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
ArithmeticSelfCPUKernel);
MS_REG_CPU_KERNEL(Neg, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ArithmeticSelfCPUKernel);
MS_REG_CPU_KERNEL(Neg, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
ArithmeticSelfCPUKernel);
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,82 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cmath>
#include <map>
#include <string>
#include <thread>
#include "backend/kernel_compiler/cpu/cast_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
template <typename S, typename T>
void Cast(const S *in, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
out[i] = static_cast<T>(in[i]);
}
}
template <typename S, typename T>
void LaunchCast(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs) {
S *input = reinterpret_cast<S *>(inputs[0]->addr);
T *output = reinterpret_cast<T *>(outputs[0]->addr);
MS_LOG(DEBUG) << "Type source: " << typeid(S).name() << "; target: " << typeid(T).name();
size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1;
auto max_thread_num = std::thread::hardware_concurrency();
size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num;
MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num;
std::vector<std::thread> threads;
threads.reserve(thread_num);
size_t start = 0;
size_t once_compute_size = (lens + thread_num - 1) / thread_num;
while (start < lens) {
size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size);
threads.emplace_back(std::thread(Cast<S, T>, input, output, start, end));
start += once_compute_size;
}
for (size_t i = 0; i < threads.size(); ++i) {
threads[i].join();
}
}
void CastCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
source_dtype = AnfAlgo::GetPrevNodeOutputDeviceDataType(kernel_node, 0);
target_dtype = AnfAlgo::GetOutputInferDataType(kernel_node, 0);
}
bool CastCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
using TypePair =
std::function<void(const std::vector<kernel::AddressPtr> &, const std::vector<kernel::AddressPtr> &)>;
std::map<TypeId, std::map<TypeId, TypePair>> mode_map;
mode_map[kNumberTypeFloat32][kNumberTypeFloat32] = LaunchCast<float, float>;
mode_map[kNumberTypeFloat32][kNumberTypeInt32] = LaunchCast<float, int>;
mode_map[kNumberTypeFloat32][kNumberTypeBool] = LaunchCast<float, bool>;
mode_map[kNumberTypeInt32][kNumberTypeFloat32] = LaunchCast<int, float>;
mode_map[kNumberTypeInt32][kNumberTypeInt32] = LaunchCast<int, int>;
mode_map[kNumberTypeInt32][kNumberTypeBool] = LaunchCast<int, bool>;
mode_map[kNumberTypeBool][kNumberTypeFloat32] = LaunchCast<bool, float>;
mode_map[kNumberTypeBool][kNumberTypeBool] = LaunchCast<bool, bool>;
mode_map[kNumberTypeBool][kNumberTypeInt32] = LaunchCast<bool, int>;
mode_map[source_dtype][target_dtype](inputs, outputs);
return true;
}
} // namespace kernel
} // namespace mindspore
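One note on the dispatch table above: operator[] on the nested map default-constructs an empty std::function for a (source, target) pair that was never registered, and invoking it throws std::bad_function_call rather than a MindSpore exception. A small self-contained sketch of a guarded lookup (names and the simplified signature are illustrative only):

#include <functional>
#include <iostream>
#include <map>
#include <string>

using LaunchFunc = std::function<void(int)>;  // stand-in for the real launch signature

int main() {
  std::map<std::string, std::map<std::string, LaunchFunc>> mode_map;
  mode_map["float32"]["int32"] = [](int n) { std::cout << "cast " << n << " elements\n"; };

  const std::string src = "float32", dst = "bool";
  auto src_it = mode_map.find(src);
  if (src_it == mode_map.end() || src_it->second.find(dst) == src_it->second.end()) {
    std::cout << "unsupported cast " << src << " -> " << dst << '\n';  // kernel code would raise an exception here
  } else {
    src_it->second.at(dst)(16);
  }
  return 0;
}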

View File

@ -0,0 +1,54 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CAST_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CAST_CPU_KERNEL_H_
#include <functional>
#include <memory>
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class CastCPUKernel : public CPUKernel {
public:
CastCPUKernel() = default;
~CastCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
private:
TypeId source_dtype{kTypeUnknown};
TypeId target_dtype{kTypeUnknown};
};
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), CastCPUKernel);
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32), CastCPUKernel);
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeBool), CastCPUKernel);
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), CastCPUKernel);
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), CastCPUKernel);
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool), CastCPUKernel);
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), CastCPUKernel);
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeInt32), CastCPUKernel);
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeFloat32), CastCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CAST_CPU_KERNEL_H_

View File

@ -15,15 +15,14 @@
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CPU_KERNEL_H_
#include <string>
#include <vector>
#include <functional>
#include <memory>
#include <numeric>
#include <functional>
#include <string>
#include <vector>
#include "backend/kernel_compiler/kernel.h"
#include "ir/anf.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "ir/anf.h"
using mindspore::kernel::Address;
using mindspore::kernel::AddressPtr;
@ -52,7 +51,26 @@ const char END[] = "end";
const char SIZE[] = "size";
const char USE_NESTEROV[] = "use_nesterov";
const char GROUP[] = "group";
enum OperateType { ADD = 0, SUB, MUL, DIV, SQUARE, SQRT, ASSIGNADD };
enum OperateType {
ADD = 0,
SUB,
MUL,
DIV,
SQUARE,
SQRT,
POW,
REALDIV,
NEG,
LESS,
ASSIGNADD,
RELUGRAD,
RELU6GRAD,
ABSGRAD,
TANHGRAD,
SQRTGRAD,
SIGMOIDGRAD
};
class CPUKernel : public kernel::KernelMod {
public:

View File

@ -0,0 +1,177 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cmath>
#include <string>
#include <thread>
#include "backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
template <typename T>
void EltWiseGradCPUKernel::ReluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
if (input2[i] > 0) {
out[i] = input1[i];
} else {
out[i] = 0;
}
}
}
template <typename T>
void EltWiseGradCPUKernel::ReLU6Grad(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
if (input2[i] > 0 && input2[i] <= 6) {
out[i] = input1[i];
} else {
out[i] = 0;
}
}
}
template <typename T>
void EltWiseGradCPUKernel::AbsGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
if (input1[i] > 0) {
out[i] = input2[i];
} else if (input1[i] < 0) {
out[i] = -input2[i];
} else {
out[i] = 0;
}
}
}
template <typename T>
void EltWiseGradCPUKernel::SigmoidGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
out[i] = input2[i] * input1[i] * (1 - input1[i]);
}
}
template <typename T>
void EltWiseGradCPUKernel::SqrtGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
out[i] = input2[i] / (input1[i] * 2);
}
}
template <typename T>
void EltWiseGradCPUKernel::TanhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
T tmp = input1[i] * input1[i];
out[i] = input2[i] * (1 - tmp);
}
}
void EltWiseGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == "ReluGrad") {
operate_type_ = RELUGRAD;
} else if (kernel_name == "ReLU6Grad") {
operate_type_ = RELU6GRAD;
} else if (kernel_name == "SigmoidGrad") {
operate_type_ = SIGMOIDGRAD;
} else if (kernel_name == "AbsGrad") {
operate_type_ = ABSGRAD;
} else if (kernel_name == "TanhGrad") {
operate_type_ = TANHGRAD;
} else if (kernel_name == "SqrtGrad") {
operate_type_ = SQRTGRAD;
} else {
MS_LOG(EXCEPTION) << "Not support " << kernel_name;
}
input_shape0_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
input_shape1_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
if (output_shape_.size() == 0) {
output_shape_.insert(output_shape_.begin(), 1);
}
size_t l = input_shape0_.size();
for (size_t i = 0; i < output_shape_.size() - l; ++i) {
input_shape0_.insert(input_shape0_.begin(), 1);
}
l = input_shape1_.size();
for (size_t i = 0; i < output_shape_.size() - l; ++i) {
input_shape1_.insert(input_shape1_.begin(), 1);
}
CPUKernelUtils::GetElementNumEveryDim(input_shape0_, &input_element_num0_);
CPUKernelUtils::GetElementNumEveryDim(input_shape1_, &input_element_num1_);
CPUKernelUtils::GetElementNumEveryDim(output_shape_, &output_element_num_);
dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
if (dtype_ != AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 1)) {
MS_LOG(EXCEPTION) << "Input0 and input1 must has the same data type";
}
}
bool EltWiseGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (dtype_ == kNumberTypeInt32) {
LaunchKernel<int>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
LaunchKernel<float>(inputs, outputs);
} else if (dtype_ == kNumberTypeInt64) {
LaunchKernel<int64_t>(inputs, outputs);
} else {
MS_LOG(EXCEPTION) << "Only support int32, float32, but actual data type is " << TypeIdLabel(dtype_);
}
return true;
}
template <typename T>
void EltWiseGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
T *input1 = reinterpret_cast<T *>(inputs[0]->addr);
T *input2 = reinterpret_cast<T *>(inputs[1]->addr);
T *output = reinterpret_cast<T *>(outputs[0]->addr);
size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1;
auto max_thread_num = std::thread::hardware_concurrency();
size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num;
MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num;
std::vector<std::thread> threads;
threads.reserve(thread_num);
size_t start = 0;
size_t once_compute_size = (lens + thread_num - 1) / thread_num;
while (start < lens) {
size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size);
if (operate_type_ == RELUGRAD) {
threads.emplace_back(std::thread(&EltWiseGradCPUKernel::ReluGrad<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == RELU6GRAD) {
threads.emplace_back(std::thread(&EltWiseGradCPUKernel::ReLU6Grad<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == ABSGRAD) {
threads.emplace_back(std::thread(&EltWiseGradCPUKernel::AbsGrad<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == SIGMOIDGRAD) {
threads.emplace_back(
std::thread(&EltWiseGradCPUKernel::SigmoidGrad<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == TANHGRAD) {
threads.emplace_back(std::thread(&EltWiseGradCPUKernel::TanhGrad<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == SQRTGRAD) {
threads.emplace_back(std::thread(&EltWiseGradCPUKernel::SqrtGrad<T>, this, input1, input2, output, start, end));
} else {
MS_LOG(EXCEPTION) << "Not support " << operate_type_;
}
start += once_compute_size;
}
for (size_t i = 0; i < threads.size(); ++i) {
threads[i].join();
}
}
} // namespace kernel
} // namespace mindspore
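For reference, reading the argument roles out of the bodies above (y is the forward output, x the forward input, dy the incoming gradient), the element-wise gradients computed here are:

ReluGrad:    dx = dy if y > 0, else 0
ReLU6Grad:   dx = dy if 0 < y <= 6, else 0
AbsGrad:     dx = dy * sign(x)
SigmoidGrad: dx = dy * y * (1 - y)
SqrtGrad:    dx = dy / (2 * y), with y = sqrt(x)
TanhGrad:    dx = dy * (1 - y^2), with y = tanh(x)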

View File

@ -0,0 +1,87 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_GRAD_CPU_KERNEL_H_
#include <memory>
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
class EltWiseGradCPUKernel : public CPUKernel {
public:
EltWiseGradCPUKernel() = default;
~EltWiseGradCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
private:
template <typename T>
void ReluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void ReLU6Grad(const T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void AbsGrad(const T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void SigmoidGrad(const T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void SqrtGrad(const T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void TanhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end);
std::vector<size_t> input_shape0_;
std::vector<size_t> input_shape1_;
std::vector<size_t> input_element_num0_;
std::vector<size_t> input_element_num1_;
std::vector<size_t> output_shape_;
std::vector<size_t> output_element_num_;
OperateType operate_type_{RELUGRAD};
TypeId dtype_{kTypeUnknown};
};
MS_REG_CPU_KERNEL(
ReluGrad,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseGradCPUKernel);
MS_REG_CPU_KERNEL(
ReLU6Grad,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseGradCPUKernel);
MS_REG_CPU_KERNEL(
AbsGrad,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseGradCPUKernel);
MS_REG_CPU_KERNEL(
SigmoidGrad,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseGradCPUKernel);
MS_REG_CPU_KERNEL(
SqrtGrad,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseGradCPUKernel);
MS_REG_CPU_KERNEL(
TanhGrad,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseGradCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_GRAD_CPU_KERNEL_H_

View File

@ -0,0 +1,76 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h"
#include <string>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "utils/ms_utils.h"
namespace mindspore {
namespace kernel {
dnnl::eltwise_forward::desc EltWiseCPUKernel::GetForwardEltwiseDesc(const CNodePtr &kernel_node,
dnnl::memory::desc src_desc) {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == "ReLU") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_relu, src_desc, 0.0);
} else if (kernel_name == "ReLU6") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_clip, src_desc, 0.0, 6.0);
} else if (kernel_name == "Abs") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_abs, src_desc);
} else if (kernel_name == "Exp") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_exp, src_desc);
} else if (kernel_name == "Log") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_log, src_desc);
} else if (kernel_name == "Sigmoid") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_logistic, src_desc);
} else if (kernel_name == "Sqrt") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_sqrt, src_desc);
} else if (kernel_name == "Square") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_square, src_desc);
} else if (kernel_name == "Tanh") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_tanh, src_desc);
} else {
MS_LOG(EXCEPTION) << "Eltwise operators don't support " << kernel_name;
}
}
void EltWiseCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
auto desc = GetForwardEltwiseDesc(kernel_node, src_desc);
auto prim_desc = dnnl::eltwise_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
primitive_ = std::make_shared<dnnl::eltwise_forward>(prim_desc);
AddArgument(DNNL_ARG_SRC, src_desc);
AddArgument(DNNL_ARG_DST, src_desc);
}
bool EltWiseCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "error input output size!";
}
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
ExecutePrimitive();
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,60 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_CPU_KERNEL_H_
#include <memory>
#include <vector>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {
class EltWiseCPUKernel : public MKLCPUKernel {
public:
EltWiseCPUKernel() = default;
~EltWiseCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
private:
dnnl::eltwise_forward::desc GetForwardEltwiseDesc(const CNodePtr &kernel_node, dnnl::memory::desc src_desc);
dnnl::prop_kind DnnlForward = dnnl::prop_kind::forward_training;
};
MS_REG_CPU_KERNEL(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
MS_REG_CPU_KERNEL(ReLU6, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
MS_REG_CPU_KERNEL(Abs, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
MS_REG_CPU_KERNEL(Exp, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
MS_REG_CPU_KERNEL(Log, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
MS_REG_CPU_KERNEL(Sigmoid, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
MS_REG_CPU_KERNEL(Sqrt, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
MS_REG_CPU_KERNEL(Tanh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_CPU_KERNEL_H_

View File

@ -13,12 +13,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <string>
#include "backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_cpu_kernel.h"
#include "utils/ms_utils.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "utils/ms_utils.h"
namespace mindspore {
namespace kernel {

View File

@ -15,9 +15,8 @@
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include <vector>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
@ -74,4 +73,4 @@ MS_REG_CPU_KERNEL(BatchNorm,
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONV2D_CPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_CPU_KERNEL_H_

View File

@ -0,0 +1,110 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_gard_cpu_kernel.h"
#include <string>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "utils/ms_utils.h"
namespace mindspore {
namespace kernel {
void FusedBatchNormGradCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
CPUKernel::InitInputOutputSize(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
size_t type_size = sizeof(float);
std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
size_t tensor_size = shape[1] * 2 * type_size;
// [2, c] to store scale and bias
workspace_size_list_.emplace_back(tensor_size);
// [2, c] to store diff_scale and diff_bias
workspace_size_list_.emplace_back(tensor_size);
}
void FusedBatchNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (x_shape.size() != 4) {
MS_LOG(EXCEPTION) << "Fused batchnorm only support nchw input!";
}
batch_size = x_shape[0];
channel = x_shape[1];
hw_size = x_shape[2] * x_shape[3];
nhw_size = x_shape[0] * hw_size;
dnnl::memory::desc x_desc = GetDefaultMemDesc(x_shape);
dnnl::memory::desc scale_bias_desc = GetDefaultMemDesc({2, channel});
auto epsilon = AnfAlgo::GetNodeAttr<float>(kernel_node, "epsilon");
auto prop_kind = dnnl::prop_kind::forward_training;
auto normalization_flags = dnnl::normalization_flags::use_scale_shift;
// fused batch normalization forward description
dnnl::batch_normalization_forward::desc desc =
dnnl::batch_normalization_forward::desc(prop_kind, x_desc, epsilon, normalization_flags);
auto forward_prim_desc = dnnl::batch_normalization_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
// fused batch normalization backward description
dnnl::batch_normalization_backward::desc backward_desc =
dnnl::batch_normalization_backward::desc(dnnl::prop_kind::backward, x_desc, x_desc, epsilon, normalization_flags);
auto backward_prim_desc = dnnl::batch_normalization_backward::primitive_desc(
backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc);
primitive_ = std::make_shared<dnnl::batch_normalization_backward>(backward_prim_desc);
AddArgument(DNNL_ARG_SRC, x_desc);
AddArgument(DNNL_ARG_MEAN, forward_prim_desc.mean_desc());
AddArgument(DNNL_ARG_VARIANCE, forward_prim_desc.variance_desc());
AddArgument(DNNL_ARG_SCALE_SHIFT, scale_bias_desc);
AddArgument(DNNL_ARG_WORKSPACE, forward_prim_desc.workspace_desc());
AddArgument(DNNL_ARG_DST, x_desc);
AddArgument(DNNL_ARG_DIFF_DST, x_desc);
AddArgument(DNNL_ARG_DIFF_SRC, x_desc);
AddArgument(DNNL_ARG_DIFF_SCALE_SHIFT, scale_bias_desc);
}
bool FusedBatchNormGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 6 || outputs.empty()) {
MS_LOG(EXCEPTION) << "Error input output size!";
}
auto wksp_in = reinterpret_cast<float *>(workspace[0]->addr);
auto scale_ret = memcpy_s(wksp_in, workspace[0]->size, inputs[2]->addr, inputs[2]->size);
auto max_size = workspace[0]->size - inputs[2]->size;
auto bias_ret = memcpy_s(wksp_in + (inputs[2]->size / sizeof(float)), max_size, inputs[3]->addr, inputs[3]->size);
if (scale_ret != 0 || bias_ret != 0) {
MS_LOG(EXCEPTION) << "Memcpy_s error.";
return false;
}
SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_MEAN, inputs[4]->addr);
SetArgumentHandle(DNNL_ARG_VARIANCE, inputs[5]->addr);
SetArgumentHandle(DNNL_ARG_SCALE_SHIFT, workspace[0]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_SRC, outputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_SCALE_SHIFT, workspace[1]->addr);
ExecutePrimitive();
auto wksp_out = reinterpret_cast<float *>(workspace[1]->addr);
auto diff_scale_ret = memcpy_s(outputs[1]->addr, outputs[1]->size, wksp_out, inputs[2]->size);
auto diff_bias_ret =
memcpy_s(outputs[2]->addr, outputs[2]->size, wksp_out + (outputs[1]->size / sizeof(float)), inputs[3]->size);
if (diff_scale_ret != 0 || diff_bias_ret != 0) {
MS_LOG(EXCEPTION) << "Memcpy_s error.";
return false;
}
return true;
}
} // namespace kernel
} // namespace mindspore
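The workspace trick above packs scale and bias into one contiguous buffer because, as the {2, channel} descriptor suggests, dnnl's use_scale_shift flag expects a single [2, C] SCALE_SHIFT tensor; the backward primitive then writes diff_scale and diff_bias into workspace[1] in the same layout, which is split back into outputs[1] and outputs[2]. A small sketch of that packing (plain std::memcpy stands in here for the memcpy_s calls used by the kernel):

#include <cstring>
#include <iostream>
#include <vector>

int main() {
  const size_t channel = 4;
  std::vector<float> scale(channel, 1.0f), bias(channel, 0.5f);
  // workspace[0]: scale in the first C floats, bias in the next C floats ([2, C]).
  std::vector<float> scale_bias(2 * channel);
  std::memcpy(scale_bias.data(), scale.data(), channel * sizeof(float));
  std::memcpy(scale_bias.data() + channel, bias.data(), channel * sizeof(float));
  // After ExecutePrimitive, workspace[1] would hold diff_scale then diff_bias
  // in the same layout, to be copied out into outputs[1] and outputs[2].
  for (float v : scale_bias) std::cout << v << ' ';
  std::cout << '\n';
  return 0;
}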

View File

@ -0,0 +1,61 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_GRAD_CPU_KERNEL_H_
#include <memory>
#include <vector>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {
class FusedBatchNormGradCPUKernel : public MKLCPUKernel {
public:
FusedBatchNormGradCPUKernel() = default;
~FusedBatchNormGradCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
protected:
void InitInputOutputSize(const CNodePtr &kernel_node) override;
private:
float momentum{0.9};
size_t batch_size{0};
size_t channel{0};
size_t hw_size{0};
size_t nhw_size{0};
};
MS_REG_CPU_KERNEL(FusedBatchNormGradCPU,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
FusedBatchNormGradCPUKernel)
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_GRAD_CPU_KERNEL_H_

View File

@ -25,24 +25,53 @@ void MulCPUKernel::InitKernel(const CNodePtr &kernel_node) {
std::vector<size_t> src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<size_t> src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
if (src0_shape.size() != src1_shape.size() && src1_shape.size() > 1) {
MS_LOG(EXCEPTION) << "mul only support same dim input or tensor * scalar " << src0_shape.size() << " vs "
<< src1_shape.size();
if (src1_shape.size() != src0_shape.size()) {
if (src0_shape.size() == 0) {
need_swap_ = true;
for (size_t i = 0; i < src1_shape.size(); ++i) {
src0_shape.emplace_back(1);
}
if (src1_shape.size() < src0_shape.size()) {
for (size_t i = src1_shape.size(); i < src0_shape.size(); ++i) {
} else if (src1_shape.size() == 0) {
for (size_t i = 0; i < src0_shape.size(); ++i) {
src1_shape.emplace_back(1);
}
} else {
MS_LOG(EXCEPTION) << "Invalid broadcast! " << src0_shape << " vs " << src1_shape;
}
dnnl::memory::desc src0_mem_desc = GetDefaultMemDesc(src0_shape);
dnnl::memory::desc src1_mem_desc = GetDefaultMemDesc(src1_shape);
dnnl::memory::desc dst_mem_desc = GetDefaultMemDesc(dst_shape);
dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_mul, src0_mem_desc, src1_mem_desc, dst_mem_desc);
} else {
bool visit_src0 = false;
bool visit_src1 = false;
for (size_t i = 0; i < src0_shape.size(); ++i) {
if (src0_shape[i] != src1_shape[i]) {
if (src0_shape[i] == 1 && !visit_src1) {
need_swap_ = true;
visit_src0 = true;
} else if (src1_shape[i] == 1 && !visit_src0) {
need_swap_ = false;
visit_src1 = true;
} else {
MS_LOG(EXCEPTION) << "Invalid broadcast! " << src0_shape << " vs " << src1_shape;
}
}
}
}
dnnl::memory::desc src0_desc;
dnnl::memory::desc src1_desc;
if (need_swap_) {
src0_desc = GetDefaultMemDesc(src1_shape);
src1_desc = GetDefaultMemDesc(src0_shape);
} else {
src0_desc = GetDefaultMemDesc(src0_shape);
src1_desc = GetDefaultMemDesc(src1_shape);
}
dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_mul, src0_desc, src1_desc, dst_desc);
auto prim_desc = dnnl::binary::primitive_desc(desc, MKLKernelEngine::Get().engine());
primitive_ = std::make_shared<dnnl::binary>(prim_desc);
AddArgument(DNNL_ARG_SRC_0, src0_mem_desc);
AddArgument(DNNL_ARG_SRC_1, src1_mem_desc);
AddArgument(DNNL_ARG_DST, dst_mem_desc);
AddArgument(DNNL_ARG_SRC_0, src0_desc);
AddArgument(DNNL_ARG_SRC_1, src1_desc);
AddArgument(DNNL_ARG_DST, dst_desc);
}
bool MulCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
@ -51,8 +80,13 @@ bool MulCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
if (inputs.size() < 2 || outputs.empty()) {
MS_LOG(EXCEPTION) << "mul error input output size!";
}
if (need_swap_) {
SetArgumentHandle(DNNL_ARG_SRC_0, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_SRC_1, inputs[0]->addr);
} else {
SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr);
}
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
ExecutePrimitive();
return true;
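The need_swap_ logic added above (and mirrored in TensorAddCPUKernel further down) exists because, as far as I can tell, dnnl::binary only broadcasts its second source: every dimension of SRC_1 must equal the matching SRC_0 dimension or be 1. Whenever the first input is the one carrying the size-1 dimensions (or is a scalar), the kernel therefore swaps the operands when building the memory descriptors and again when binding addresses in Launch. A standalone sketch of the decision, with illustrative names:

#include <iostream>
#include <stdexcept>
#include <vector>

// Decide whether the operands must be swapped so the broadcast tensor becomes SRC_1.
bool NeedSwap(std::vector<size_t> s0, std::vector<size_t> s1) {
  if (s0.empty()) return true;    // scalar lhs: pad to 1s and swap
  if (s1.empty()) return false;   // scalar rhs: pad to 1s, no swap
  if (s0.size() != s1.size()) throw std::runtime_error("invalid broadcast");
  bool visit_src0 = false, visit_src1 = false, need_swap = false;
  for (size_t i = 0; i < s0.size(); ++i) {
    if (s0[i] == s1[i]) continue;
    if (s0[i] == 1 && !visit_src1) {
      need_swap = true;
      visit_src0 = true;
    } else if (s1[i] == 1 && !visit_src0) {
      visit_src1 = true;
    } else {
      throw std::runtime_error("invalid broadcast");
    }
  }
  return need_swap;
}

int main() {
  std::cout << NeedSwap({4, 3}, {1, 3}) << '\n';  // 0: [1, 3] already sits on the SRC_1 side
  std::cout << NeedSwap({1, 3}, {4, 3}) << '\n';  // 1: broadcast tensor is the first input, so swap
  return 0;
}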

View File

@ -31,6 +31,9 @@ class MulCPUKernel : public MKLCPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
private:
bool need_swap_{false};
};
MS_REG_CPU_KERNEL(

View File

@ -1,59 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <string>
#include "backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "utils/ms_utils.h"
namespace mindspore {
namespace kernel {
void ReluCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (src_shape.size() != 4 && src_shape.size() != 2) {
MS_LOG(EXCEPTION) << "relu kernel dims invalid " << src_shape.size();
}
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
dnnl::eltwise_forward::desc desc =
dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_relu, src_desc, 0.0);
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == "ReLU6") {
desc =
dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_clip, src_desc, 0.0, 6.0);
}
auto prim_desc = dnnl::eltwise_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
primitive_ = std::make_shared<dnnl::eltwise_forward>(prim_desc);
AddArgument(DNNL_ARG_SRC, src_desc);
AddArgument(DNNL_ARG_DST, src_desc);
}
bool ReluCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "error input output size!";
}
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
ExecutePrimitive();
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -1,42 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {
class ReluCPUKernel : public MKLCPUKernel {
public:
ReluCPUKernel() = default;
~ReluCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
};
MS_REG_CPU_KERNEL(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), ReluCPUKernel);
MS_REG_CPU_KERNEL(ReLU6, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ReluCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_CPU_KERNEL_H_

View File

@ -1,69 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "utils/ms_utils.h"
namespace mindspore {
namespace kernel {
void ReluGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (src_shape.size() != 4 && src_shape.size() != 2) {
MS_LOG(EXCEPTION) << "relu grad kernel dims invalid " << src_shape.size();
}
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
dnnl::eltwise_forward::desc forward_desc =
dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_relu, src_desc, 0.0);
auto forward_prim_desc = dnnl::eltwise_forward::primitive_desc(forward_desc, MKLKernelEngine::Get().engine());
dnnl::eltwise_backward::desc backward_desc =
dnnl::eltwise_backward::desc(dnnl::algorithm::eltwise_relu, src_desc, src_desc, 0.0, 0.0);
auto backward_prim_desc =
dnnl::eltwise_backward::primitive_desc(backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc);
primitive_ = std::make_shared<dnnl::eltwise_backward>(backward_prim_desc);
AddArgument(DNNL_ARG_SRC, src_desc);
AddArgument(DNNL_ARG_DIFF_SRC, src_desc);
AddArgument(DNNL_ARG_DIFF_DST, src_desc);
}
bool ReluGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 2 || outputs.empty()) {
MS_LOG(EXCEPTION) << "relu grad error input output size!";
}
if (inputs[0]->size != outputs[0]->size) {
MS_LOG(EXCEPTION) << "relu grad error input output data size!";
}
SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_SRC, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr);
ExecutePrimitive();
size_t mem_bits = outputs[0]->size;
auto ret = memcpy_s(outputs[0]->addr, mem_bits, inputs[0]->addr, mem_bits);
if (ret != 0) {
MS_LOG(EXCEPTION) << "memcpy_s error, errorno " << ret;
return false;
}
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -1,43 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_GRAD_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
namespace mindspore {
namespace kernel {
class ReluGradCPUKernel : public MKLCPUKernel {
public:
ReluGradCPUKernel() = default;
~ReluGradCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
};
MS_REG_CPU_KERNEL(
ReluGrad,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ReluGradCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_GRAD_CPU_KERNEL_H_

View File

@ -25,17 +25,45 @@ void TensorAddCPUKernel::InitKernel(const CNodePtr &kernel_node) {
std::vector<size_t> src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<size_t> src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
if (src0_shape.size() != src1_shape.size() && src1_shape.size() > 1) {
MS_LOG(EXCEPTION) << "TensorAdd only support same dim input or tensor * scalar " << src0_shape.size() << " vs "
<< src1_shape.size();
if (src1_shape.size() != src0_shape.size()) {
if (src0_shape.size() == 0) {
need_swap_ = true;
for (size_t i = 0; i < src1_shape.size(); ++i) {
src0_shape.emplace_back(1);
}
if (src1_shape.size() < src0_shape.size()) {
for (size_t i = src1_shape.size(); i < src0_shape.size(); ++i) {
} else if (src1_shape.size() == 0) {
for (size_t i = 0; i < src0_shape.size(); ++i) {
src1_shape.emplace_back(1);
}
} else {
MS_LOG(EXCEPTION) << "Invalid broadcast! " << src0_shape << " vs " << src1_shape;
}
} else {
bool visit_src0 = false;
bool visit_src1 = false;
for (size_t i = 0; i < src0_shape.size(); ++i) {
if (src0_shape[i] != src1_shape[i]) {
if (src0_shape[i] == 1 && !visit_src1) {
need_swap_ = true;
visit_src0 = true;
} else if (src1_shape[i] == 1 && !visit_src0) {
need_swap_ = false;
visit_src1 = true;
} else {
MS_LOG(EXCEPTION) << "Invalid broadcast! " << src0_shape << " vs " << src1_shape;
}
}
}
}
dnnl::memory::desc src0_desc;
dnnl::memory::desc src1_desc;
if (need_swap_) {
src0_desc = GetDefaultMemDesc(src1_shape);
src1_desc = GetDefaultMemDesc(src0_shape);
} else {
src0_desc = GetDefaultMemDesc(src0_shape);
src1_desc = GetDefaultMemDesc(src1_shape);
}
dnnl::memory::desc src0_desc = GetDefaultMemDesc(src0_shape);
dnnl::memory::desc src1_desc = GetDefaultMemDesc(src1_shape);
dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_add, src0_desc, src1_desc, dst_desc);
auto prim_desc = dnnl::binary::primitive_desc(desc, MKLKernelEngine::Get().engine());
@ -51,8 +79,13 @@ bool TensorAddCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
if (inputs.size() < 2 || outputs.empty()) {
MS_LOG(EXCEPTION) << "TensorAdd error input output size!";
}
if (need_swap_) {
SetArgumentHandle(DNNL_ARG_SRC_0, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_SRC_1, inputs[0]->addr);
} else {
SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr);
}
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
ExecutePrimitive();
return true;
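The need_swap_ check in InitKernel above decides which operand is handed to dnnl as the broadcast side. A minimal Python sketch of that decision (illustrative names, not part of the commit) may help when reading the C++:

def choose_swap(src0_shape, src1_shape):
    """Mirror of the InitKernel broadcast check: returns (need_swap, padded shapes)."""
    need_swap = False
    if len(src0_shape) != len(src1_shape):
        if not src0_shape:                      # scalar lhs: pad to rhs rank and swap operands
            need_swap = True
            src0_shape = [1] * len(src1_shape)
        elif not src1_shape:                    # scalar rhs: pad only
            src1_shape = [1] * len(src0_shape)
        else:
            raise ValueError("Invalid broadcast! %s vs %s" % (src0_shape, src1_shape))
    else:
        visit_src0 = visit_src1 = False
        for a, b in zip(src0_shape, src1_shape):
            if a == b:
                continue
            if a == 1 and not visit_src1:       # lhs is the broadcast side
                need_swap, visit_src0 = True, True
            elif b == 1 and not visit_src0:     # rhs is the broadcast side
                need_swap, visit_src1 = False, True
            else:
                raise ValueError("Invalid broadcast! %s vs %s" % (src0_shape, src1_shape))
    return need_swap, src0_shape, src1_shape

# choose_swap([2, 3, 4, 4], [2, 1, 4, 4]) -> (False, ...)   rhs broadcasts
# choose_swap([1, 3, 1, 4], [2, 3, 4, 4]) -> (True,  ...)   operands get swapped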

View File

@ -31,6 +31,9 @@ class TensorAddCPUKernel : public MKLCPUKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
private:
bool need_swap_{false};
};
MS_REG_CPU_KERNEL(

View File

@ -39,6 +39,7 @@ MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutput
ReshapeCPUKernel);
MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
ReshapeCPUKernel);
MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), ReshapeCPUKernel);
MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ReshapeCPUKernel);
@ -46,6 +47,7 @@ MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutput
ReshapeCPUKernel);
MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
ReshapeCPUKernel);
MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), ReshapeCPUKernel);
MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ReshapeCPUKernel);
@ -53,6 +55,8 @@ MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOut
ReshapeCPUKernel);
MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
ReshapeCPUKernel);
MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
ReshapeCPUKernel);
} // namespace kernel
} // namespace mindspore
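A quick way to exercise the new bool registrations (illustrative snippet, not part of the commit; it follows the CPU test style used later in this change):

import numpy as np
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P

context.set_context(mode=context.GRAPH_MODE, device_target='CPU')

class BoolReshape(nn.Cell):
    def __init__(self):
        super(BoolReshape, self).__init__()
        self.reshape = P.Reshape()
        self.expand_dims = P.ExpandDims()
    def construct(self, x):
        return self.expand_dims(self.reshape(x, (2, 2)), 0)

mask = Tensor(np.array([True, False, True, False]))
out = BoolReshape()(mask)
assert out.shape == (1, 2, 2)
assert out.asnumpy().dtype == 'bool'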

View File

@ -560,10 +560,16 @@ def get_bprop_gelu(self):
def get_bprop_fused_batch_norm(self):
"""Grad definition for `FusedBatchNorm` operation."""
input_grad = G.FusedBatchNormGrad(self.epsilon, self.momentum)
target_cpu = False
if self.target == "CPU":
input_grad = G.FusedBatchNormGradCPU(self.epsilon, self.momentum)
target_cpu = True
def bprop(x, scale, b, mean, variance, out, dout):
saved_mean = out[3]
saved_variance = out[4]
if target_cpu:
out = input_grad(dout[0], x, scale, b, saved_mean, saved_variance)
else:
out = input_grad(dout[0], x, scale, saved_mean, saved_variance)
dx = out[0]
dscale = out[1]

View File

@ -540,6 +540,22 @@ class FusedBatchNormGrad(Primitive):
raise NotImplementedError
class FusedBatchNormGradCPU(PrimitiveWithInfer):
"""Gradients of FusedBatchNorm operation for CPU."""
@prim_attr_register
def __init__(self, epsilon=0.0, momentum=0.1):
self.init_prim_io_names(inputs=['dy', 'x', 'scale', 'bias', 'save_mean', 'save_inv_variance'],
outputs=['dx', 'bn_scale', 'bn_bias'])
self.add_prim_attr('data_format', "NCHW")
def infer_shape(self, dy_shape, x_shape, scale_shape, bias_shape, save_mean_shape, save_inv_variance_shape):
return (x_shape, scale_shape, bias_shape)
def infer_dtype(self, dy_type, x_type, scale_type, bias_type, save_mean_type, save_inv_variance_type):
return (x_type, scale_type, bias_type)
class FusedBatchNormGradEx(PrimitiveWithInfer):
"""Gradients of FusedBatchNormEx operation."""

View File

@ -640,6 +640,7 @@ class FusedBatchNorm(Primitive):
self.epsilon = validator.check_float_range(epsilon, 0, 1, Rel.INC_RIGHT, 'epsilon', self.name)
self.momentum = validator.check_float_range(momentum, 0, 1, Rel.INC_BOTH, 'momentum', self.name)
self._update_parameter = True
self.target = context.get_context("device_target")
class FusedBatchNormEx(PrimitiveWithInfer):

View File

@ -0,0 +1,60 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.common.api import ms_function
from mindspore.ops import operations as P
from mindspore.ops.composite import GradOperation
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
class Grad(nn.Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.grad = GradOperation(get_all=True, sens_param=True)
self.network = network
@ms_function
def construct(self, input_, output_grad):
return self.grad(self.network)(input_, output_grad)
class Net(nn.Cell):
def __init__(self):
super(Net, self).__init__()
self.ops = P.Abs()
def construct(self, x):
return self.ops(x)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_net():
x = np.random.randn(2, 3, 3, 4).astype(np.float32)
y_expect = np.abs(x)
net = Net()
out = net(Tensor(x))
assert (out.asnumpy() == y_expect).all()
sens = np.random.randn(2, 3, 3, 4).astype(np.float32)
backward_net = Grad(Net())
output = backward_net(Tensor(x), Tensor(sens))
print(len(output))
print(output[0].asnumpy())

View File

@ -80,3 +80,39 @@ def test_train_forward():
bn_net = Batchnorm_Net(2, Tensor(weight), Tensor(bias), Tensor(moving_mean), Tensor(moving_var_init))
bn_net.set_train(False)
output = bn_net(Tensor(x))
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_train_backward():
x = np.array([[
[[1, 3, 3, 5], [2, 4, 6, 8], [3, 6, 7, 7], [4, 3, 8, 2]],
[[5, 7, 6, 3], [3, 5, 6, 7], [9, 4, 2, 5], [7, 5, 8, 1]]]]).astype(np.float32)
grad = np.array([[
[[1, 2, 7, 1], [4, 2, 1, 3], [1, 6, 5, 2], [2, 4, 3, 2]],
[[9, 4, 3, 5], [1, 3, 7, 6], [5, 7, 9, 9], [1, 4, 6, 8]]]]).astype(np.float32)
expect_output = np.array([[[[-0.69126546, -0.32903028, 1.9651246, -0.88445705],
[0.6369296, -0.37732816, -0.93275493, -0.11168876],
[-0.7878612, 1.3614, 0.8542711, -0.52222186],
[-0.37732816, 0.5886317, -0.11168876, -0.28073236]],
[[1.6447213, -0.38968924, -1.0174079, -0.55067265],
[-2.4305856, -1.1751484, 0.86250514, 0.5502673],
[0.39576983, 0.5470243, 1.1715001, 1.6447213],
[-1.7996241, -0.7051701, 0.7080077, 0.5437813]]]]).astype(np.float32)
weight = Tensor(np.ones(2).astype(np.float32))
bias = Tensor(np.ones(2).astype(np.float32))
moving_mean = Tensor(np.ones(2).astype(np.float32))
moving_var_init = Tensor(np.ones(2).astype(np.float32))
error = np.ones(shape=[1, 2, 4, 4]) * 1.0e-6
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
bn_net = Batchnorm_Net(2, weight, bias, moving_mean, moving_var_init)
bn_net.set_train()
bn_grad = Grad(bn_net)
output = bn_grad(Tensor(x), Tensor(grad))
diff = output[0].asnumpy() - expect_output
assert np.all(diff < error)
assert np.all(-diff < error)
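The expect_output checked above follows the standard training-mode batch-norm input gradient. A NumPy sketch (illustrative only, not part of the commit; it assumes per-channel statistics over the N/H/W axes and the op's default eps of 1e-5):

import numpy as np

def batchnorm_dx(x, dy, gamma, eps=1e-5):
    axes = (0, 2, 3)                                   # per-channel statistics, NCHW layout
    mean = x.mean(axis=axes, keepdims=True)
    var = x.var(axis=axes, keepdims=True)
    x_hat = (x - mean) / np.sqrt(var + eps)
    g = gamma.reshape(1, -1, 1, 1)
    return (g / np.sqrt(var + eps)) * (dy - dy.mean(axis=axes, keepdims=True)
                                       - x_hat * (dy * x_hat).mean(axis=axes, keepdims=True))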

View File

@ -0,0 +1,76 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.common.dtype as mstype
import mindspore.context as context
from mindspore.common.tensor import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
class Net(Cell):
def __init__(self, dtype):
super(Net, self).__init__()
self.Cast = P.Cast()
self.dtype = dtype
def construct(self, x):
return self.Cast(x, self.dtype)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_cast_int32():
x0 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.float32))
x1 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.int32))
x2 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.bool))
t = mstype.int32
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
net = Net(t)
output = net(x0)
type0 = output.asnumpy().dtype
assert type0 == 'int32'
output = net(x1)
type1 = output.asnumpy().dtype
assert type1 == 'int32'
output = net(x2)
type2 = output.asnumpy().dtype
assert type2 == 'int32'
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_cast_float32():
x0 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.float32))
x1 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.int32))
x2 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.bool))
t = mstype.float32
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
net = Net(t)
output = net(x0)
type0 = output.asnumpy().dtype
assert type0 == 'float32'
output = net(x1)
type1 = output.asnumpy().dtype
assert type1 == 'float32'
output = net(x2)
type2 = output.asnumpy().dtype
assert type2 == 'float32'

View File

@ -0,0 +1,56 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
class NetExp(nn.Cell):
def __init__(self):
super(NetExp, self).__init__()
self.exp = P.Exp()
def construct(self, x):
return self.exp(x)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_exp():
x0_np = np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)
x1_np = np.random.uniform(-2, 2, 1).astype(np.float32)
x0 = Tensor(x0_np)
x1 = Tensor(x1_np)
expect0 = np.exp(x0_np)
expect1 = np.exp(x1_np)
error0 = np.ones(shape=expect0.shape) * 1.0e-5
error1 = np.ones(shape=expect1.shape) * 1.0e-5
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
exp = NetExp()
output0 = exp(x0)
diff0 = output0.asnumpy() - expect0
assert np.all(diff0 < error0)
assert output0.shape == expect0.shape
output1 = exp(x1)
diff1 = output1.asnumpy() - expect1
assert np.all(diff1 < error1)
assert output1.shape == expect1.shape

View File

@ -0,0 +1,83 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
class Net(nn.Cell):
def __init__(self):
super(Net, self).__init__()
self.ops = P.Less()
def construct(self, x, y):
return self.ops(x, y)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu_training
@pytest.mark.env_onecard
def test_net():
x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
y1_np = np.random.randint(1, 5, (2, 1, 4, 4)).astype(np.float32)
x2_np = np.random.randint(1, 5, (2, 1, 1, 4)).astype(np.float32)
y2_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
x3_np = np.random.randint(1, 5, 1).astype(np.float32)
y3_np = np.random.randint(1, 5, 1).astype(np.float32)
x4_np = np.array(768).astype(np.float32)
y4_np = np.array(3072.5).astype(np.float32)
x0 = Tensor(x0_np)
y0 = Tensor(y0_np)
x1 = Tensor(x1_np)
y1 = Tensor(y1_np)
x2 = Tensor(x2_np)
y2 = Tensor(y2_np)
x3 = Tensor(x3_np)
y3 = Tensor(y3_np)
x4 = Tensor(x4_np)
y4 = Tensor(y4_np)
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
net = Net()
out = net(x0, y0).asnumpy()
expect = x0_np < y0_np
assert np.all(out == expect)
assert out.shape == expect.shape
out = net(x1, y1).asnumpy()
expect = x1_np < y1_np
assert np.all(out == expect)
assert out.shape == expect.shape
out = net(x2, y2).asnumpy()
expect = x2_np < y2_np
assert np.all(out == expect)
assert out.shape == expect.shape
out = net(x3, y3).asnumpy()
expect = x3_np < y3_np
assert np.all(out == expect)
assert out.shape == expect.shape
out = net(x4, y4).asnumpy()
expect = x4_np < y4_np
assert np.all(out == expect)
assert out.shape == expect.shape

View File

@ -0,0 +1,56 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
class NetLog(nn.Cell):
def __init__(self):
super(NetLog, self).__init__()
self.log = P.Log()
def construct(self, x):
return self.log(x)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_log():
x0_np = np.random.uniform(1, 2, (2, 3, 4, 4)).astype(np.float32)
x1_np = np.random.uniform(1, 2, 1).astype(np.float32)
x0 = Tensor(x0_np)
x1 = Tensor(x1_np)
expect0 = np.log(x0_np)
expect1 = np.log(x1_np)
error0 = np.ones(shape=expect0.shape) * 1.0e-5
error1 = np.ones(shape=expect1.shape) * 1.0e-5
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
log = NetLog()
output0 = log(x0)
output1 = log(x1)
diff0 = output0.asnumpy() - expect0
assert np.all(diff0 < error0)
assert output0.shape == expect0.shape
diff1 = output1.asnumpy() - expect1
assert np.all(diff1 < error1)
assert output1.shape == expect1.shape

View File

@ -16,38 +16,53 @@
import numpy as np
import pytest
import mindspore.context as context
import mindspore.common.dtype as mstype
import mindspore.nn as nn
from mindspore import Tensor
from mindspore import Tensor, context
from mindspore.common.api import ms_function
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
from mindspore.ops import operations as P
x = np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)
y = np.random.uniform(-2, 2, (1, 1, 1, 1)).astype(np.float32)
context.set_context(device_target='CPU')
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
class Net(nn.Cell):
def __init__(self):
super(Net, self).__init__()
self.mul = P.Mul()
self.x = Parameter(initializer(Tensor(x), x.shape), name='x3')
self.y = Parameter(initializer(Tensor(y), y.shape), name='y3')
@ms_function
def construct(self):
return self.mul(self.x, self.y)
def construct(self, x, y):
return self.mul(x, y)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_Mul():
def test_mul():
x0 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32))
y0 = Tensor(np.random.uniform(-2, 2, (1, 1, 1, 1)).astype(np.float32))
x1 = Tensor(np.random.uniform(-2, 2, (1, 3, 1, 4)).astype(np.float32))
y1 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32))
x2 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32))
y2 = Tensor(2, mstype.float32)
mul = Net()
output = mul()
print(x)
print(y)
print(output)
out = mul(x0, y0).asnumpy()
exp = x0.asnumpy() * y0.asnumpy()
diff = np.abs(out - exp)
err = np.ones(shape=exp.shape) * 1.0e-5
assert np.all(diff < err)
assert out.shape == exp.shape
out = mul(x1, y1).asnumpy()
exp = x1.asnumpy() * y1.asnumpy()
diff = np.abs(out - exp)
err = np.ones(shape=exp.shape) * 1.0e-5
assert np.all(diff < err)
assert out.shape == exp.shape
out = mul(x2, y2).asnumpy()
exp = x2.asnumpy() * y2.asnumpy()
diff = np.abs(out - exp)
err = np.ones(shape=exp.shape) * 1.0e-5
assert np.all(diff < err)
assert out.shape == exp.shape

View File

@ -0,0 +1,60 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.common.api import ms_function
from mindspore.ops import operations as P
from mindspore.ops.composite import GradOperation
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
class Grad(nn.Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.grad = GradOperation(get_all=True, sens_param=True)
self.network = network
@ms_function
def construct(self, input_, output_grad):
return self.grad(self.network)(input_, output_grad)
class Net(nn.Cell):
def __init__(self):
super(Net, self).__init__()
self.ops = P.Neg()
def construct(self, x):
return self.ops(x)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_net():
x = np.random.randn(2, 3, 3, 4).astype(np.float32)
y_expect = -x
net = Net()
out = net(Tensor(x))
assert (out.asnumpy() == y_expect).all()
sens = np.random.randn(2, 3, 3, 4).astype(np.float32)
backward_net = Grad(Net())
output = backward_net(Tensor(x), Tensor(sens))
print(len(output))
print(output[0].asnumpy())

View File

@ -0,0 +1,58 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
class Net(nn.Cell):
def __init__(self):
super(Net, self).__init__()
self.ops = P.Pow()
def construct(self, x, y):
return self.ops(x, y)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu_training
@pytest.mark.env_onecard
def test_net():
x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
y1_np = np.array(3).astype(np.float32)
x0 = Tensor(x0_np)
y0 = Tensor(y0_np)
x1 = Tensor(x1_np)
y1 = Tensor(y1_np)
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
net = Net()
out = net(x0, y0).asnumpy()
expect = np.power(x0_np, y0_np)
assert np.all(out == expect)
assert out.shape == expect.shape
out = net(x1, y1).asnumpy()
expect = np.power(x1_np, y1_np)
assert np.all(out == expect)
assert out.shape == expect.shape

View File

@ -0,0 +1,95 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
class NetRealDiv(nn.Cell):
def __init__(self):
super(NetRealDiv, self).__init__()
self.divide = P.RealDiv()
def construct(self, x, y):
return self.divide(x, y)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu_training
@pytest.mark.env_onecard
def test_real_div():
x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
y1_np = np.random.randint(1, 5, (2, 1, 4, 4)).astype(np.float32)
x2_np = np.random.randint(1, 5, (2, 1, 1, 4)).astype(np.float32)
y2_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
x3_np = np.random.randint(1, 5, 1).astype(np.float32)
y3_np = np.random.randint(1, 5, 1).astype(np.float32)
x4_np = np.array(768).astype(np.float32)
y4_np = np.array(3072.5).astype(np.float32)
x0 = Tensor(x0_np)
y0 = Tensor(y0_np)
x1 = Tensor(x1_np)
y1 = Tensor(y1_np)
x2 = Tensor(x2_np)
y2 = Tensor(y2_np)
x3 = Tensor(x3_np)
y3 = Tensor(y3_np)
x4 = Tensor(x4_np)
y4 = Tensor(y4_np)
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
real_div = NetRealDiv()
output0 = real_div(x0, y0)
expect0 = np.divide(x0_np, y0_np)
diff0 = output0.asnumpy() - expect0
error0 = np.ones(shape=expect0.shape) * 1.0e-5
assert np.all(diff0 < error0)
assert output0.shape == expect0.shape
output1 = real_div(x1, y1)
expect1 = np.divide(x1_np, y1_np)
diff1 = output1.asnumpy() - expect1
error1 = np.ones(shape=expect1.shape) * 1.0e-5
assert np.all(diff1 < error1)
assert output1.shape == expect1.shape
output2 = real_div(x2, y2)
expect2 = np.divide(x2_np, y2_np)
diff2 = output2.asnumpy() - expect2
error2 = np.ones(shape=expect2.shape) * 1.0e-5
assert np.all(diff2 < error2)
assert output2.shape == expect2.shape
output3 = real_div(x3, y3)
expect3 = np.divide(x3_np, y3_np)
diff3 = output3.asnumpy() - expect3
error3 = np.ones(shape=expect3.shape) * 1.0e-5
assert np.all(diff3 < error3)
assert output3.shape == expect3.shape
output4 = real_div(x4, y4)
expect4 = np.divide(x4_np, y4_np)
diff4 = output4.asnumpy() - expect4
error4 = np.ones(shape=expect4.shape) * 1.0e-5
assert np.all(diff4 < error4)
assert output4.shape == expect4.shape

View File

@ -20,7 +20,9 @@ import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
from mindspore.ops.operations import _grad_ops as G
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
class NetReLU6(nn.Cell):
def __init__(self):
@ -30,6 +32,13 @@ class NetReLU6(nn.Cell):
def construct(self, x):
return self.relu6(x)
class NetReLU6Grad(nn.Cell):
def __init__(self):
super(NetReLU6Grad, self).__init__()
self.relu6_grad = G.ReLU6Grad()
def construct(self, x, dy):
return self.relu6_grad(dy, x)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@ -42,7 +51,26 @@ def test_relu6():
[5.9, 6, 6,],
[6, 1, 0.]]]]).astype(np.float32)
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
relu6 = NetReLU6()
output = relu6(x)
assert (output.asnumpy() == expect).all()
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_relu6_grad():
x = Tensor(np.array([[[[-1, 1, 10],
[5.9, 6.1, 6],
[10, 1, -1]]]]).astype(np.float32))
dy = Tensor(np.array([[[[1, 1, 1],
[1, 1, 1],
[1, 1, 1]]]]).astype(np.float32))
expect = np.array([[[[0, 1, 0,],
[1, 0, 1,],
[0, 1, 0,]]]]).astype(np.float32)
error = np.ones(shape=[3, 3]) * 1.0e-6
relu6_grad = NetReLU6Grad()
output = relu6_grad(x, dy)
diff = np.abs(output.asnumpy() - expect)
assert np.all(np.abs(diff) < error)
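The expected matrix corresponds to the usual ReLU6 gradient mask; a one-line NumPy reference (not part of the commit; the behaviour at exactly x == 6 is read off the expected values above):

expect_ref = dy.asnumpy() * ((x.asnumpy() > 0) & (x.asnumpy() <= 6)).astype(np.float32)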

View File

@ -49,5 +49,5 @@ def test_relu_grad():
output = relu_grad()
expect = np.array([[[[0, 0, 1,], [0, 0, 0,], [1, 1, 0.]]]]).astype(np.float32)
error = np.ones(shape=[3, 3]) * 1.0e-6
diff = output.asnumpy() - expect
diff = np.abs(output.asnumpy() - expect)
assert np.all(diff < error)

View File

@ -0,0 +1,78 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
from mindspore.ops.operations import _grad_ops as G
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
class NetSigmoidGrad(nn.Cell):
def __init__(self):
super(NetSigmoidGrad, self).__init__()
self.sigmoid_grad = G.SigmoidGrad()
def construct(self, y, dy):
return self.sigmoid_grad(y, dy)
class Net(nn.Cell):
def __init__(self):
super(Net, self).__init__()
self.ops = P.Sigmoid()
def construct(self, x):
return self.ops(x)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_net():
x = np.random.randn(2, 3, 3, 4).astype(np.float32)
y_expect = 1 / (1 + np.exp(-x))
net = Net()
out = net(Tensor(x))
diff = out.asnumpy() - y_expect
err = np.ones(shape=y_expect.shape) * 1.0e-5
assert np.all(diff < err)
assert out.shape == y_expect.shape
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_sigmoid_grad():
y = Tensor(np.array([[[[-1, 1, 2],
[1, -1, 1],
[2, 1, -1]]]]).astype(np.float32))
dy = Tensor(np.array([[[[-11, 2, 4],
[-1, 1, -1],
[-4, 4, -4]]]]).astype(np.float32))
expect = np.array([[[[22, 0, -8],
[0, -2, 0],
[8, 0, 8]]]]).astype(np.float32)
error = np.ones(shape=[1, 1, 3, 3]) * 1.0e-6
sigmoid_grad = NetSigmoidGrad()
output = sigmoid_grad(y, dy)
diff = np.abs(output.asnumpy() - expect)
assert np.all(abs(diff) < error)
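The expected values here are consistent with the usual sigmoid gradient dout * y * (1 - y); a NumPy check (not part of the commit):

expect_ref = dy.asnumpy() * y.asnumpy() * (1 - y.asnumpy())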

View File

@ -0,0 +1,75 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
from mindspore.ops.operations import _grad_ops as G
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
class NetSqrtGrad(nn.Cell):
def __init__(self):
super(NetSqrtGrad, self).__init__()
self.sqrt_grad = G.SqrtGrad()
def construct(self, x, dx):
return self.sqrt_grad(x, dx)
class Net(nn.Cell):
def __init__(self):
super(Net, self).__init__()
self.ops = P.Sqrt()
def construct(self, x):
return self.ops(x)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_net():
x = np.abs(np.random.randn(2, 3, 3, 4)).astype(np.float32)
y_expect = np.sqrt(x)
net = Net()
out = net(Tensor(x))
diff = out.asnumpy() - y_expect
err = np.ones(shape=y_expect.shape) * 1.0e-5
assert np.all(diff < err)
assert out.shape == y_expect.shape
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_sqrt_grad():
x = Tensor(np.array([[[[-1, 1, 10],
[5.9, 6.1, 6],
[10, 1, -1]]]]).astype(np.float32))
dx = Tensor(np.array([[[[1, 1, 1],
[2, 2, 2],
[3, 3, 3]]]]).astype(np.float32))
expect = np.array([[[[-0.5, 0.5, 0.05,],
[0.16949153, 0.16393442, 0.16666667,],
[0.15, 1.5, -1.5,]]]]).astype(np.float32)
error = np.ones(shape=[3, 3]) * 1.0e-6
sqrt_grad = NetSqrtGrad()
output = sqrt_grad(x, dx)
diff = np.abs(output.asnumpy() - expect)
assert np.all(np.abs(diff) < error)
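The expected matrix matches dx / (2 * x) element-wise, i.e. the sqrt gradient evaluated at the first input; a NumPy check (not part of the commit):

expect_ref = dx.asnumpy() / (2 * x.asnumpy())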

View File

@ -0,0 +1,63 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.common.api import ms_function
from mindspore.ops import operations as P
from mindspore.ops.composite import GradOperation
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
class Grad(nn.Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.grad = GradOperation(get_all=True, sens_param=True)
self.network = network
@ms_function
def construct(self, input_, output_grad):
return self.grad(self.network)(input_, output_grad)
class Net(nn.Cell):
def __init__(self):
super(Net, self).__init__()
self.ops = P.Square()
def construct(self, x):
return self.ops(x)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_net():
x = np.random.randn(2, 3, 3, 4).astype(np.float32)
y_expect = x * x
net = Net()
out = net(Tensor(x))
diff = out.asnumpy() - y_expect
err = np.ones(shape=y_expect.shape) * 1.0e-5
assert np.all(diff < err)
assert out.shape == y_expect.shape
sens = np.random.randn(2, 3, 3, 4).astype(np.float32)
backward_net = Grad(Net())
output = backward_net(Tensor(x), Tensor(sens))
print(len(output))
print(output[0].asnumpy())

View File

@ -0,0 +1,63 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.common.api import ms_function
from mindspore.ops import operations as P
from mindspore.ops.composite import GradOperation
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
class Grad(nn.Cell):
def __init__(self, network):
super(Grad, self).__init__()
self.grad = GradOperation(get_all=True, sens_param=True)
self.network = network
@ms_function
def construct(self, input_, output_grad):
return self.grad(self.network)(input_, output_grad)
class Net(nn.Cell):
def __init__(self):
super(Net, self).__init__()
self.ops = P.Tanh()
def construct(self, x):
return self.ops(x)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_net():
x = np.random.randn(2, 3, 3, 4).astype(np.float32)
y_expect = np.tanh(x)
net = Net()
out = net(Tensor(x))
diff = out.asnumpy() - y_expect
err = np.ones(shape=y_expect.shape) * 1.0e-5
assert np.all(diff < err)
assert out.shape == y_expect.shape
sens = np.random.randn(2, 3, 3, 4).astype(np.float32)
backward_net = Grad(Net())
output = backward_net(Tensor(x), Tensor(sens))
print(len(output))
print(output[0].asnumpy())

View File

@ -13,12 +13,15 @@
# limitations under the License.
# ============================================================================
import pytest
import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P
import pytest
import mindspore.common.dtype as mstype
import mindspore.nn as nn
import mindspore.context as context
from mindspore import Tensor, context
from mindspore.ops import operations as P
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
class TensorAdd(nn.Cell):
def __init__(self):
@ -34,10 +37,30 @@ class TensorAdd(nn.Cell):
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_tensor_add():
x = np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32)
y = np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32)
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
x0 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32))
y0 = Tensor(np.random.uniform(-2, 2, (1, 1, 1, 1)).astype(np.float32))
x1 = Tensor(np.random.uniform(-2, 2, (1, 3, 1, 4)).astype(np.float32))
y1 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32))
x2 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32))
y2 = Tensor(2, mstype.float32)
add = TensorAdd()
output = add(Tensor(x), Tensor(y))
assert (output.asnumpy() == x + y).all()
out = add(x0, y0).asnumpy()
exp = x0.asnumpy() + y0.asnumpy()
diff = np.abs(out - exp)
err = np.ones(shape=exp.shape) * 1.0e-5
assert np.all(diff < err)
assert out.shape == exp.shape
out = add(x1, y1).asnumpy()
exp = x1.asnumpy() + y1.asnumpy()
diff = np.abs(out - exp)
err = np.ones(shape=exp.shape) * 1.0e-5
assert np.all(diff < err)
assert out.shape == exp.shape
out = add(x2, y2).asnumpy()
exp = x2.asnumpy() + y2.asnumpy()
diff = np.abs(out - exp)
err = np.ones(shape=exp.shape) * 1.0e-5
assert np.all(diff < err)
assert out.shape == exp.shape