forked from mindspore-Ecosystem/mindspore

add some cpu operator

This commit is contained in:
parent af78c12a73
commit f2e9d9cfc7

@@ -13,9 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h"
#include <thread>
#include <cmath>
#include <string>
#include <thread>
#include "backend/kernel_compiler/cpu/arithmetic_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
@@ -52,13 +53,35 @@ void ArithmeticCPUKernel::Mul(const T *input1, const T *input2, T *out, size_t s
}

template <typename T>
void ArithmeticCPUKernel::Div(const T *input1, const T *input2, T *out, size_t start, size_t end) {
void ArithmeticCPUKernel::RealDiv(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
auto div_number = input2[i];
std::vector<size_t> idx;
GenIndex(i, &idx);
auto div_number = input2[idx[1]];
if (div_number == 0) {
MS_LOG(EXCEPTION) << "Cannot divide by 0!";
}
out[i] = input1[i] / div_number;
out[i] = input1[idx[0]] / div_number;
}
}

template <typename T>
void ArithmeticCPUKernel::Pow(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
std::vector<size_t> idx;
GenIndex(i, &idx);
auto x = static_cast<double>(input1[idx[0]]);
auto y = static_cast<double>(input2[idx[1]]);
out[i] = static_cast<T>(std::pow(x, y));
}
}

template <typename T>
void ArithmeticCPUKernel::Less(const T *input1, const T *input2, bool *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
std::vector<size_t> idx;
GenIndex(i, &idx);
out[i] = input1[idx[0]] < input2[idx[1]];
}
}

@@ -71,10 +94,16 @@ void ArithmeticCPUKernel::InitKernel(const CNodePtr &kernel_node) {
operate_type_ = SUB;
} else if (kernel_name == prim::kPrimMul->name()) {
operate_type_ = MUL;
} else if (kernel_name == "Div") {
operate_type_ = DIV;
} else if (kernel_name == prim::kPrimRealDiv->name()) {
operate_type_ = REALDIV;
} else if (kernel_name == prim::kPrimPow->name()) {
operate_type_ = POW;
} else if (kernel_name == prim::kPrimLess->name()) {
operate_type_ = LESS;
} else if (kernel_name == prim::kPrimAssignAdd->name()) {
operate_type_ = ASSIGNADD;
} else {
MS_LOG(EXCEPTION) << "Not support " << kernel_name;
}

input_shape0_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
@@ -145,14 +174,45 @@ void ArithmeticCPUKernel::GenIndex(size_t num, std::vector<size_t> *idx) {
idx->push_back(idx0);
idx->push_back(idx1);
}

template <typename T>
void ArithmeticCPUKernel::LaunchLess(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
T *input1 = reinterpret_cast<T *>(inputs[0]->addr);
T *input2 = reinterpret_cast<T *>(inputs[1]->addr);
bool *output = reinterpret_cast<bool *>(outputs[0]->addr);

size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1;
auto max_thread_num = std::thread::hardware_concurrency();
size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num;
MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num;
std::vector<std::thread> threads;
threads.reserve(thread_num);
size_t start = 0;
size_t once_compute_size = (lens + thread_num - 1) / thread_num;
while (start < lens) {
size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size);
threads.emplace_back(std::thread(&ArithmeticCPUKernel::Less<T>, this, input1, input2, output, start, end));
start += once_compute_size;
}
for (size_t i = 0; i < threads.size(); ++i) {
threads[i].join();
}
}

template <typename T>
void ArithmeticCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
if (operate_type_ == LESS) {
LaunchLess<T>(inputs, outputs);
return;
}
T *input1 = reinterpret_cast<T *>(inputs[0]->addr);
T *input2 = reinterpret_cast<T *>(inputs[1]->addr);
T *output = reinterpret_cast<T *>(outputs[0]->addr);
auto lens = outputs[0]->size / sizeof(T);
size_t thread_num = lens < 128 * 24 ? std::ceil(lens / 128.0) : 24;
MS_LOG(INFO) << "lens=" << lens << "; use thread_num=" << thread_num;

size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1;
auto max_thread_num = std::thread::hardware_concurrency();
size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num;
MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num;
std::vector<std::thread> threads;
threads.reserve(thread_num);
size_t start = 0;
@@ -165,10 +225,14 @@ void ArithmeticCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, co
threads.emplace_back(std::thread(&ArithmeticCPUKernel::Sub<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == MUL) {
threads.emplace_back(std::thread(&ArithmeticCPUKernel::Mul<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == DIV) {
threads.emplace_back(std::thread(&ArithmeticCPUKernel::Div<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == REALDIV) {
threads.emplace_back(std::thread(&ArithmeticCPUKernel::RealDiv<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == POW) {
threads.emplace_back(std::thread(&ArithmeticCPUKernel::Pow<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == ASSIGNADD) {
threads.emplace_back(std::thread(&ArithmeticCPUKernel::AssignAdd<T>, this, input1, input2, output, start, end));
} else {
MS_LOG(EXCEPTION) << "Not support " << operate_type_;
}
start += once_compute_size;
}

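The launch functions in this file all share one chunking pattern: split the flat output range into blocks of roughly 128 elements, cap the worker count at std::thread::hardware_concurrency(), and join every worker before returning. A minimal standalone sketch of that pattern, with illustrative names (ParallelFor and the usage buffers are not part of the commit):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <thread>
#include <vector>

// Illustrative sketch: run func(start, end) over [0, lens) with the same chunking
// rule as the kernels above -- about 128 elements per thread, capped at the
// hardware thread count, joining every worker before returning.
template <typename Func>
void ParallelFor(size_t lens, Func func) {
  size_t max_thread_num = std::thread::hardware_concurrency();
  size_t thread_num =
      lens < 128 * max_thread_num ? static_cast<size_t>(std::ceil(lens / 128.0)) : max_thread_num;
  thread_num = std::max<size_t>(thread_num, 1);
  size_t once_compute_size = (lens + thread_num - 1) / thread_num;
  std::vector<std::thread> threads;
  threads.reserve(thread_num);
  for (size_t start = 0; start < lens; start += once_compute_size) {
    size_t end = std::min(start + once_compute_size, lens);
    threads.emplace_back(func, start, end);  // each worker handles [start, end)
  }
  for (auto &t : threads) {
    t.join();
  }
}

// Usage sketch: out[i] = a[i] + b[i] over n elements.
//   ParallelFor(n, [&](size_t s, size_t e) {
//     for (size_t i = s; i < e; ++i) out[i] = a[i] + b[i];
//   });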
@@ -15,8 +15,8 @@
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARITHMETIC_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARITHMETIC_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

@@ -31,7 +31,8 @@ class ArithmeticCPUKernel : public CPUKernel {

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

template <typename T>
void LaunchLess(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);

@@ -44,9 +45,13 @@ class ArithmeticCPUKernel : public CPUKernel {
template <typename T>
void Mul(const T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void Div(const T *input1, const T *input2, T *out, size_t start, size_t end);
void RealDiv(const T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void Pow(const T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void AssignAdd(T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void Less(const T *input1, const T *input2, bool *out, size_t start, size_t end);
std::vector<size_t> input_shape0_;
std::vector<size_t> input_shape1_;
std::vector<size_t> input_element_num0_;

@@ -66,6 +71,34 @@ MS_REG_CPU_KERNEL(
MS_REG_CPU_KERNEL(
Sub, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
Pow, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
Pow, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
Pow, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
RealDiv, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
RealDiv,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
RealDiv, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
Less, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
Less, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeBool),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
Less, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeBool),
ArithmeticCPUKernel);
MS_REG_CPU_KERNEL(
AssignAdd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
ArithmeticCPUKernel);

@@ -13,10 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h"
#include <cmath>
#include <thread>
#include <string>
#include <thread>
#include "backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {

@@ -30,9 +30,9 @@ void Square(const T *in, T *out, size_t start, size_t end) {
}

template <typename T>
void Sqrt(const T *in, T *out, size_t start, size_t end) {
void Neg(const T *in, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
out[i] = sqrtf(in[i]);
out[i] = -in[i];
}
}
} // namespace

@@ -42,8 +42,8 @@ void ArithmeticSelfCPUKernel::InitKernel(const CNodePtr &kernel_node) {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == prim::kPrimSquare->name()) {
operate_type_ = SQUARE;
} else if (kernel_name == prim::kPrimSqrt->name()) {
operate_type_ = SQRT;
} else if (kernel_name == prim::kPrimNeg->name()) {
operate_type_ = NEG;
}
dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
}

@@ -66,10 +66,11 @@ void ArithmeticSelfCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs
const std::vector<AddressPtr> &outputs) {
T *input = reinterpret_cast<T *>(inputs[0]->addr);
T *output = reinterpret_cast<T *>(outputs[0]->addr);
auto lens = inputs[0]->size / sizeof(T);
MS_LOG(INFO) << "lens=" << lens;
size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1;

const size_t thread_num = 24;
auto max_thread_num = std::thread::hardware_concurrency();
size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num;
MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num;
std::vector<std::thread> threads;
threads.reserve(thread_num);
size_t start = 0;

@@ -78,8 +79,8 @@ void ArithmeticSelfCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs
size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size);
if (operate_type_ == SQUARE) {
threads.emplace_back(std::thread(Square<T>, input, output, start, end));
} else if (operate_type_ == SQRT) {
threads.emplace_back(std::thread(Sqrt<T>, input, output, start, end));
} else if (operate_type_ == NEG) {
threads.emplace_back(std::thread(Neg<T>, input, output, start, end));
}
start += once_compute_size;
}

@@ -15,8 +15,8 @@
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARITHMETIC_SELF_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARITHMETIC_SELF_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

@@ -40,10 +40,12 @@ class ArithmeticSelfCPUKernel : public CPUKernel {
TypeId dtype_{kTypeUnknown};
};

MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ArithmeticSelfCPUKernel);
MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
ArithmeticSelfCPUKernel);
MS_REG_CPU_KERNEL(Neg, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ArithmeticSelfCPUKernel);
MS_REG_CPU_KERNEL(Neg, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
ArithmeticSelfCPUKernel);
} // namespace kernel
} // namespace mindspore

@@ -0,0 +1,82 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cmath>
#include <map>
#include <string>
#include <thread>
#include "backend/kernel_compiler/cpu/cast_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {

template <typename S, typename T>
void Cast(const S *in, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
out[i] = static_cast<T>(in[i]);
}
}

template <typename S, typename T>
void LaunchCast(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs) {
S *input = reinterpret_cast<S *>(inputs[0]->addr);
T *output = reinterpret_cast<T *>(outputs[0]->addr);
MS_LOG(DEBUG) << "Type source: " << typeid(S).name() << "; target: " << typeid(T).name();

size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1;
auto max_thread_num = std::thread::hardware_concurrency();
size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num;
MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num;
std::vector<std::thread> threads;
threads.reserve(thread_num);
size_t start = 0;
size_t once_compute_size = (lens + thread_num - 1) / thread_num;
while (start < lens) {
size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size);
threads.emplace_back(std::thread(Cast<S, T>, input, output, start, end));
start += once_compute_size;
}
for (size_t i = 0; i < threads.size(); ++i) {
threads[i].join();
}
}

void CastCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
source_dtype = AnfAlgo::GetPrevNodeOutputDeviceDataType(kernel_node, 0);
target_dtype = AnfAlgo::GetOutputInferDataType(kernel_node, 0);
}

bool CastCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
using TypePair =
std::function<void(const std::vector<kernel::AddressPtr> &, const std::vector<kernel::AddressPtr> &)>;
std::map<TypeId, std::map<TypeId, TypePair>> mode_map;
mode_map[kNumberTypeFloat32][kNumberTypeFloat32] = LaunchCast<float, float>;
mode_map[kNumberTypeFloat32][kNumberTypeInt32] = LaunchCast<float, int>;
mode_map[kNumberTypeFloat32][kNumberTypeBool] = LaunchCast<float, bool>;
mode_map[kNumberTypeInt32][kNumberTypeFloat32] = LaunchCast<int, float>;
mode_map[kNumberTypeInt32][kNumberTypeInt32] = LaunchCast<int, int>;
mode_map[kNumberTypeInt32][kNumberTypeBool] = LaunchCast<int, bool>;
mode_map[kNumberTypeBool][kNumberTypeFloat32] = LaunchCast<bool, float>;
mode_map[kNumberTypeBool][kNumberTypeBool] = LaunchCast<bool, bool>;
mode_map[kNumberTypeBool][kNumberTypeInt32] = LaunchCast<bool, int>;
mode_map[source_dtype][target_dtype](inputs, outputs);
return true;
}
} // namespace kernel
} // namespace mindspore
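CastCPUKernel::Launch above rebuilds its two-level source-type/target-type dispatch table on every call; the underlying idea is simply a map from a (source, target) type pair to the matching LaunchCast<S, T> instantiation. A self-contained sketch of that dispatch idea with placeholder names (DType, CastBuffer, and CastTable are illustrative, not MindSpore APIs); making the table a function-local static also avoids rebuilding it per launch:

#include <cstddef>
#include <functional>
#include <map>

enum class DType { kFloat32, kInt32, kBool };  // placeholder for the TypeId values above

// Element-wise cast over a raw buffer, mirroring the Cast<S, T> loop in the diff.
template <typename S, typename T>
void CastBuffer(const void *in, void *out, size_t n) {
  const S *src = static_cast<const S *>(in);
  T *dst = static_cast<T *>(out);
  for (size_t i = 0; i < n; ++i) {
    dst[i] = static_cast<T>(src[i]);
  }
}

using CastFn = std::function<void(const void *, void *, size_t)>;

// Two-level table keyed by (source, target), built once.
const std::map<DType, std::map<DType, CastFn>> &CastTable() {
  static const std::map<DType, std::map<DType, CastFn>> table = {
      {DType::kFloat32, {{DType::kInt32, CastBuffer<float, int>}, {DType::kBool, CastBuffer<float, bool>}}},
      {DType::kInt32, {{DType::kFloat32, CastBuffer<int, float>}, {DType::kBool, CastBuffer<int, bool>}}},
      {DType::kBool, {{DType::kInt32, CastBuffer<bool, int>}, {DType::kFloat32, CastBuffer<bool, float>}}},
  };
  return table;
}

// Usage: CastTable().at(DType::kFloat32).at(DType::kInt32)(src_ptr, dst_ptr, n);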
@@ -0,0 +1,54 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CAST_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CAST_CPU_KERNEL_H_
#include <functional>
#include <memory>
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {

class CastCPUKernel : public CPUKernel {
public:
CastCPUKernel() = default;
~CastCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
TypeId source_dtype{kTypeUnknown};
TypeId target_dtype{kTypeUnknown};
};

MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), CastCPUKernel);
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32), CastCPUKernel);
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeBool), CastCPUKernel);
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), CastCPUKernel);
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeFloat32), CastCPUKernel);
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool), CastCPUKernel);
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), CastCPUKernel);
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeInt32), CastCPUKernel);
MS_REG_CPU_KERNEL(Cast, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeFloat32), CastCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CAST_CPU_KERNEL_H_

@@ -15,15 +15,14 @@
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CPU_KERNEL_H_

#include <string>
#include <vector>
#include <functional>
#include <memory>
#include <numeric>
#include <functional>
#include <string>
#include <vector>
#include "backend/kernel_compiler/kernel.h"
#include "ir/anf.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "ir/anf.h"

using mindspore::kernel::Address;
using mindspore::kernel::AddressPtr;

@@ -52,7 +51,26 @@ const char END[] = "end";
const char SIZE[] = "size";
const char USE_NESTEROV[] = "use_nesterov";
const char GROUP[] = "group";
enum OperateType { ADD = 0, SUB, MUL, DIV, SQUARE, SQRT, ASSIGNADD };

enum OperateType {
ADD = 0,
SUB,
MUL,
DIV,
SQUARE,
SQRT,
POW,
REALDIV,
NEG,
LESS,
ASSIGNADD,
RELUGRAD,
RELU6GRAD,
ABSGRAD,
TANHGRAD,
SQRTGRAD,
SIGMOIDGRAD
};

class CPUKernel : public kernel::KernelMod {
public:
@@ -0,0 +1,177 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cmath>
#include <string>
#include <thread>
#include "backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
template <typename T>
void EltWiseGradCPUKernel::ReluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
if (input2[i] > 0) {
out[i] = input1[i];
} else {
out[i] = 0;
}
}
}

template <typename T>
void EltWiseGradCPUKernel::ReLU6Grad(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
if (input2[i] > 0 && input2[i] <= 6) {
out[i] = input1[i];
} else {
out[i] = 0;
}
}
}

template <typename T>
void EltWiseGradCPUKernel::AbsGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
if (input1[i] > 0) {
out[i] = input2[i];
} else if (input1[i] < 0) {
out[i] = -input2[i];
} else {
out[i] = 0;
}
}
}

template <typename T>
void EltWiseGradCPUKernel::SigmoidGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
out[i] = input2[i] * input1[i] * (1 - input1[i]);
}
}

template <typename T>
void EltWiseGradCPUKernel::SqrtGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
out[i] = input2[i] / (input1[i] * 2);
}
}

template <typename T>
void EltWiseGradCPUKernel::TanhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
out[i] = input2[i] * (1 - input1[i] * input1[i]);
}
}

void EltWiseGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == "ReluGrad") {
operate_type_ = RELUGRAD;
} else if (kernel_name == "ReLU6Grad") {
operate_type_ = RELU6GRAD;
} else if (kernel_name == "SigmoidGrad") {
operate_type_ = SIGMOIDGRAD;
} else if (kernel_name == "AbsGrad") {
operate_type_ = ABSGRAD;
} else if (kernel_name == "TanhGrad") {
operate_type_ = TANHGRAD;
} else if (kernel_name == "SqrtGrad") {
operate_type_ = SQRTGRAD;
} else {
MS_LOG(EXCEPTION) << "Not support " << kernel_name;
}

input_shape0_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
input_shape1_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
if (output_shape_.size() == 0) {
output_shape_.insert(output_shape_.begin(), 1);
}
size_t l = input_shape0_.size();
for (size_t i = 0; i < output_shape_.size() - l; ++i) {
input_shape0_.insert(input_shape0_.begin(), 1);
}
l = input_shape1_.size();
for (size_t i = 0; i < output_shape_.size() - l; ++i) {
input_shape1_.insert(input_shape1_.begin(), 1);
}
CPUKernelUtils::GetElementNumEveryDim(input_shape0_, &input_element_num0_);
CPUKernelUtils::GetElementNumEveryDim(input_shape1_, &input_element_num1_);
CPUKernelUtils::GetElementNumEveryDim(output_shape_, &output_element_num_);
dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
if (dtype_ != AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 1)) {
MS_LOG(EXCEPTION) << "Input0 and input1 must have the same data type";
}
}

bool EltWiseGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (dtype_ == kNumberTypeInt32) {
LaunchKernel<int>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
LaunchKernel<float>(inputs, outputs);
} else if (dtype_ == kNumberTypeInt64) {
LaunchKernel<int64_t>(inputs, outputs);
} else {
MS_LOG(EXCEPTION) << "Only support int32, int64, float32, but actual data type is " << TypeIdLabel(dtype_);
}
return true;
}

template <typename T>
void EltWiseGradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
T *input1 = reinterpret_cast<T *>(inputs[0]->addr);
T *input2 = reinterpret_cast<T *>(inputs[1]->addr);
T *output = reinterpret_cast<T *>(outputs[0]->addr);

size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1;
auto max_thread_num = std::thread::hardware_concurrency();
size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num;
MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num;
std::vector<std::thread> threads;
threads.reserve(thread_num);
size_t start = 0;
size_t once_compute_size = (lens + thread_num - 1) / thread_num;
while (start < lens) {
size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size);
if (operate_type_ == RELUGRAD) {
threads.emplace_back(std::thread(&EltWiseGradCPUKernel::ReluGrad<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == RELU6GRAD) {
threads.emplace_back(std::thread(&EltWiseGradCPUKernel::ReLU6Grad<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == ABSGRAD) {
threads.emplace_back(std::thread(&EltWiseGradCPUKernel::AbsGrad<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == SIGMOIDGRAD) {
threads.emplace_back(
std::thread(&EltWiseGradCPUKernel::SigmoidGrad<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == TANHGRAD) {
threads.emplace_back(std::thread(&EltWiseGradCPUKernel::TanhGrad<T>, this, input1, input2, output, start, end));
} else if (operate_type_ == SQRTGRAD) {
threads.emplace_back(std::thread(&EltWiseGradCPUKernel::SqrtGrad<T>, this, input1, input2, output, start, end));
} else {
MS_LOG(EXCEPTION) << "Not support " << operate_type_;
}
start += once_compute_size;
}
for (size_t i = 0; i < threads.size(); ++i) {
threads[i].join();
}
}
} // namespace kernel
} // namespace mindspore
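For reference, the element-wise gradients implemented above reduce to these closed forms, writing y for the forward output and dy for the incoming gradient: ReluGrad passes dy where y > 0, ReLU6Grad where 0 < y <= 6, SigmoidGrad is dy * y * (1 - y), TanhGrad is dy * (1 - y^2), and SqrtGrad is dy / (2 * y). A scalar sketch with illustrative helper names, handy for spot-checking the threaded kernels:

// Scalar reference formulas (y = forward output, dy = upstream gradient); not part of the commit.
inline float ReluGradRef(float dy, float y) { return y > 0.0f ? dy : 0.0f; }
inline float Relu6GradRef(float dy, float y) { return (y > 0.0f && y <= 6.0f) ? dy : 0.0f; }
inline float SigmoidGradRef(float dy, float y) { return dy * y * (1.0f - y); }
inline float TanhGradRef(float dy, float y) { return dy * (1.0f - y * y); }
inline float SqrtGradRef(float dy, float y) { return dy / (2.0f * y); }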
@@ -0,0 +1,87 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_GRAD_CPU_KERNEL_H_
#include <memory>
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
class EltWiseGradCPUKernel : public CPUKernel {
public:
EltWiseGradCPUKernel() = default;
~EltWiseGradCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);

private:
template <typename T>
void ReluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void ReLU6Grad(const T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void AbsGrad(const T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void SigmoidGrad(const T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void SqrtGrad(const T *input1, const T *input2, T *out, size_t start, size_t end);
template <typename T>
void TanhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end);
std::vector<size_t> input_shape0_;
std::vector<size_t> input_shape1_;
std::vector<size_t> input_element_num0_;
std::vector<size_t> input_element_num1_;
std::vector<size_t> output_shape_;
std::vector<size_t> output_element_num_;
OperateType operate_type_{RELUGRAD};
TypeId dtype_{kTypeUnknown};
};

MS_REG_CPU_KERNEL(
ReluGrad,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseGradCPUKernel);
MS_REG_CPU_KERNEL(
ReLU6Grad,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseGradCPUKernel);
MS_REG_CPU_KERNEL(
AbsGrad,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseGradCPUKernel);
MS_REG_CPU_KERNEL(
SigmoidGrad,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseGradCPUKernel);
MS_REG_CPU_KERNEL(
SqrtGrad,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseGradCPUKernel);
MS_REG_CPU_KERNEL(
TanhGrad,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseGradCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_GRAD_CPU_KERNEL_H_

@@ -0,0 +1,76 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h"

#include <string>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "utils/ms_utils.h"

namespace mindspore {
namespace kernel {
dnnl::eltwise_forward::desc EltWiseCPUKernel::GetForwardEltwiseDesc(const CNodePtr &kernel_node,
dnnl::memory::desc src_desc) {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == "ReLU") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_relu, src_desc, 0.0);
} else if (kernel_name == "ReLU6") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_clip, src_desc, 0.0, 6.0);
} else if (kernel_name == "Abs") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_abs, src_desc);
} else if (kernel_name == "Exp") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_exp, src_desc);
} else if (kernel_name == "Log") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_log, src_desc);
} else if (kernel_name == "Sigmoid") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_logistic, src_desc);
} else if (kernel_name == "Sqrt") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_sqrt, src_desc);
} else if (kernel_name == "Square") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_square, src_desc);
} else if (kernel_name == "Tanh") {
return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_tanh, src_desc);
} else {
MS_LOG(EXCEPTION) << "Eltwise operators don't support " << kernel_name;
}
}

void EltWiseCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);

auto desc = GetForwardEltwiseDesc(kernel_node, src_desc);
auto prim_desc = dnnl::eltwise_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
primitive_ = std::make_shared<dnnl::eltwise_forward>(prim_desc);

AddArgument(DNNL_ARG_SRC, src_desc);
AddArgument(DNNL_ARG_DST, src_desc);
}

bool EltWiseCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "error input output size!";
}
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
ExecutePrimitive();
return true;
}
} // namespace kernel
} // namespace mindspore
@@ -0,0 +1,60 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_CPU_KERNEL_H_
#include <memory>
#include <vector>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"

namespace mindspore {
namespace kernel {
class EltWiseCPUKernel : public MKLCPUKernel {
public:
EltWiseCPUKernel() = default;
~EltWiseCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
dnnl::eltwise_forward::desc GetForwardEltwiseDesc(const CNodePtr &kernel_node, dnnl::memory::desc src_desc);
dnnl::prop_kind DnnlForward = dnnl::prop_kind::forward_training;
};

MS_REG_CPU_KERNEL(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
MS_REG_CPU_KERNEL(ReLU6, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
MS_REG_CPU_KERNEL(Abs, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
MS_REG_CPU_KERNEL(Exp, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
MS_REG_CPU_KERNEL(Log, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
MS_REG_CPU_KERNEL(Sigmoid, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
MS_REG_CPU_KERNEL(Sqrt, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
MS_REG_CPU_KERNEL(Tanh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
EltWiseCPUKernel);
} // namespace kernel
} // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ELTWISE_CPU_KERNEL_H_

@@ -13,12 +13,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <string>
#include "backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_cpu_kernel.h"
#include "utils/ms_utils.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "utils/ms_utils.h"

namespace mindspore {
namespace kernel {

@@ -15,9 +15,8 @@
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_CPU_KERNEL_H_

#include <vector>
#include <memory>
#include <vector>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"

namespace mindspore {

@@ -74,4 +73,4 @@ MS_REG_CPU_KERNEL(BatchNorm,
} // namespace kernel
} // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONV2D_CPU_KERNEL_H_
#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_CPU_KERNEL_H_

@@ -0,0 +1,110 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/mkldnn/fused_batch_norm_gard_cpu_kernel.h"

#include <string>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "utils/ms_utils.h"

namespace mindspore {
namespace kernel {
void FusedBatchNormGradCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
CPUKernel::InitInputOutputSize(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_node);
size_t type_size = sizeof(float);
std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
size_t tensor_size = shape[1] * 2 * type_size;
// [2, c] to store scale and bias
workspace_size_list_.emplace_back(tensor_size);
// [2, c] to store diff_scale and diff_bias
workspace_size_list_.emplace_back(tensor_size);
}

void FusedBatchNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (x_shape.size() != 4) {
MS_LOG(EXCEPTION) << "Fused batchnorm only support nchw input!";
}
batch_size = x_shape[0];
channel = x_shape[1];
hw_size = x_shape[2] * x_shape[3];
nhw_size = x_shape[0] * hw_size;
dnnl::memory::desc x_desc = GetDefaultMemDesc(x_shape);
dnnl::memory::desc scale_bias_desc = GetDefaultMemDesc({2, channel});
auto epsilon = AnfAlgo::GetNodeAttr<float>(kernel_node, "epsilon");
auto prop_kind = dnnl::prop_kind::forward_training;
auto normalization_flags = dnnl::normalization_flags::use_scale_shift;

// fused batch normalization forward description
dnnl::batch_normalization_forward::desc desc =
dnnl::batch_normalization_forward::desc(prop_kind, x_desc, epsilon, normalization_flags);
auto forward_prim_desc = dnnl::batch_normalization_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());

// fused batch normalization backward description
dnnl::batch_normalization_backward::desc backward_desc =
dnnl::batch_normalization_backward::desc(dnnl::prop_kind::backward, x_desc, x_desc, epsilon, normalization_flags);
auto backward_prim_desc = dnnl::batch_normalization_backward::primitive_desc(
backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc);
primitive_ = std::make_shared<dnnl::batch_normalization_backward>(backward_prim_desc);
AddArgument(DNNL_ARG_SRC, x_desc);
AddArgument(DNNL_ARG_MEAN, forward_prim_desc.mean_desc());
AddArgument(DNNL_ARG_VARIANCE, forward_prim_desc.variance_desc());
AddArgument(DNNL_ARG_SCALE_SHIFT, scale_bias_desc);
AddArgument(DNNL_ARG_WORKSPACE, forward_prim_desc.workspace_desc());
AddArgument(DNNL_ARG_DST, x_desc);
AddArgument(DNNL_ARG_DIFF_DST, x_desc);
AddArgument(DNNL_ARG_DIFF_SRC, x_desc);
AddArgument(DNNL_ARG_DIFF_SCALE_SHIFT, scale_bias_desc);
}

bool FusedBatchNormGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.size() < 6 || outputs.empty()) {
MS_LOG(EXCEPTION) << "Error input output size!";
}
auto wksp_in = reinterpret_cast<float *>(workspace[0]->addr);
auto scale_ret = memcpy_s(wksp_in, workspace[0]->size, inputs[2]->addr, inputs[2]->size);
auto max_size = workspace[0]->size - inputs[2]->size;
auto bias_ret = memcpy_s(wksp_in + (inputs[2]->size / sizeof(float)), max_size, inputs[3]->addr, inputs[3]->size);
if (scale_ret != 0 || bias_ret != 0) {
MS_LOG(EXCEPTION) << "Memcpy_s error.";
return false;
}

SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_MEAN, inputs[4]->addr);
SetArgumentHandle(DNNL_ARG_VARIANCE, inputs[5]->addr);
SetArgumentHandle(DNNL_ARG_SCALE_SHIFT, workspace[0]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_SRC, outputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DIFF_SCALE_SHIFT, workspace[1]->addr);
ExecutePrimitive();

auto wksp_out = reinterpret_cast<float *>(workspace[1]->addr);
auto diff_scale_ret = memcpy_s(outputs[1]->addr, outputs[1]->size, wksp_out, inputs[2]->size);
auto diff_bias_ret =
memcpy_s(outputs[2]->addr, outputs[2]->size, wksp_out + (outputs[1]->size / sizeof(float)), inputs[3]->size);
if (diff_scale_ret != 0 || diff_bias_ret != 0) {
MS_LOG(EXCEPTION) << "Memcpy_s error.";
return false;
}
return true;
}
} // namespace kernel
} // namespace mindspore
@@ -0,0 +1,61 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_GRAD_CPU_KERNEL_H_
#include <memory>
#include <vector>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"

namespace mindspore {
namespace kernel {
class FusedBatchNormGradCPUKernel : public MKLCPUKernel {
public:
FusedBatchNormGradCPUKernel() = default;
~FusedBatchNormGradCPUKernel() override = default;

void InitKernel(const CNodePtr &kernel_node) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

protected:
void InitInputOutputSize(const CNodePtr &kernel_node) override;

private:
float momentum{0.9};
size_t batch_size{0};
size_t channel{0};
size_t hw_size{0};
size_t nhw_size{0};
};

MS_REG_CPU_KERNEL(FusedBatchNormGradCPU,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
FusedBatchNormGradCPUKernel)

} // namespace kernel
} // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_FUSED_BATCH_NORM_GRAD_CPU_KERNEL_H_

@@ -25,24 +25,53 @@ void MulCPUKernel::InitKernel(const CNodePtr &kernel_node) {
std::vector<size_t> src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
std::vector<size_t> src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
if (src0_shape.size() != src1_shape.size() && src1_shape.size() > 1) {
MS_LOG(EXCEPTION) << "mul only support same dim input or tensor * scalar " << src0_shape.size() << " vs "
<< src1_shape.size();
if (src1_shape.size() != src0_shape.size()) {
if (src0_shape.size() == 0) {
need_swap_ = true;
for (size_t i = 0; i < src1_shape.size(); ++i) {
src0_shape.emplace_back(1);
}
if (src1_shape.size() < src0_shape.size()) {
for (size_t i = src1_shape.size(); i < src0_shape.size(); ++i) {
} else if (src1_shape.size() == 0) {
for (size_t i = 0; i < src0_shape.size(); ++i) {
src1_shape.emplace_back(1);
}
} else {
MS_LOG(EXCEPTION) << "Invalid broadcast! " << src0_shape << " vs " << src1_shape;
}
dnnl::memory::desc src0_mem_desc = GetDefaultMemDesc(src0_shape);
dnnl::memory::desc src1_mem_desc = GetDefaultMemDesc(src1_shape);
dnnl::memory::desc dst_mem_desc = GetDefaultMemDesc(dst_shape);
dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_mul, src0_mem_desc, src1_mem_desc, dst_mem_desc);
} else {
bool visit_src0 = false;
bool visit_src1 = false;
for (size_t i = 0; i < src0_shape.size(); ++i) {
if (src0_shape[i] != src1_shape[i]) {
if (src0_shape[i] == 1 && !visit_src1) {
need_swap_ = true;
visit_src0 = true;
} else if (src1_shape[i] == 1 && !visit_src0) {
need_swap_ = false;
visit_src1 = true;
} else {
MS_LOG(EXCEPTION) << "Invalid broadcast! " << src0_shape << " vs " << src1_shape;
}
}
}
}
dnnl::memory::desc src0_desc;
dnnl::memory::desc src1_desc;
if (need_swap_) {
src0_desc = GetDefaultMemDesc(src1_shape);
src1_desc = GetDefaultMemDesc(src0_shape);
} else {
src0_desc = GetDefaultMemDesc(src0_shape);
src1_desc = GetDefaultMemDesc(src1_shape);
}
dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_mul, src0_desc, src1_desc, dst_desc);
auto prim_desc = dnnl::binary::primitive_desc(desc, MKLKernelEngine::Get().engine());
primitive_ = std::make_shared<dnnl::binary>(prim_desc);
AddArgument(DNNL_ARG_SRC_0, src0_mem_desc);
AddArgument(DNNL_ARG_SRC_1, src1_mem_desc);
AddArgument(DNNL_ARG_DST, dst_mem_desc);

AddArgument(DNNL_ARG_SRC_0, src0_desc);
AddArgument(DNNL_ARG_SRC_1, src1_desc);
AddArgument(DNNL_ARG_DST, dst_desc);
}

bool MulCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,

@@ -51,8 +80,13 @@ bool MulCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
if (inputs.size() < 2 || outputs.empty()) {
MS_LOG(EXCEPTION) << "mul error input output size!";
}
if (need_swap_) {
SetArgumentHandle(DNNL_ARG_SRC_0, inputs[1]->addr);
SetArgumentHandle(DNNL_ARG_SRC_1, inputs[0]->addr);
} else {
SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr);
}
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
ExecutePrimitive();
return true;
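The broadcast handling above pads the scalar operand's shape with 1s and, when the first operand is the one being broadcast, records need_swap_ so that the dnnl binary primitive always receives the full-size tensor as SRC_0 and the broadcast tensor as SRC_1. A compact, slightly generalized sketch of just the shape logic: it pads any shorter-rank shape rather than only the scalar case and does not reproduce the kernel's rejection of mixed-direction broadcasts; AlignForBroadcast is an illustrative name, not part of the commit.

#include <cstddef>
#include <stdexcept>
#include <vector>

// Pads the lower-rank shape with leading 1s and reports whether the operands
// should be swapped, i.e. whether shape0 is the side being broadcast.
bool AlignForBroadcast(std::vector<size_t> *shape0, std::vector<size_t> *shape1) {
  bool need_swap = false;
  if (shape0->size() < shape1->size()) {
    need_swap = true;
    shape0->insert(shape0->begin(), shape1->size() - shape0->size(), 1);
  } else if (shape1->size() < shape0->size()) {
    shape1->insert(shape1->begin(), shape0->size() - shape1->size(), 1);
  } else {
    for (size_t i = 0; i < shape0->size(); ++i) {
      if ((*shape0)[i] == (*shape1)[i]) {
        continue;
      }
      if ((*shape0)[i] == 1) {
        need_swap = true;  // this dimension is broadcast from the first operand
      } else if ((*shape1)[i] != 1) {
        throw std::invalid_argument("invalid broadcast");  // neither side is 1
      }
    }
  }
  return need_swap;
}

// Usage: std::vector<size_t> s0{}, s1{4, 3};        // scalar * tensor
//        bool swap = AlignForBroadcast(&s0, &s1);   // swap == true, s0 == {1, 1}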
@@ -31,6 +31,9 @@ class MulCPUKernel : public MKLCPUKernel {

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

private:
bool need_swap_{false};
};

MS_REG_CPU_KERNEL(

@@ -1,59 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <string>
#include "backend/kernel_compiler/cpu/mkldnn/relu_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "utils/ms_utils.h"

namespace mindspore {
namespace kernel {
void ReluCPUKernel::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
if (src_shape.size() != 4 && src_shape.size() != 2) {
MS_LOG(EXCEPTION) << "relu kernel dims invalid " << src_shape.size();
}
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);

dnnl::eltwise_forward::desc desc =
dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_relu, src_desc, 0.0);
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == "ReLU6") {
desc =
dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_clip, src_desc, 0.0, 6.0);
}

auto prim_desc = dnnl::eltwise_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
primitive_ = std::make_shared<dnnl::eltwise_forward>(prim_desc);

AddArgument(DNNL_ARG_SRC, src_desc);
AddArgument(DNNL_ARG_DST, src_desc);
}

bool ReluCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &outputs) {
if (inputs.empty() || outputs.empty()) {
MS_LOG(EXCEPTION) << "error input output size!";
}
SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
ExecutePrimitive();
return true;
}
} // namespace kernel
} // namespace mindspore
@ -1,42 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_CPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_CPU_KERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class ReluCPUKernel : public MKLCPUKernel {
|
||||
public:
|
||||
ReluCPUKernel() = default;
|
||||
~ReluCPUKernel() override = default;
|
||||
|
||||
void InitKernel(const CNodePtr &kernel_node) override;
|
||||
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs) override;
|
||||
};
|
||||
|
||||
MS_REG_CPU_KERNEL(ReLU, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), ReluCPUKernel);
|
||||
MS_REG_CPU_KERNEL(ReLU6, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
|
||||
ReluCPUKernel);
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_CPU_KERNEL_H_
|
|
@ -1,69 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/relu_grad_cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
#include "utils/ms_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
void ReluGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
if (src_shape.size() != 4 && src_shape.size() != 2) {
|
||||
MS_LOG(EXCEPTION) << "relu grad kernel dims invalid " << src_shape.size();
|
||||
}
|
||||
dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
|
||||
|
||||
dnnl::eltwise_forward::desc forward_desc =
|
||||
dnnl::eltwise_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::eltwise_relu, src_desc, 0.0);
|
||||
auto forward_prim_desc = dnnl::eltwise_forward::primitive_desc(forward_desc, MKLKernelEngine::Get().engine());
|
||||
|
||||
dnnl::eltwise_backward::desc backward_desc =
|
||||
dnnl::eltwise_backward::desc(dnnl::algorithm::eltwise_relu, src_desc, src_desc, 0.0, 0.0);
|
||||
auto backward_prim_desc =
|
||||
dnnl::eltwise_backward::primitive_desc(backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc);
|
||||
primitive_ = std::make_shared<dnnl::eltwise_backward>(backward_prim_desc);
|
||||
|
||||
AddArgument(DNNL_ARG_SRC, src_desc);
|
||||
AddArgument(DNNL_ARG_DIFF_SRC, src_desc);
|
||||
AddArgument(DNNL_ARG_DIFF_DST, src_desc);
|
||||
}
|
||||
|
||||
bool ReluGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> & /*workspace*/,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
if (inputs.size() < 2 || outputs.empty()) {
|
||||
MS_LOG(EXCEPTION) << "relu grad error input output size!";
|
||||
}
|
||||
if (inputs[0]->size != outputs[0]->size) {
|
||||
MS_LOG(EXCEPTION) << "relu grad error input output data size!";
|
||||
}
|
||||
|
||||
SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr);
|
||||
SetArgumentHandle(DNNL_ARG_DIFF_SRC, inputs[0]->addr);
|
||||
SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr);
|
||||
ExecutePrimitive();
|
||||
size_t mem_bits = outputs[0]->size;
|
||||
auto ret = memcpy_s(outputs[0]->addr, mem_bits, inputs[0]->addr, mem_bits);
|
||||
if (ret != 0) {
|
||||
MS_LOG(EXCEPTION) << "memcpy_s error, errorno " << ret;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -1,43 +0,0 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_GRAD_CPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_GRAD_CPU_KERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class ReluGradCPUKernel : public MKLCPUKernel {
|
||||
public:
|
||||
ReluGradCPUKernel() = default;
|
||||
~ReluGradCPUKernel() override = default;
|
||||
|
||||
void InitKernel(const CNodePtr &kernel_node) override;
|
||||
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs) override;
|
||||
};
|
||||
|
||||
MS_REG_CPU_KERNEL(
|
||||
ReluGrad,
|
||||
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
|
||||
ReluGradCPUKernel);
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RELU_GRAD_CPU_KERNEL_H_
|
|
@@ -25,17 +25,45 @@ void TensorAddCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  std::vector<size_t> src0_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  std::vector<size_t> src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
  std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
  if (src1_shape.size() != src0_shape.size()) {
    if (src0_shape.size() == 0) {
      need_swap_ = true;
      for (size_t i = 0; i < src1_shape.size(); ++i) {
        src0_shape.emplace_back(1);
      }
    } else if (src1_shape.size() == 0) {
      for (size_t i = 0; i < src0_shape.size(); ++i) {
        src1_shape.emplace_back(1);
      }
    } else {
      MS_LOG(EXCEPTION) << "Invalid broadcast! " << src0_shape << " vs " << src1_shape;
    }
  } else {
    bool visit_src0 = false;
    bool visit_src1 = false;
    for (size_t i = 0; i < src0_shape.size(); ++i) {
      if (src0_shape[i] != src1_shape[i]) {
        if (src0_shape[i] == 1 && !visit_src1) {
          need_swap_ = true;
          visit_src0 = true;
        } else if (src1_shape[i] == 1 && !visit_src0) {
          need_swap_ = false;
          visit_src1 = true;
        } else {
          MS_LOG(EXCEPTION) << "Invalid broadcast! " << src0_shape << " vs " << src1_shape;
        }
      }
    }
  }
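  // need_swap_ marks that the first input is the one being broadcast; the descriptors below are chosen so
  // that SRC_0 always describes the full-sized tensor, and Launch swaps the input addresses to match.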
  dnnl::memory::desc src0_desc;
  dnnl::memory::desc src1_desc;
  if (need_swap_) {
    src0_desc = GetDefaultMemDesc(src1_shape);
    src1_desc = GetDefaultMemDesc(src0_shape);
  } else {
    src0_desc = GetDefaultMemDesc(src0_shape);
    src1_desc = GetDefaultMemDesc(src1_shape);
  }
  dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
  dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_add, src0_desc, src1_desc, dst_desc);
  auto prim_desc = dnnl::binary::primitive_desc(desc, MKLKernelEngine::Get().engine());
@@ -51,8 +79,13 @@ bool TensorAddCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
  if (inputs.size() < 2 || outputs.empty()) {
    MS_LOG(EXCEPTION) << "TensorAdd error input output size!";
  }
  if (need_swap_) {
    SetArgumentHandle(DNNL_ARG_SRC_0, inputs[1]->addr);
    SetArgumentHandle(DNNL_ARG_SRC_1, inputs[0]->addr);
  } else {
    SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr);
    SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr);
  }
  SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
  ExecutePrimitive();
  return true;
@@ -31,6 +31,9 @@ class TensorAddCPUKernel : public MKLCPUKernel {
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs) override;

 private:
  bool need_swap_{false};
};

MS_REG_CPU_KERNEL(
@@ -39,6 +39,7 @@ MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutput
                  ReshapeCPUKernel);
MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
                  ReshapeCPUKernel);
MS_REG_CPU_KERNEL(Reshape, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), ReshapeCPUKernel);

MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                  ReshapeCPUKernel);

@@ -46,6 +47,7 @@ MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutput
                  ReshapeCPUKernel);
MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
                  ReshapeCPUKernel);
MS_REG_CPU_KERNEL(Flatten, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), ReshapeCPUKernel);

MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                  ReshapeCPUKernel);

@@ -53,6 +55,8 @@ MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOut
                  ReshapeCPUKernel);
MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
                  ReshapeCPUKernel);
MS_REG_CPU_KERNEL(ExpandDims, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
                  ReshapeCPUKernel);
} // namespace kernel
} // namespace mindspore
@@ -560,10 +560,16 @@ def get_bprop_gelu(self):
def get_bprop_fused_batch_norm(self):
    """Grad definition for `FusedBatchNorm` operation."""
    input_grad = G.FusedBatchNormGrad(self.epsilon, self.momentum)

    target_cpu = False
    if self.target == "CPU":
        input_grad = G.FusedBatchNormGradCPU(self.epsilon, self.momentum)
        target_cpu = True
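    # FusedBatchNormGradCPU also consumes the bias and returns a bias gradient, so the CPU branch of
    # bprop below passes `b` through to the kernel.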
    def bprop(x, scale, b, mean, variance, out, dout):
        saved_mean = out[3]
        saved_variance = out[4]
        if target_cpu:
            out = input_grad(dout[0], x, scale, b, saved_mean, saved_variance)
        else:
            out = input_grad(dout[0], x, scale, saved_mean, saved_variance)
        dx = out[0]
        dscale = out[1]
@@ -540,6 +540,22 @@ class FusedBatchNormGrad(Primitive):
        raise NotImplementedError


class FusedBatchNormGradCPU(PrimitiveWithInfer):
    """Gradients of FusedBatchNorm operation for CPU."""

    @prim_attr_register
    def __init__(self, epsilon=0.0, momentum=0.1):
        self.init_prim_io_names(inputs=['dy', 'x', 'scale', 'bias', 'save_mean', 'save_inv_variance'],
                                outputs=['dx', 'bn_scale', 'bn_bias'])
        self.add_prim_attr('data_format', "NCHW")

    def infer_shape(self, dy_shape, x_shape, scale_shape, bias_shape, save_mean_shape, save_inv_variance_shape):
        return (x_shape, scale_shape, bias_shape)

    def infer_dtype(self, dy_type, x_type, scale_type, bias_type, save_mean_type, save_inv_variance_type):
        return (x_type, scale_type, bias_type)


class FusedBatchNormGradEx(PrimitiveWithInfer):
    """Gradients of FusedBatchNormEx operation."""

@@ -640,6 +640,7 @@ class FusedBatchNorm(Primitive):
        self.epsilon = validator.check_float_range(epsilon, 0, 1, Rel.INC_RIGHT, 'epsilon', self.name)
        self.momentum = validator.check_float_range(momentum, 0, 1, Rel.INC_BOTH, 'momentum', self.name)
        self._update_parameter = True
        self.target = context.get_context("device_target")


class FusedBatchNormEx(PrimitiveWithInfer):
@ -0,0 +1,60 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.common.api import ms_function
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops.composite import GradOperation
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
|
||||
|
||||
|
||||
class Grad(nn.Cell):
|
||||
def __init__(self, network):
|
||||
super(Grad, self).__init__()
|
||||
self.grad = GradOperation(get_all=True, sens_param=True)
|
||||
self.network = network
|
||||
|
||||
@ms_function
|
||||
def construct(self, input_, output_grad):
|
||||
return self.grad(self.network)(input_, output_grad)
|
||||
|
||||
|
||||
class Net(nn.Cell):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.ops = P.Abs()
|
||||
|
||||
def construct(self, x):
|
||||
return self.ops(x)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_net():
|
||||
x = np.random.randn(2, 3, 3, 4).astype(np.float32)
|
||||
y_expect = np.abs(x)
|
||||
net = Net()
|
||||
out = net(Tensor(x))
|
||||
assert (out.asnumpy() == y_expect).all()
|
||||
sens = np.random.randn(2, 3, 3, 4).astype(np.float32)
|
||||
backward_net = Grad(Net())
|
||||
output = backward_net(Tensor(x), Tensor(sens))
|
||||
print(len(output))
|
||||
print(output[0].asnumpy())
|
|
@ -80,3 +80,39 @@ def test_train_forward():
|
|||
bn_net = Batchnorm_Net(2, Tensor(weight), Tensor(bias), Tensor(moving_mean), Tensor(moving_var_init))
|
||||
bn_net.set_train(False)
|
||||
output = bn_net(Tensor(x))
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_train_backward():
|
||||
x = np.array([[
|
||||
[[1, 3, 3, 5], [2, 4, 6, 8], [3, 6, 7, 7], [4, 3, 8, 2]],
|
||||
[[5, 7, 6, 3], [3, 5, 6, 7], [9, 4, 2, 5], [7, 5, 8, 1]]]]).astype(np.float32)
|
||||
grad = np.array([[
|
||||
[[1, 2, 7, 1], [4, 2, 1, 3], [1, 6, 5, 2], [2, 4, 3, 2]],
|
||||
[[9, 4, 3, 5], [1, 3, 7, 6], [5, 7, 9, 9], [1, 4, 6, 8]]]]).astype(np.float32)
|
||||
expect_output = np.array([[[[-0.69126546, -0.32903028, 1.9651246, -0.88445705],
|
||||
[0.6369296, -0.37732816, -0.93275493, -0.11168876],
|
||||
[-0.7878612, 1.3614, 0.8542711, -0.52222186],
|
||||
[-0.37732816, 0.5886317, -0.11168876, -0.28073236]],
|
||||
|
||||
[[1.6447213, -0.38968924, -1.0174079, -0.55067265],
|
||||
[-2.4305856, -1.1751484, 0.86250514, 0.5502673],
|
||||
[0.39576983, 0.5470243, 1.1715001, 1.6447213],
|
||||
[-1.7996241, -0.7051701, 0.7080077, 0.5437813]]]]).astype(np.float32)
|
||||
|
||||
weight = Tensor(np.ones(2).astype(np.float32))
|
||||
bias = Tensor(np.ones(2).astype(np.float32))
|
||||
moving_mean = Tensor(np.ones(2).astype(np.float32))
|
||||
moving_var_init = Tensor(np.ones(2).astype(np.float32))
|
||||
error = np.ones(shape=[1, 2, 4, 4]) * 1.0e-6
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
|
||||
bn_net = Batchnorm_Net(2, weight, bias, moving_mean, moving_var_init)
|
||||
bn_net.set_train()
|
||||
bn_grad = Grad(bn_net)
|
||||
output = bn_grad(Tensor(x), Tensor(grad))
|
||||
diff = output[0].asnumpy() - expect_output
|
||||
assert np.all(diff < error)
|
||||
assert np.all(-diff < error)
|
||||
|
|
|
@ -0,0 +1,76 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.common.dtype as mstype
|
||||
import mindspore.context as context
|
||||
from mindspore.common.tensor import Tensor
|
||||
from mindspore.nn import Cell
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
|
||||
class Net(Cell):
|
||||
def __init__(self, dtype):
|
||||
super(Net, self).__init__()
|
||||
self.Cast = P.Cast()
|
||||
self.dtype = dtype
|
||||
|
||||
def construct(self, x):
|
||||
return self.Cast(x, self.dtype)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_cast_int32():
|
||||
x0 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.float32))
|
||||
x1 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.int32))
|
||||
x2 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.bool))
|
||||
t = mstype.int32
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
|
||||
net = Net(t)
|
||||
output = net(x0)
|
||||
type0 = output.asnumpy().dtype
|
||||
assert type0 == 'int32'
|
||||
output = net(x1)
|
||||
type1 = output.asnumpy().dtype
|
||||
assert type1 == 'int32'
|
||||
output = net(x2)
|
||||
type2 = output.asnumpy().dtype
|
||||
assert type2 == 'int32'
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_cast_float32():
|
||||
x0 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.float32))
|
||||
x1 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.int32))
|
||||
x2 = Tensor(np.random.uniform(-2, 2, (3, 2)).astype(np.bool))
|
||||
t = mstype.float32
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
|
||||
net = Net(t)
|
||||
output = net(x0)
|
||||
type0 = output.asnumpy().dtype
|
||||
assert type0 == 'float32'
|
||||
output = net(x1)
|
||||
type1 = output.asnumpy().dtype
|
||||
assert type1 == 'float32'
|
||||
output = net(x2)
|
||||
type2 = output.asnumpy().dtype
|
||||
assert type2 == 'float32'
|
|
@ -0,0 +1,56 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
|
||||
class NetExp(nn.Cell):
|
||||
def __init__(self):
|
||||
super(NetExp, self).__init__()
|
||||
self.exp = P.Exp()
|
||||
|
||||
def construct(self, x):
|
||||
return self.exp(x)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_exp():
|
||||
x0_np = np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32)
|
||||
x1_np = np.random.uniform(-2, 2, 1).astype(np.float32)
|
||||
x0 = Tensor(x0_np)
|
||||
x1 = Tensor(x1_np)
|
||||
expect0 = np.exp(x0_np)
|
||||
expect1 = np.exp(x1_np)
|
||||
error0 = np.ones(shape=expect0.shape) * 1.0e-5
|
||||
error1 = np.ones(shape=expect1.shape) * 1.0e-5
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
|
||||
exp = NetExp()
|
||||
output0 = exp(x0)
|
||||
diff0 = output0.asnumpy() - expect0
|
||||
assert np.all(diff0 < error0)
|
||||
assert output0.shape == expect0.shape
|
||||
output1 = exp(x1)
|
||||
diff1 = output1.asnumpy() - expect1
|
||||
assert np.all(diff1 < error1)
|
||||
assert output1.shape == expect1.shape
|
|
@ -0,0 +1,83 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
import numpy as np
|
||||
import pytest
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
|
||||
class Net(nn.Cell):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.ops = P.Less()
|
||||
|
||||
def construct(self, x, y):
|
||||
return self.ops(x, y)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_net():
|
||||
x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
|
||||
y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
|
||||
x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
|
||||
y1_np = np.random.randint(1, 5, (2, 1, 4, 4)).astype(np.float32)
|
||||
x2_np = np.random.randint(1, 5, (2, 1, 1, 4)).astype(np.float32)
|
||||
y2_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
|
||||
x3_np = np.random.randint(1, 5, 1).astype(np.float32)
|
||||
y3_np = np.random.randint(1, 5, 1).astype(np.float32)
|
||||
x4_np = np.array(768).astype(np.float32)
|
||||
y4_np = np.array(3072.5).astype(np.float32)
|
||||
|
||||
x0 = Tensor(x0_np)
|
||||
y0 = Tensor(y0_np)
|
||||
x1 = Tensor(x1_np)
|
||||
y1 = Tensor(y1_np)
|
||||
x2 = Tensor(x2_np)
|
||||
y2 = Tensor(y2_np)
|
||||
x3 = Tensor(x3_np)
|
||||
y3 = Tensor(y3_np)
|
||||
x4 = Tensor(x4_np)
|
||||
y4 = Tensor(y4_np)
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
|
||||
net = Net()
|
||||
out = net(x0, y0).asnumpy()
|
||||
expect = x0_np < y0_np
|
||||
assert np.all(out == expect)
|
||||
assert out.shape == expect.shape
|
||||
|
||||
out = net(x1, y1).asnumpy()
|
||||
expect = x1_np < y1_np
|
||||
assert np.all(out == expect)
|
||||
assert out.shape == expect.shape
|
||||
|
||||
out = net(x2, y2).asnumpy()
|
||||
expect = x2_np < y2_np
|
||||
assert np.all(out == expect)
|
||||
assert out.shape == expect.shape
|
||||
|
||||
out = net(x3, y3).asnumpy()
|
||||
expect = x3_np < y3_np
|
||||
assert np.all(out == expect)
|
||||
assert out.shape == expect.shape
|
||||
|
||||
out = net(x4, y4).asnumpy()
|
||||
expect = x4_np < y4_np
|
||||
assert np.all(out == expect)
|
||||
assert out.shape == expect.shape
|
|
@ -0,0 +1,56 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
|
||||
class NetLog(nn.Cell):
|
||||
def __init__(self):
|
||||
super(NetLog, self).__init__()
|
||||
self.log = P.Log()
|
||||
|
||||
def construct(self, x):
|
||||
return self.log(x)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_log():
|
||||
x0_np = np.random.uniform(1, 2, (2, 3, 4, 4)).astype(np.float32)
|
||||
x1_np = np.random.uniform(1, 2, 1).astype(np.float32)
|
||||
x0 = Tensor(x0_np)
|
||||
x1 = Tensor(x1_np)
|
||||
expect0 = np.log(x0_np)
|
||||
expect1 = np.log(x1_np)
|
||||
error0 = np.ones(shape=expect0.shape) * 1.0e-5
|
||||
error1 = np.ones(shape=expect1.shape) * 1.0e-5
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
|
||||
log = NetLog()
|
||||
output0 = log(x0)
|
||||
output1 = log(x1)
|
||||
diff0 = output0.asnumpy() - expect0
|
||||
assert np.all(diff0 < error0)
|
||||
assert output0.shape == expect0.shape
|
||||
diff1 = output1.asnumpy() - expect1
|
||||
assert np.all(diff1 < error1)
|
||||
assert output1.shape == expect1.shape
|
|
@@ -16,38 +16,53 @@
import numpy as np
import pytest

import mindspore.common.dtype as mstype
import mindspore.nn as nn
from mindspore import Tensor, context
from mindspore.ops import operations as P

context.set_context(mode=context.GRAPH_MODE, device_target='CPU')


class Net(nn.Cell):
    def __init__(self):
        super(Net, self).__init__()
        self.mul = P.Mul()

    def construct(self, x, y):
        return self.mul(x, y)


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_mul():
    x0 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32))
    y0 = Tensor(np.random.uniform(-2, 2, (1, 1, 1, 1)).astype(np.float32))
    x1 = Tensor(np.random.uniform(-2, 2, (1, 3, 1, 4)).astype(np.float32))
    y1 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32))
    x2 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32))
    y2 = Tensor(2, mstype.float32)
    mul = Net()
    out = mul(x0, y0).asnumpy()
    exp = x0.asnumpy() * y0.asnumpy()
    diff = np.abs(out - exp)
    err = np.ones(shape=exp.shape) * 1.0e-5
    assert np.all(diff < err)
    assert out.shape == exp.shape

    out = mul(x1, y1).asnumpy()
    exp = x1.asnumpy() * y1.asnumpy()
    diff = np.abs(out - exp)
    err = np.ones(shape=exp.shape) * 1.0e-5
    assert np.all(diff < err)
    assert out.shape == exp.shape

    out = mul(x2, y2).asnumpy()
    exp = x2.asnumpy() * y2.asnumpy()
    diff = np.abs(out - exp)
    err = np.ones(shape=exp.shape) * 1.0e-5
    assert np.all(diff < err)
    assert out.shape == exp.shape
@ -0,0 +1,60 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.common.api import ms_function
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops.composite import GradOperation
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
|
||||
|
||||
|
||||
class Grad(nn.Cell):
|
||||
def __init__(self, network):
|
||||
super(Grad, self).__init__()
|
||||
self.grad = GradOperation(get_all=True, sens_param=True)
|
||||
self.network = network
|
||||
|
||||
@ms_function
|
||||
def construct(self, input_, output_grad):
|
||||
return self.grad(self.network)(input_, output_grad)
|
||||
|
||||
|
||||
class Net(nn.Cell):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.ops = P.Neg()
|
||||
|
||||
def construct(self, x):
|
||||
return self.ops(x)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_net():
|
||||
x = np.random.randn(2, 3, 3, 4).astype(np.float32)
|
||||
y_expect = -x
|
||||
net = Net()
|
||||
out = net(Tensor(x))
|
||||
assert (out.asnumpy() == y_expect).all()
|
||||
sens = np.random.randn(2, 3, 3, 4).astype(np.float32)
|
||||
backward_net = Grad(Net())
|
||||
output = backward_net(Tensor(x), Tensor(sens))
|
||||
print(len(output))
|
||||
print(output[0].asnumpy())
|
|
@ -0,0 +1,58 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
|
||||
class Net(nn.Cell):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.ops = P.Pow()
|
||||
|
||||
def construct(self, x, y):
|
||||
return self.ops(x, y)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_net():
|
||||
x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
|
||||
y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
|
||||
x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
|
||||
y1_np = np.array(3).astype(np.float32)
|
||||
|
||||
x0 = Tensor(x0_np)
|
||||
y0 = Tensor(y0_np)
|
||||
x1 = Tensor(x1_np)
|
||||
y1 = Tensor(y1_np)
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
|
||||
net = Net()
|
||||
out = net(x0, y0).asnumpy()
|
||||
expect = np.power(x0_np, y0_np)
|
||||
assert np.all(out == expect)
|
||||
assert out.shape == expect.shape
|
||||
|
||||
out = net(x1, y1).asnumpy()
|
||||
expect = np.power(x1_np, y1_np)
|
||||
assert np.all(out == expect)
|
||||
assert out.shape == expect.shape
|
|
@ -0,0 +1,95 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
|
||||
class NetRealDiv(nn.Cell):
|
||||
def __init__(self):
|
||||
super(NetRealDiv, self).__init__()
|
||||
self.divide = P.RealDiv()
|
||||
|
||||
def construct(self, x, y):
|
||||
return self.divide(x, y)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_real_div():
|
||||
x0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
|
||||
y0_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
|
||||
x1_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
|
||||
y1_np = np.random.randint(1, 5, (2, 1, 4, 4)).astype(np.float32)
|
||||
x2_np = np.random.randint(1, 5, (2, 1, 1, 4)).astype(np.float32)
|
||||
y2_np = np.random.randint(1, 5, (2, 3, 4, 4)).astype(np.float32)
|
||||
x3_np = np.random.randint(1, 5, 1).astype(np.float32)
|
||||
y3_np = np.random.randint(1, 5, 1).astype(np.float32)
|
||||
x4_np = np.array(768).astype(np.float32)
|
||||
y4_np = np.array(3072.5).astype(np.float32)
|
||||
|
||||
x0 = Tensor(x0_np)
|
||||
y0 = Tensor(y0_np)
|
||||
x1 = Tensor(x1_np)
|
||||
y1 = Tensor(y1_np)
|
||||
x2 = Tensor(x2_np)
|
||||
y2 = Tensor(y2_np)
|
||||
x3 = Tensor(x3_np)
|
||||
y3 = Tensor(y3_np)
|
||||
x4 = Tensor(x4_np)
|
||||
y4 = Tensor(y4_np)
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
|
||||
real_div = NetRealDiv()
|
||||
output0 = real_div(x0, y0)
|
||||
expect0 = np.divide(x0_np, y0_np)
|
||||
diff0 = output0.asnumpy() - expect0
|
||||
error0 = np.ones(shape=expect0.shape) * 1.0e-5
|
||||
assert np.all(diff0 < error0)
|
||||
assert output0.shape == expect0.shape
|
||||
|
||||
output1 = real_div(x1, y1)
|
||||
expect1 = np.divide(x1_np, y1_np)
|
||||
diff1 = output1.asnumpy() - expect1
|
||||
error1 = np.ones(shape=expect1.shape) * 1.0e-5
|
||||
assert np.all(diff1 < error1)
|
||||
assert output1.shape == expect1.shape
|
||||
|
||||
output2 = real_div(x2, y2)
|
||||
expect2 = np.divide(x2_np, y2_np)
|
||||
diff2 = output2.asnumpy() - expect2
|
||||
error2 = np.ones(shape=expect2.shape) * 1.0e-5
|
||||
assert np.all(diff2 < error2)
|
||||
assert output2.shape == expect2.shape
|
||||
|
||||
output3 = real_div(x3, y3)
|
||||
expect3 = np.divide(x3_np, y3_np)
|
||||
diff3 = output3.asnumpy() - expect3
|
||||
error3 = np.ones(shape=expect3.shape) * 1.0e-5
|
||||
assert np.all(diff3 < error3)
|
||||
assert output3.shape == expect3.shape
|
||||
|
||||
output4 = real_div(x4, y4)
|
||||
expect4 = np.divide(x4_np, y4_np)
|
||||
diff4 = output4.asnumpy() - expect4
|
||||
error4 = np.ones(shape=expect4.shape) * 1.0e-5
|
||||
assert np.all(diff4 < error4)
|
||||
assert output4.shape == expect4.shape
|
|
@@ -20,7 +20,9 @@ import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
from mindspore.ops.operations import _grad_ops as G

context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

class NetReLU6(nn.Cell):
    def __init__(self):

@@ -30,6 +32,13 @@ class NetReLU6(nn.Cell):
    def construct(self, x):
        return self.relu6(x)

class NetReLU6Grad(nn.Cell):
    def __init__(self):
        super(NetReLU6Grad, self).__init__()
        self.relu6_grad = G.ReLU6Grad()

    def construct(self, x, dy):
        return self.relu6_grad(dy, x)

@pytest.mark.level0
@pytest.mark.platform_x86_cpu

@@ -42,7 +51,26 @@ def test_relu6():
                        [5.9, 6, 6,],
                        [6, 1, 0.]]]]).astype(np.float32)

    relu6 = NetReLU6()
    output = relu6(x)
    assert (output.asnumpy() == expect).all()

@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_relu6_grad():
    x = Tensor(np.array([[[[-1, 1, 10],
                           [5.9, 6.1, 6],
                           [10, 1, -1]]]]).astype(np.float32))
    dy = Tensor(np.array([[[[1, 1, 1],
                            [1, 1, 1],
                            [1, 1, 1]]]]).astype(np.float32))
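    # ReLU6 passes the incoming gradient only inside its active range, so the expected mask below is
    # 1 where 0 < x <= 6 and 0 elsewhere.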
    expect = np.array([[[[0, 1, 0,],
                         [1, 0, 1,],
                         [0, 1, 0,]]]]).astype(np.float32)
    error = np.ones(shape=[3, 3]) * 1.0e-6

    relu6_grad = NetReLU6Grad()
    output = relu6_grad(x, dy)
    diff = np.abs(output.asnumpy() - expect)
    assert np.all(np.abs(diff) < error)
@@ -49,5 +49,5 @@ def test_relu_grad():
    output = relu_grad()
    expect = np.array([[[[0, 0, 1,], [0, 0, 0,], [1, 1, 0.]]]]).astype(np.float32)
    error = np.ones(shape=[3, 3]) * 1.0e-6
    diff = np.abs(output.asnumpy() - expect)
    assert np.all(diff < error)
@ -0,0 +1,78 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops.operations import _grad_ops as G
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
|
||||
|
||||
|
||||
class NetSigmoidGrad(nn.Cell):
|
||||
def __init__(self):
|
||||
super(NetSigmoidGrad, self).__init__()
|
||||
self.sigmoid_grad = G.SigmoidGrad()
|
||||
|
||||
def construct(self, y, dy):
|
||||
return self.sigmoid_grad(y, dy)
|
||||
|
||||
|
||||
class Net(nn.Cell):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.ops = P.Sigmoid()
|
||||
|
||||
def construct(self, x):
|
||||
return self.ops(x)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_net():
|
||||
x = np.random.randn(2, 3, 3, 4).astype(np.float32)
|
||||
y_expect = 1 / (1 + np.exp(-x))
|
||||
net = Net()
|
||||
out = net(Tensor(x))
|
||||
diff = out.asnumpy() - y_expect
|
||||
err = np.ones(shape=y_expect.shape) * 1.0e-5
|
||||
assert np.all(diff < err)
|
||||
assert out.shape == y_expect.shape
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_sigmoid_grad():
|
||||
y = Tensor(np.array([[[[-1, 1, 2],
|
||||
[1, -1, 1],
|
||||
[2, 1, -1]]]]).astype(np.float32))
|
||||
dy = Tensor(np.array([[[[-11, 2, 4],
|
||||
[-1, 1, -1],
|
||||
[-4, 4, -4]]]]).astype(np.float32))
|
||||
|
||||
expect = np.array([[[[22, 0, -8],
|
||||
[0, -2, 0],
|
||||
[8, 0, 8]]]]).astype(np.float32)
|
||||
|
||||
error = np.ones(shape=[1, 1, 3, 3]) * 1.0e-6
|
||||
|
||||
sigmoid_grad = NetSigmoidGrad()
|
||||
output = sigmoid_grad(y, dy)
|
||||
diff = np.abs(output.asnumpy() - expect)
|
||||
assert np.all(abs(diff) < error)
|
|
@ -0,0 +1,75 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops.operations import _grad_ops as G
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
|
||||
|
||||
class NetSqrtGrad(nn.Cell):
|
||||
def __init__(self):
|
||||
super(NetSqrtGrad, self).__init__()
|
||||
self.sqrt_grad = G.SqrtGrad()
|
||||
|
||||
def construct(self, x, dx):
|
||||
return self.sqrt_grad(x, dx)
|
||||
|
||||
|
||||
class Net(nn.Cell):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.ops = P.Sqrt()
|
||||
|
||||
def construct(self, x):
|
||||
return self.ops(x)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_net():
|
||||
x = np.abs(np.random.randn(2, 3, 3, 4)).astype(np.float32)
|
||||
y_expect = np.sqrt(x)
|
||||
net = Net()
|
||||
out = net(Tensor(x))
|
||||
diff = out.asnumpy() - y_expect
|
||||
err = np.ones(shape=y_expect.shape) * 1.0e-5
|
||||
assert np.all(diff < err)
|
||||
assert out.shape == y_expect.shape
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_sqrt_grad():
|
||||
x = Tensor(np.array([[[[-1, 1, 10],
|
||||
[5.9, 6.1, 6],
|
||||
[10, 1, -1]]]]).astype(np.float32))
|
||||
dx = Tensor(np.array([[[[1, 1, 1],
|
||||
[2, 2, 2],
|
||||
[3, 3, 3]]]]).astype(np.float32))
|
||||
expect = np.array([[[[-0.5, 0.5, 0.05,],
|
||||
[0.16949153, 0.16393442, 0.16666667,],
|
||||
[0.15, 1.5, -1.5,]]]]).astype(np.float32)
|
||||
error = np.ones(shape=[3, 3]) * 1.0e-6
|
||||
|
||||
sqrt_grad = NetSqrtGrad()
|
||||
output = sqrt_grad(x, dx)
|
||||
diff = np.abs(output.asnumpy() - expect)
|
||||
assert np.all(np.abs(diff) < error)
|
|
@ -0,0 +1,63 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.common.api import ms_function
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops.composite import GradOperation
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
|
||||
|
||||
|
||||
class Grad(nn.Cell):
|
||||
def __init__(self, network):
|
||||
super(Grad, self).__init__()
|
||||
self.grad = GradOperation(get_all=True, sens_param=True)
|
||||
self.network = network
|
||||
|
||||
@ms_function
|
||||
def construct(self, input_, output_grad):
|
||||
return self.grad(self.network)(input_, output_grad)
|
||||
|
||||
|
||||
class Net(nn.Cell):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.ops = P.Square()
|
||||
|
||||
def construct(self, x):
|
||||
return self.ops(x)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_net():
|
||||
x = np.random.randn(2, 3, 3, 4).astype(np.float32)
|
||||
y_expect = x * x
|
||||
net = Net()
|
||||
out = net(Tensor(x))
|
||||
diff = out.asnumpy() - y_expect
|
||||
err = np.ones(shape=y_expect.shape) * 1.0e-5
|
||||
assert np.all(diff < err)
|
||||
assert out.shape == y_expect.shape
|
||||
sens = np.random.randn(2, 3, 3, 4).astype(np.float32)
|
||||
backward_net = Grad(Net())
|
||||
output = backward_net(Tensor(x), Tensor(sens))
|
||||
print(len(output))
|
||||
print(output[0].asnumpy())
|
|
@ -0,0 +1,63 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.common.api import ms_function
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops.composite import GradOperation
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
|
||||
|
||||
|
||||
class Grad(nn.Cell):
|
||||
def __init__(self, network):
|
||||
super(Grad, self).__init__()
|
||||
self.grad = GradOperation(get_all=True, sens_param=True)
|
||||
self.network = network
|
||||
|
||||
@ms_function
|
||||
def construct(self, input_, output_grad):
|
||||
return self.grad(self.network)(input_, output_grad)
|
||||
|
||||
|
||||
class Net(nn.Cell):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.ops = P.Tanh()
|
||||
|
||||
def construct(self, x):
|
||||
return self.ops(x)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_net():
|
||||
x = np.random.randn(2, 3, 3, 4).astype(np.float32)
|
||||
y_expect = np.tanh(x)
|
||||
net = Net()
|
||||
out = net(Tensor(x))
|
||||
diff = out.asnumpy() - y_expect
|
||||
err = np.ones(shape=y_expect.shape) * 1.0e-5
|
||||
assert np.all(diff < err)
|
||||
assert out.shape == y_expect.shape
|
||||
sens = np.random.randn(2, 3, 3, 4).astype(np.float32)
|
||||
backward_net = Grad(Net())
|
||||
output = backward_net(Tensor(x), Tensor(sens))
|
||||
print(len(output))
|
||||
print(output[0].asnumpy())
|
|
@@ -13,12 +13,15 @@
# limitations under the License.
# ============================================================================

import numpy as np
import pytest

import mindspore.common.dtype as mstype
import mindspore.nn as nn
from mindspore import Tensor, context
from mindspore.ops import operations as P

context.set_context(mode=context.GRAPH_MODE, device_target='CPU')


class TensorAdd(nn.Cell):
    def __init__(self):

@@ -34,10 +37,30 @@ class TensorAdd(nn.Cell):
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_tensor_add():
    x0 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32))
    y0 = Tensor(np.random.uniform(-2, 2, (1, 1, 1, 1)).astype(np.float32))
    x1 = Tensor(np.random.uniform(-2, 2, (1, 3, 1, 4)).astype(np.float32))
    y1 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32))
    x2 = Tensor(np.random.uniform(-2, 2, (2, 3, 4, 4)).astype(np.float32))
    y2 = Tensor(2, mstype.float32)
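    # The three input pairs cover broadcasting against a (1, 1, 1, 1) tensor, per-axis broadcasting of
    # the first operand, and adding a scalar tensor.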
    add = TensorAdd()
    out = add(x0, y0).asnumpy()
    exp = x0.asnumpy() + y0.asnumpy()
    diff = np.abs(out - exp)
    err = np.ones(shape=exp.shape) * 1.0e-5
    assert np.all(diff < err)
    assert out.shape == exp.shape

    out = add(x1, y1).asnumpy()
    exp = x1.asnumpy() + y1.asnumpy()
    diff = np.abs(out - exp)
    err = np.ones(shape=exp.shape) * 1.0e-5
    assert np.all(diff < err)
    assert out.shape == exp.shape

    out = add(x2, y2).asnumpy()
    exp = x2.asnumpy() + y2.asnumpy()
    diff = np.abs(out - exp)
    err = np.ones(shape=exp.shape) * 1.0e-5
    assert np.all(diff < err)
    assert out.shape == exp.shape