[assistant][ops] Fix Atan, AtanGrad, Atan2, FloorDiv, FloorMod GPU and CPU with DataType
parent c03a1aeeff, commit 262faacc09

@@ -21,6 +21,7 @@
#include <memory>
#include <string>
#include <typeinfo>
#include <complex>
#include <unordered_map>
#include <utility>
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
@@ -150,6 +151,7 @@ class ArithmeticCpuTypeFunc : public CpuKernelFunc {
void Div(const T *input1, const T *input2, T *out);
void DivNoNan(const T *input1, const T *input2, T *out);
void FloorDiv(const T *input1, const T *input2, T *out);
void FloorDivComplex(const T *input1, const T *input2, T *out);
void Mod(const T *input1, const T *input2, T *out);
void FloorMod(const T *input1, const T *input2, T *out);
void Pow(const T *input1, const T *input2, T *out);
@@ -223,6 +225,7 @@ class ArithmeticCpuTypeFunc : public CpuKernelFunc {
arithmeticMathFuncMap = {{kSquaredDifference, &ArithmeticCpuTypeFunc<T>::SquaredDifferenceComplex},
{kSub, &ArithmeticCpuTypeFunc<T>::Sub},
{kDiv, &ArithmeticCpuTypeFunc<T>::DivComplex},
{kFloorDiv, &ArithmeticCpuTypeFunc<T>::FloorDivComplex},
{kRealDiv, &ArithmeticCpuTypeFunc<T>::RealDivComplex},
{kMul, &ArithmeticCpuTypeFunc<T>::Mul},
{kDivNoNan, &ArithmeticCpuTypeFunc<T>::DivNoNan},
@@ -612,6 +615,28 @@ void ArithmeticCpuTypeFunc<T>::FloorDiv(const T *input1, const T *input2, T *out
ParallelLaunchAutoSearch(task, output_size_, this, &parallel_search_info_);
}

template <typename T>
void ArithmeticCpuTypeFunc<T>::FloorDivComplex(const T *input1, const T *input2, T *out) {
BroadcastIterator base_iter(input_shape1_, input_shape2_, output_shape_);
auto task = [&input1, &input2, &out, &base_iter](size_t start, size_t end) {
auto iter = base_iter;
iter.SetPos(start);
for (size_t i = start; i < end; i++) {
auto dividend = input1[iter.GetInputPosA()];
auto divisor = input2[iter.GetInputPosB()];
iter.GenNextPos();
auto zero = static_cast<T>(0);
if (divisor == zero) {
out[i] = std::numeric_limits<T>::quiet_NaN();
continue;
}
auto temp = dividend / divisor;
out[i] = static_cast<T>(std::complex<double>(floor(temp.real()), 0));
}
};
ParallelLaunchAutoSearch(task, output_size_, this, &parallel_search_info_);
}

template <typename T>
void ArithmeticCpuTypeFunc<T>::Mod(const T *input1, const T *input2, T *out) {
if (!is_init_broadcast_) {
@@ -1005,6 +1030,8 @@ static std::map<std::string, std::vector<std::pair<KernelAttr, ArithmeticCpuFunc
{kFloorDiv,
{{KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
SpecializeArithFunc<int8_t>},
{KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
SpecializeArithFunc<int16_t>},
{KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
SpecializeArithFunc<int>},
{KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
@@ -1013,12 +1040,26 @@ static std::map<std::string, std::vector<std::pair<KernelAttr, ArithmeticCpuFunc
SpecializeArithFunc<uint8_t>},
{KernelAttr().AddInputAttr(kNumberTypeUInt16).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),
SpecializeArithFunc<uint16_t>},
{KernelAttr().AddInputAttr(kNumberTypeUInt32).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32),
SpecializeArithFunc<uint32_t>},
{KernelAttr().AddInputAttr(kNumberTypeUInt64).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64),
SpecializeArithFunc<uint64_t>},
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
SpecializeArithFunc<float16>},
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
SpecializeArithFunc<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
SpecializeArithFunc<double>}}},
SpecializeArithFunc<double>},
{KernelAttr()
.AddInputAttr(kNumberTypeComplex64)
.AddInputAttr(kNumberTypeComplex64)
.AddOutputAttr(kNumberTypeComplex64),
SpecializeArithFunc<complex64>},
{KernelAttr()
.AddInputAttr(kNumberTypeComplex128)
.AddInputAttr(kNumberTypeComplex128)
.AddOutputAttr(kNumberTypeComplex128),
SpecializeArithFunc<complex128>}}},
{kMod,
{{KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
SpecializeArithFunc<int64_t>},
@@ -1035,14 +1076,28 @@ static std::map<std::string, std::vector<std::pair<KernelAttr, ArithmeticCpuFunc
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
SpecializeArithFunc<double>}}},
{kFloorMod,
{{KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
SpecializeArithFunc<int64_t>},
{{KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
SpecializeArithFunc<int8_t>},
{KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
SpecializeArithFunc<int16_t>},
{KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
SpecializeArithFunc<int32_t>},
SpecializeArithFunc<int>},
{KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
SpecializeArithFunc<int64_t>},
{KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
SpecializeArithFunc<uint8_t>},
{KernelAttr().AddInputAttr(kNumberTypeUInt16).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),
SpecializeArithFunc<uint16_t>},
{KernelAttr().AddInputAttr(kNumberTypeUInt32).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32),
SpecializeArithFunc<uint32_t>},
{KernelAttr().AddInputAttr(kNumberTypeUInt64).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64),
SpecializeArithFunc<uint64_t>},
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
SpecializeArithFunc<float16>},
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
SpecializeArithFunc<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
SpecializeArithFunc<float16>}}},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
SpecializeArithFunc<double>}}},
{kAssignAdd,
{{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
SpecializeArithFunc<float>},
@@ -1174,7 +1229,9 @@ static std::map<std::string, std::vector<std::pair<KernelAttr, ArithmeticCpuFunc
.AddOutputAttr(kNumberTypeComplex128),
SpecializeArithFunc<complex128>}}},
{kAtan2,
{{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
{{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
SpecializeArithFunc<float16>},
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
SpecializeArithFunc<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
SpecializeArithFunc<double>}}},

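For reference, a minimal NumPy sketch (not part of the commit) of the semantics FloorDivComplex above implements: the complex quotient's real part is floored, the imaginary part is set to zero, and a zero divisor yields NaN. The helper name floor_div_complex_ref is illustrative only; it is handy because, depending on the version, np.floor_divide may reject complex inputs.

import numpy as np

def floor_div_complex_ref(x, y):
    x = np.asarray(x, dtype=np.complex128)
    y = np.asarray(y, dtype=np.complex128)
    safe_y = np.where(y == 0, np.nan, y)            # keep the division defined everywhere
    q = x / safe_y                                  # complex quotient, matching dividend / divisor above
    out = np.floor(q.real).astype(np.complex128)    # floor of the real part, imaginary part forced to 0
    return np.where(y == 0, np.nan + 0j, out)       # zero divisors map to quiet NaN, as in the kernel
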
@@ -261,6 +261,16 @@ void Atan(ArithmeticSelfCpuKernelFunc *content, const T *in, T *out, size_t size
ParallelLaunchAutoSearch(task, size, content, &content->parallel_search_info_);
}

template <typename T>
void ComplexAtan(ArithmeticSelfCpuKernelFunc *content, const T *in, T *out, size_t size) {
auto task = [&in, &out](size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
out[i] = static_cast<T>(atan(in[i]));
}
};
ParallelLaunchAutoSearch(task, size, content, &content->parallel_search_info_);
}

template <typename T>
void Sin(ArithmeticSelfCpuKernelFunc *content, const T *in, T *out, size_t size) {
auto task = [&in, &out](size_t start, size_t end) {
@@ -736,6 +746,7 @@ void ArithmeticSelfCpuKernelFunc::LaunchKernelComplex(const std::vector<AddressP
{prim::kPrimReciprocal->name(), Reciprocal<T>},
{prim::kPrimSqrt->name(), Sqrt<T>},
{prim::kPrimTan->name(), Tan<T>},
{prim::kPrimAtan->name(), ComplexAtan<T>},
{prim::kPrimTanh->name(), Tanh<T>},
{prim::kPrimAtanh->name(), Atanh<T>},
{prim::kPrimInv->name(), Inv<T>},
@@ -763,7 +774,8 @@ void ArithmeticSelfCpuKernelFunc::LaunchKernelFloat16(const std::vector<AddressP
{prim::kPrimSinh->name(), Sinh<float16>}, {prim::kPrimCosh->name(), Cosh<float16>},
{prim::kPrimAsinh->name(), Asinh<float16>}, {prim::kPrimErfc->name(), Erfc<float16>},
{prim::kPrimRsqrt->name(), Rsqrt<float16>}, {prim::kPrimErf->name(), Erf<float16>},
{prim::kPrimSign->name(), Sign<float16>}, {prim::kPrimRint->name(), Rint<float16>}};
{prim::kPrimSign->name(), Sign<float16>}, {prim::kPrimRint->name(), Rint<float16>},
{prim::kPrimAtan->name(), Atan<float16>}};
const auto func_pair = arithmeticSelfFuncMap.find(kernel_name_);
if (arithmeticSelfFuncMap.find(kernel_name_) == arithmeticSelfFuncMap.end()) {
MS_LOG(EXCEPTION) << "For 'ArithmeticSelf', it does not support " << kernel_name_ << " with float16 as input. ";
@@ -966,7 +978,10 @@ static std::map<std::string, std::vector<std::pair<KernelAttr, ArithFuncCreator>
{KernelAttr().AddInputAttr(kNumberTypeComplex128).AddOutputAttr(kNumberTypeComplex128), CreateArithSelfFunc}}},
{kAtan,
{{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), CreateArithSelfFunc},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64), CreateArithSelfFunc}}},
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), CreateArithSelfFunc},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64), CreateArithSelfFunc},
{KernelAttr().AddInputAttr(kNumberTypeComplex64).AddOutputAttr(kNumberTypeComplex64), CreateArithSelfFunc},
{KernelAttr().AddInputAttr(kNumberTypeComplex128).AddOutputAttr(kNumberTypeComplex128), CreateArithSelfFunc}}},
{kSin,
{{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), CreateArithSelfFunc},
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), CreateArithSelfFunc},

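As a quick cross-check of the ComplexAtan path registered above, NumPy's arctan accepts complex input and should agree with the kernel elementwise; this sketch mirrors the new CPU tests and is not part of the commit.

import numpy as np

z = np.array([-1, -0.5, 0, 0.5, 1], dtype=np.complex64)
z = z + 0.5j * z
print(np.arctan(z))  # reference values the Atan kernel output is compared against in the tests
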
@@ -69,6 +69,7 @@ class EltWiseGradCpuTypeFunc : public CpuKernelFunc {
void ACosGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
void ComplexACosGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
void AtanGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
void ComplexAtanGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
void AsinhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
void ComplexAsinhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
void InvGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
@@ -312,18 +313,20 @@ void EltWiseGradCpuTypeFunc<T>::ComplexACosGrad(const T *input1, const T *input2

template <typename T>
void EltWiseGradCpuTypeFunc<T>::AtanGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
auto one = static_cast<T>(1);
auto zero = static_cast<T>(0);
for (size_t i = start; i < end; i++) {
T dividend = input2[i];
T divisor = 1 + input1[i] * input1[i];
if (std::equal_to<T>()(divisor, 0)) {
if (std::equal_to<T>()(dividend, 0)) {
T divisor = one + input1[i] * input1[i];
if (std::equal_to<T>()(divisor, zero)) {
if (std::equal_to<T>()(dividend, zero)) {
out[i] = std::numeric_limits<T>::quiet_NaN();
continue;
}
if (std::numeric_limits<T>::has_infinity) {
out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
out[i] = dividend > zero ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
} else {
out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
out[i] = dividend > zero ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
}
continue;
}
@@ -331,6 +334,20 @@ void EltWiseGradCpuTypeFunc<T>::AtanGrad(const T *input1, const T *input2, T *ou
}
}

template <typename T>
void EltWiseGradCpuTypeFunc<T>::ComplexAtanGrad(const T *input1, const T *input2, T *out, size_t start,
size_t end) const {
for (size_t i = start; i < end; i++) {
T dividend = input2[i];
T divisor = static_cast<T>(1) + input1[i] * input1[i];
if (std::equal_to<T>()(divisor, static_cast<T>(0))) {
out[i] = std::numeric_limits<T>::quiet_NaN();
continue;
}
out[i] = dividend / conj(divisor);
}
}

template <typename T>
void EltWiseGradCpuTypeFunc<T>::AsinhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
for (size_t i = start; i < end; i++) {
@@ -450,7 +467,8 @@ void EltWiseGradCpuTypeFunc<T>::InitFunc(const BaseOperatorPtr &base_operator, c
if constexpr (std::is_same_v<T, float16>) {
static const std::map<std::string,
std::function<void(EltWiseGradCpuTypeFunc *, const T *, const T *, T *, size_t, size_t)>>
elt_map{{prim::kPrimReluGrad->name(), &EltWiseGradCpuTypeFunc<T>::ReluGrad},
elt_map{{prim::kPrimAtanGrad->name(), &EltWiseGradCpuTypeFunc<T>::AtanGrad},
{prim::kPrimReluGrad->name(), &EltWiseGradCpuTypeFunc<T>::ReluGrad},
{prim::kPrimReciprocalGrad->name(), &EltWiseGradCpuTypeFunc<T>::ReciprocalGrad},
{prim::kPrimRsqrtGrad->name(), &EltWiseGradCpuTypeFunc<T>::RsqrtGrad}};
if (elt_map.find(kernel_name_) == elt_map.end()) {
@@ -462,8 +480,7 @@ void EltWiseGradCpuTypeFunc<T>::InitFunc(const BaseOperatorPtr &base_operator, c
if constexpr (std::is_same_v<T, float>) {
static const std::map<std::string,
std::function<void(EltWiseGradCpuTypeFunc *, const T *, const T *, T *, size_t, size_t)>>
elt_map{{prim::kPrimReluGrad->name(), &EltWiseGradCpuTypeFunc<T>::ReluGrad},
{prim::kPrimRelu6Grad->name(), &EltWiseGradCpuTypeFunc<T>::ReLU6Grad},
elt_map{{prim::kPrimRelu6Grad->name(), &EltWiseGradCpuTypeFunc<T>::ReLU6Grad},
{prim::kPrimSigmoidGrad->name(), &EltWiseGradCpuTypeFunc<T>::SigmoidGrad},
{prim::kPrimAbsGrad->name(), &EltWiseGradCpuTypeFunc<T>::AbsGrad},
{prim::kPrimTanhGrad->name(), &EltWiseGradCpuTypeFunc<T>::TanhGrad},
@@ -516,6 +533,7 @@ void EltWiseGradCpuTypeFunc<T>::InitFunc(const BaseOperatorPtr &base_operator, c
{prim::kPrimAsinGrad->name(), &EltWiseGradCpuTypeFunc<T>::ComplexAsinGrad},
{prim::kPrimACosGrad->name(), &EltWiseGradCpuTypeFunc<T>::ComplexACosGrad},
{prim::kPrimTanhGrad->name(), &EltWiseGradCpuTypeFunc<T>::TanhGrad},
{prim::kPrimAtanGrad->name(), &EltWiseGradCpuTypeFunc<T>::ComplexAtanGrad},
{prim::kPrimInvGrad->name(), &EltWiseGradCpuTypeFunc<T>::InvGrad},
{prim::kPrimSqrtGrad->name(), &EltWiseGradCpuTypeFunc<T>::SqrtGrad},
{prim::kPrimReciprocalGrad->name(), &EltWiseGradCpuTypeFunc<T>::ReciprocalGrad},
@@ -649,8 +667,22 @@ static std::map<std::string, std::vector<std::pair<KernelAttr, FuncCreator>>> ke
.AddOutputAttr(kNumberTypeComplex128),
&SpecializeEltWiseGradFunc<complex128>}}},
{kAtanGrad,
{{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
&SpecializeEltWiseGradFunc<float>}}},
{{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
&SpecializeEltWiseGradFunc<float16>},
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
&SpecializeEltWiseGradFunc<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
&SpecializeEltWiseGradFunc<double>},
{KernelAttr()
.AddInputAttr(kNumberTypeComplex64)
.AddInputAttr(kNumberTypeComplex64)
.AddOutputAttr(kNumberTypeComplex64),
&SpecializeEltWiseGradFunc<complex64>},
{KernelAttr()
.AddInputAttr(kNumberTypeComplex128)
.AddInputAttr(kNumberTypeComplex128)
.AddOutputAttr(kNumberTypeComplex128),
&SpecializeEltWiseGradFunc<complex128>}}},
{kAsinhGrad,
{{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
&SpecializeEltWiseGradFunc<float>},

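The two CPU gradient paths above follow d/dx atan(x) = 1 / (1 + x^2): the real-typed AtanGrad computes dout / (1 + x^2) with explicit zero-divisor handling, and ComplexAtanGrad divides by the conjugate, dout / conj(1 + x^2). A NumPy sketch of both formulas (mirroring the new tests, not part of the commit):

import numpy as np

x = np.array([-0.5, 0.0, 0.5])
dy = np.array([1.0, 0.0, -1.0])
real_grad = dy / (1 + x * x)                      # real path: dout / (1 + x^2)

xc = (x + 0.25j * x).astype(np.complex64)
dyc = (dy + 0.3j * dy).astype(np.complex64)
complex_grad = dyc / np.conjugate(1 + xc * xc)    # complex path: dout / conj(1 + x^2)
print(real_grad, complex_grad)
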
@@ -394,6 +394,19 @@ HOST_DEVICE inline Complex<T> asinh(const Complex<T> &z) {
return result;
}

template <typename T>
HOST_DEVICE inline Complex<T> atan(const Complex<T> &z) {
Complex<T> result;
#if defined(__CUDACC__)
auto thrust_result = thrust::atan(thrust::complex<T>(z));
result.real(thrust_result.real());
result.imag(thrust_result.imag());
#else
// Host path: delegate to std::atan on std::complex (std::tan would compute the wrong function here).
auto std_result = std::atan(std::complex<T>(z));
result.real(std_result.real());
result.imag(std_result.imag());
#endif
return result;
}

template <typename T>
HOST_DEVICE inline Complex<T> atanh(const Complex<T> &z) {
Complex<T> result;

@@ -189,6 +189,27 @@ __global__ void AtanGradKernel(const T *input, const T *dout, T *output, const s
return;
}

template <>
__global__ void AtanGradKernel(const double *input, const double *dout, double *output, const size_t count) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {
double one = 1;
double divisor = one + input[i] * input[i];
output[i] = dout[i] / divisor;
}
return;
}

template <typename T>
__global__ void AtanGradKernel(const Complex<T> *input, const Complex<T> *dout, Complex<T> *output,
const size_t count) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {
Complex<T> one = Complex<T>(1);
Complex<T> divisor = one + input[i] * input[i];
output[i] = dout[i] / conj(divisor);
}
return;
}

template <typename T>
__global__ void TanhGradKernel(const T *__restrict__ input, const T *dout, T *output, const size_t count) {
const T one = static_cast<T>(1);
@@ -346,6 +367,13 @@ void AtanGrad(const T *input, const T *dout, T *output, const size_t count, cuda
return;
}

template <typename T>
void AtanGrad(const Complex<T> *input, const Complex<T> *dout, Complex<T> *output, const size_t count,
cudaStream_t cuda_stream) {
AtanGradKernel<<<GET_BLOCKS(count), GET_THREADS, 0, cuda_stream>>>(input, dout, output, count);
return;
}

template <typename T>
void TanhGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) {
TanhGradKernel<<<GET_BLOCKS(count), GET_THREADS, 0, cuda_stream>>>(input, dout, output, count);
@@ -629,6 +657,13 @@ template CUDA_LIB_EXPORT void InvGrad<Complex<float>>(const Complex<float> *inpu
template CUDA_LIB_EXPORT void RsqrtGrad<Complex<float>>(const Complex<float> *input, const Complex<float> *dout,
Complex<float> *output, const size_t count,
cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void AtanGrad<Complex<float>>(const Complex<float> *input, const Complex<float> *dout,
Complex<float> *output, const size_t count,
cudaStream_t cuda_stream);

template CUDA_LIB_EXPORT void AtanGrad<Complex<double>>(const Complex<double> *input, const Complex<double> *dout,
Complex<double> *output, const size_t count,
cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void RsqrtGrad<Complex<double>>(const Complex<double> *input, const Complex<double> *dout,
Complex<double> *output, const size_t count,
cudaStream_t cuda_stream);

@@ -535,6 +535,13 @@ __global__ void AtanKernel(const double *input, double *output, const size_t cou
return;
}
template <typename T>
__global__ void AtanKernel(const Complex<T> *input, Complex<T> *output, const size_t count) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {
output[i] = atan(input[i]);
}
return;
}
template <typename T>
__global__ void TanhKernel(const Complex<T> *input, Complex<T> *output, const size_t count) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {
output[i] = tanh(input[i]);
@@ -1007,6 +1014,11 @@ void Atan(const T *input, T *output, const size_t count, cudaStream_t cuda_strea
return;
}
template <typename T>
void Atan(const Complex<T> *input, Complex<T> *output, const size_t count, cudaStream_t cuda_stream) {
AtanKernel<<<GET_BLOCKS(count), GET_THREADS, 0, cuda_stream>>>(input, output, count);
return;
}
template <typename T>
void Asinh(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) {
AsinhKernel<<<GET_BLOCKS(count), GET_THREADS, 0, cuda_stream>>>(input, output, count);
return;
@@ -1835,6 +1847,8 @@ template CUDA_LIB_EXPORT void Sign<Complex<float>>(const Complex<float> *input,
const size_t count, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void Cosh<Complex<float>>(const Complex<float> *input, Complex<float> *output,
const size_t count, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void Atan<Complex<float>>(const Complex<float> *input, Complex<float> *output,
const size_t count, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void Atanh<Complex<float>>(const Complex<float> *input, Complex<float> *output,
const size_t count, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void Reciprocal<Complex<float>>(const Complex<float> *input, Complex<float> *output,
@@ -1877,6 +1891,8 @@ template CUDA_LIB_EXPORT void Asinh<Complex<double>>(const Complex<double> *inpu
const size_t count, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void Tan<Complex<double>>(const Complex<double> *input, Complex<double> *output,
const size_t count, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void Atan<Complex<double>>(const Complex<double> *input, Complex<double> *output,
const size_t count, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void Rsqrt<Complex<double>>(const Complex<double> *input, Complex<double> *output,
const size_t count, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void Sign<Complex<double>>(const Complex<double> *input, Complex<double> *output,

@@ -327,7 +327,13 @@ std::map<std::string, std::vector<std::pair<KernelAttr, UnaryOpGpuKernelMod::Una
{{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
&UnaryOpGpuKernelMod::LaunchKernel<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
&UnaryOpGpuKernelMod::LaunchKernel<half>}}},
&UnaryOpGpuKernelMod::LaunchKernel<half>},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
&UnaryOpGpuKernelMod::LaunchKernel<double>},
{KernelAttr().AddInputAttr(kNumberTypeComplex64).AddOutputAttr(kNumberTypeComplex64),
&UnaryOpGpuKernelMod::LaunchKernel<utils::Complex<float>>},
{KernelAttr().AddInputAttr(kNumberTypeComplex128).AddOutputAttr(kNumberTypeComplex128),
&UnaryOpGpuKernelMod::LaunchKernel<utils::Complex<double>>}}},
{kAtanh,
{{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
&UnaryOpGpuKernelMod::LaunchKernel<double>},
@@ -473,7 +479,7 @@ bool UnaryOpGpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &in
{kInv, Inv<T>}, {kLog, Logarithm<T>}, {kExp, Exponential<T>}, {kNeg, Negative<T>},
{kSin, Sin<T>}, {kCos, Cos<T>}, {kACos, ACos<T>}, {kAcosh, Acosh<T>},
{kAsin, Asin<T>}, {kAsinh, Asinh<T>}, {kSquare, Square<T>}, {kReciprocal, Reciprocal<T>},
{kRsqrt, Rsqrt<T>}, {kSign, Sign<T>}};
{kRsqrt, Rsqrt<T>}, {kSign, Sign<T>}, {kAtan, Atan<T>}};
copy(func_map_complex.begin(), func_map_complex.end(), inserter(func_map, func_map.begin()));
} else {
std::map<std::string, std::function<void(const T *, T *, const size_t, cudaStream_t)>> func_map_normal = {

@@ -110,7 +110,19 @@ std::map<std::string, std::vector<std::pair<KernelAttr, UnaryGradOpGpuKernelMod:
{{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
&UnaryGradOpGpuKernelMod::LaunchKernel<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
&UnaryGradOpGpuKernelMod::LaunchKernel<half>}}},
&UnaryGradOpGpuKernelMod::LaunchKernel<half>},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
&UnaryGradOpGpuKernelMod::LaunchKernel<double>},
{KernelAttr()
.AddInputAttr(kNumberTypeComplex64)
.AddInputAttr(kNumberTypeComplex64)
.AddOutputAttr(kNumberTypeComplex64),
&UnaryGradOpGpuKernelMod::LaunchKernel<utils::Complex<float>>},
{KernelAttr()
.AddInputAttr(kNumberTypeComplex128)
.AddInputAttr(kNumberTypeComplex128)
.AddOutputAttr(kNumberTypeComplex128),
&UnaryGradOpGpuKernelMod::LaunchKernel<utils::Complex<double>>}}},
{kAsinhGrad,
{{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
&UnaryGradOpGpuKernelMod::LaunchKernel<float>},
@@ -246,7 +258,8 @@ bool UnaryGradOpGpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr>
{kAsinGrad, AsinGrad<T>},
{kAsinhGrad, AsinhGrad<T>},
{kSqrtGrad, SqrtGrad<T>},
{kRsqrtGrad, RsqrtGrad<T>}};
{kRsqrtGrad, RsqrtGrad<T>},
{kAtanGrad, AtanGrad<T>}};
copy(func_map_complex.begin(), func_map_complex.end(), inserter(func_map, func_map.begin()));
} else {
std::map<std::string, std::function<void(const T *, const T *, T *, const size_t, cudaStream_t)>> func_map_normal =

@@ -48,7 +48,7 @@ TypePtr AtanInferType(const PrimitivePtr &primitive, const std::vector<AbstractB
auto prim_name = primitive->name();
MS_EXCEPTION_IF_NULL(input_args[0]);
auto x_type = input_args[0]->BuildType();
const std::set valid_types = {kFloat16, kFloat32, kFloat64};
const std::set valid_types = {kFloat16, kFloat32, kFloat64, kComplex64, kComplex128};
(void)CheckAndConvertUtils::CheckTensorTypeValid("input_x", x_type, valid_types, prim_name);
return x_type;
}

@@ -60,8 +60,8 @@ TypePtr FloorDivInferType(const PrimitivePtr &primitive, const std::vector<Abstr
<< " one of the inputs must be tensor type but got " << input_type01->ToString() << " and "
<< input_type02->ToString() << ".";
}
const std::set<TypePtr> valid_types = {kFloat16, kFloat32, kFloat64, kInt8, kInt16,
kInt32, kInt64, kUInt8, kUInt16, kBool};
const std::set<TypePtr> valid_types = {kFloat16, kFloat32, kFloat64, kInt8, kInt16, kInt32, kInt64,
kUInt8, kUInt16, kUInt32, kUInt64, kBool, kComplex64, kComplex128};
(void)CheckAndConvertUtils::CheckTypeValid("x", input_type01, valid_types, prim_name);
(void)CheckAndConvertUtils::CheckTypeValid("y", input_type02, valid_types, prim_name);
return input_type01;

@@ -38,7 +38,7 @@ TypePtr AtanGradInferType(const PrimitivePtr &primitive, const std::vector<Abstr
MS_EXCEPTION_IF_NULL(input_args[0]);
auto x_type = input_args[0]->BuildType();
MS_EXCEPTION_IF_NULL(x_type);
const std::set<TypePtr> valid_types = {kFloat16, kFloat32};
const std::set<TypePtr> valid_types = {kFloat16, kFloat32, kFloat64, kComplex64, kComplex128};
(void)CheckAndConvertUtils::CheckTensorTypeValid("input_x", x_type, valid_types, prim_name);
return x_type;
}

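With the widened type whitelists above (Atan and AtanGrad now admit float64 and the complex types, FloorDiv now admits uint32/uint64 and the complex types), the corresponding Python primitives accept these dtypes directly. A minimal usage sketch, assuming a build that includes the CPU or GPU kernels from this commit:

import numpy as np
from mindspore import Tensor, ops

atan = ops.Atan()
print(atan(Tensor(np.array([0.5 + 0.5j, -1.0 + 0.0j], dtype=np.complex64))))  # complex64 passes the type check

floor_div = ops.FloorDiv()
print(floor_div(Tensor(np.array([7, 9], dtype=np.uint32)),
                Tensor(np.array([2, 4], dtype=np.uint32))))                   # uint32 is now a valid input type
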
@@ -1914,6 +1914,7 @@ def atan2(x, y):
    Args:
        x (Tensor): The input tensor.
            :math:`(N,*)` where :math:`*` means, any number of additional dimensions.
            The data type should be one of the following types: float16, float32, float64
        y (Tensor): The input tensor. It has the same shape with `x`.

    Returns:

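A short usage sketch for the dtypes the docstring above lists for atan2 (float16, float32, float64), assuming the functional interface mindspore.ops.atan2; the expected values come from np.arctan2, as in the tests:

import numpy as np
from mindspore import Tensor, ops

x = Tensor(np.array([1.0, 2.0], dtype=np.float64))
y = Tensor(np.array([3.0, 4.0], dtype=np.float64))
print(ops.atan2(x, y))  # expected to match np.arctan2([1.0, 2.0], [3.0, 4.0]) elementwise
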
@@ -20,7 +20,6 @@ import mindspore.nn as nn
from mindspore import Tensor
from mindspore import context
from mindspore.ops import operations as P

context.set_context(mode=context.GRAPH_MODE, device_target="CPU")


@@ -36,16 +35,14 @@ class NetAtan2(nn.Cell):
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_atan2():
    np_array = np.array([1, 2, 3, 4, 5]).astype('float32')
    input_x = Tensor(np_array)
    net = NetAtan2()
    output = net(input_x, input_x)
    print(output)
    expect = np.arctan2(np_array, np_array)
    assert np.allclose(output.asnumpy(), expect)

    np_array = np.array([1, 2, 3, 4, 5], dtype=np.float64)
@pytest.mark.parametrize('dtype', [np.float16, np.float32, np.float64])
def test_atan2(dtype):
    """
    Feature: ALL To ALL
    Description: test cases for Atan2
    Expectation: the result match to numpy
    """
    np_array = np.array([1, 2, 3, 4, 5]).astype(dtype)
    input_x = Tensor(np_array)
    net = NetAtan2()
    output = net(input_x, input_x)

@@ -20,27 +20,53 @@ import mindspore.nn as nn
from mindspore import Tensor
from mindspore import context
from mindspore.ops.operations import _grad_ops as G

context.set_context(mode=context.GRAPH_MODE, device_target="CPU")


class NetAtanGrad(nn.Cell):
    def __init__(self):
        super(NetAtanGrad, self).__init__()
        self.atanGrad = G.AtanGrad()
        self.atan_grad = G.AtanGrad()

    def construct(self, x, dy):
        return self.atanGrad(x, dy)
        return self.atan_grad(x, dy)


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_atan_grad():
    x = np.array([-0.5, 0, 0.5]).astype('float32')
    dy = np.array([1, 0, -1]).astype('float32')
@pytest.mark.parametrize('dtype', [np.float16, np.float32, np.float64])
def test_atan_grad_float(dtype):
    """
    Feature: ALL To ALL
    Description: test cases for AtanGrad
    Expectation: the result match to numpy
    """
    x = np.array([-0.5, 0, 0.5]).astype(dtype)
    dy = np.array([1, 0, -1]).astype(dtype)
    atan_grad = NetAtanGrad()
    output = atan_grad(Tensor(x), Tensor(dy))
    print(output)
    expect = dy / (1 + x * x)
    assert np.allclose(output.asnumpy(), expect)


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.complex64, np.complex128])
def test_atan_grad_complex(dtype):
    """
    Feature: ALL To ALL
    Description: test cases for AtanGrad
    Expectation: the result match to numpy
    """
    x = np.array([-0.5, 0, 0.5]).astype(dtype)
    x = x + 0.5j * x
    dy = np.array([1, 0, -1]).astype(dtype)
    dy = dy + 0.3j * dy
    atan_grad = NetAtanGrad()
    output = atan_grad(Tensor(x), Tensor(dy))
    print(output)
    expect = dy / np.conjugate(1 + x * x)
    assert np.allclose(output.asnumpy(), expect)

@@ -20,7 +20,6 @@ import mindspore.nn as nn
from mindspore import Tensor
from mindspore import context
from mindspore.ops import operations as P

context.set_context(mode=context.GRAPH_MODE, device_target="CPU")


@@ -79,5 +78,44 @@ def test_atan_forward_float32_tensor_api():
    test_atan_forward_tensor_api(np.float32)


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.float16, np.float32, np.float64])
def test_atan_float(dtype):
    """
    Feature: ALL To ALL
    Description: test cases for Atan
    Expectation: the result match to numpy
    """
    np_array = np.array([-1, -0.5, 0, 0.5, 1], dtype=dtype)
    input_x = Tensor(np_array)
    net = NetAtan()
    output = net(input_x)
    print(output)
    expect = np.arctan(np_array)
    assert np.allclose(output.asnumpy(), expect)


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.complex64, np.complex128])
def test_atan_complex(dtype):
    """
    Feature: ALL To ALL
    Description: test cases for Atan
    Expectation: the result match to numpy
    """
    np_array = np.array([-1, -0.5, 0, 0.5, 1], dtype=dtype)
    np_array = np_array + 0.5j * np_array
    input_x = Tensor(np_array)
    net = NetAtan()
    output = net(input_x)
    print(output)
    expect = np.arctan(np_array)
    assert np.allclose(output.asnumpy(), expect)


if __name__ == '__main__':
    test_atan_forward_float32_tensor_api()

@@ -0,0 +1,75 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import numpy as np
import pytest

import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")


class NetFloorDiv(nn.Cell):
    def __init__(self):
        super(NetFloorDiv, self).__init__()
        self.floordiv = P.FloorDiv()

    def construct(self, x, y):
        return self.floordiv(x, y)


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.float16, np.float32, np.float64, np.int8, np.int16, np.int32,
                                   np.int64, np.uint8, np.uint16, np.uint32, np.uint64])
def testtype_floor_div_int_float(dtype):
    """
    Feature: ALL To ALL
    Description: test cases for FloorDiv
    Expectation: the result match to numpy
    """
    x_np = np.random.rand(1, 5).astype(dtype)
    y_np = np.random.rand(1, 5).astype(dtype)
    expect = np.floor_divide(x_np, y_np)
    x_input = Tensor(x_np)
    y_input = Tensor(y_np)
    floor_div = NetFloorDiv()
    output = floor_div(x_input, y_input)
    assert np.allclose(output.asnumpy(), expect)


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.complex64, np.complex128])
def testtype_floor_div_complex(dtype):
    """
    Feature: ALL To ALL
    Description: test cases for FloorDiv
    Expectation: the result match to numpy
    """
    x_np = np.random.rand(1, 5).astype(dtype)
    x_np = x_np + 0.5j * x_np
    y_np = np.random.rand(1, 5).astype(dtype)
    y_np = y_np + 0.4j * y_np
    expect = np.floor_divide(x_np, y_np)
    x_input = Tensor(x_np)
    y_input = Tensor(y_np)
    floor_div = NetFloorDiv()
    output = floor_div(x_input, y_input)
    assert np.allclose(output.asnumpy(), expect)

@@ -0,0 +1,53 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import numpy as np
import pytest

import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")


class FloorModNet(nn.Cell):
    def __init__(self):
        super(FloorModNet, self).__init__()
        self.floor_mod = P.FloorMod()

    def construct(self, x, y):
        return self.floor_mod(x, y)


@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.float16, np.float32, np.float64, np.int8, np.int16, np.int32,
                                   np.int64, np.uint8, np.uint16, np.uint32, np.uint64])
def testtype_floor_mod(dtype):
    """
    Feature: ALL To ALL
    Description: test cases for FloorMod
    Expectation: the result match to numpy
    """
    x_np = np.random.rand(1, 5).astype(dtype)
    y_np = np.random.rand(1, 5).astype(dtype) + 1
    expect = np.mod(x_np, y_np)
    x_input = Tensor(x_np)
    y_input = Tensor(y_np)
    floor_mod = FloorModNet()
    output = floor_mod(x_input, y_input)
    assert np.allclose(output.asnumpy(), expect)

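The FloorMod test above checks against np.mod; floor-mod and floor-div are tied together by x = FloorDiv(x, y) * y + FloorMod(x, y), so the two new test files exercise consistent definitions. A small NumPy sketch of that identity (not part of the commit):

import numpy as np

x = np.array([5.5, -5.5, 7.0], dtype=np.float32)
y = np.array([2.0, 2.0, -3.0], dtype=np.float32)
assert np.allclose(np.mod(x, y), x - np.floor_divide(x, y) * y)  # FloorMod(x, y) == x - FloorDiv(x, y) * y
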
@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -22,22 +22,72 @@ import mindspore.ops.operations._grad_ops as P
context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
np.random.seed(1)


@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_atangrad_fp32():
    """
    Feature: ALL To ALL
    Description: test cases for AtanGrad float32
    Expectation: the result match to numpy
    """
    x_np = np.random.rand(4, 2).astype(np.float32) * 10
    dout_np = np.random.rand(4, 2).astype(np.float32) * 10
    output_ms = P.AtanGrad()(Tensor(x_np), Tensor(dout_np))
    output_np = dout_np / (1 + np.square(x_np))
    assert np.allclose(output_ms.asnumpy(), output_np, 1e-4, 1e-4)


@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_atangrad_fp16():
    """
    Feature: ALL To ALL
    Description: test cases for AtanGrad float16
    Expectation: the result match to numpy
    """
    x_np = np.random.rand(4, 2).astype(np.float16) * 10
    dout_np = np.random.rand(4, 2).astype(np.float16) * 10
    output_ms = P.AtanGrad()(Tensor(x_np), Tensor(dout_np))
    output_np = dout_np.astype(np.float32) / (1 + np.square(x_np.astype(np.float32)))
    assert np.allclose(output_ms.asnumpy(), output_np.astype(np.float16), 1e-3, 1e-3)


@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.float16, np.float32, np.float64])
def test_atan_grad_float(dtype):
    """
    Feature: ALL To ALL
    Description: test cases for AtanGrad
    Expectation: the result match to numpy
    """
    x = np.array([-0.5, 0, 0.5]).astype(dtype)
    dy = np.array([1, 0, -1]).astype(dtype)
    output = P.AtanGrad()(Tensor(x), Tensor(dy))
    print(output)
    expect = dy / (1 + x * x)
    assert np.allclose(output.asnumpy(), expect)


@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.complex64, np.complex128])
def test_atan_grad_complex(dtype):
    """
    Feature: ALL To ALL
    Description: test cases for AtanGrad
    Expectation: the result match to numpy
    """
    x = np.array([-0.5, 0, 0.5]).astype(dtype)
    x = x + 0.5j * x
    dy = np.array([1, 0, -1]).astype(dtype)
    dy = dy + 0.3j * dy
    output = P.AtanGrad()(Tensor(x), Tensor(dy))
    print(output)
    expect = dy / np.conjugate(1 + x * x)
    assert np.allclose(output.asnumpy(), expect)

@@ -22,25 +22,74 @@ from mindspore.ops import operations as P
context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
np.random.seed(1)


@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_atan_fp32():
    """
    Feature: ALL To ALL
    Description: test cases for Atan float32
    Expectation: the result match to numpy
    """
    x_np = np.random.rand(4, 2).astype(np.float32) * 10
    output_ms = P.Atan()(Tensor(x_np))
    output_np = np.arctan(x_np)
    assert np.allclose(output_ms.asnumpy(), output_np, 1e-4, 1e-4)


@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_atan_fp16():
    """
    Feature: ALL To ALL
    Description: test cases for Atan float16
    Expectation: the result match to numpy
    """
    x_np = np.random.rand(4, 2).astype(np.float16) * 10
    output_ms = P.Atan()(Tensor(x_np))
    output_np = np.arctan(x_np.astype(np.float32)).astype(np.float16)
    assert np.allclose(output_ms.asnumpy(), output_np, 1e-3, 1e-3)


@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.float16, np.float32, np.float64])
def test_atan_float(dtype):
    """
    Feature: ALL To ALL
    Description: test cases for Atan
    Expectation: the result match to numpy
    """
    np_array = np.array([-1, -0.5, 0, 0.5, 1], dtype=dtype)
    input_x = Tensor(np_array)
    output = P.Atan()(input_x)
    print(output)
    expect = np.arctan(np_array)
    assert np.allclose(output.asnumpy(), expect)


@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.complex64, np.complex128])
def test_atan_complex(dtype):
    """
    Feature: ALL To ALL
    Description: test cases for Atan
    Expectation: the result match to numpy
    """
    np_array = np.array([-1, -0.5, 0, 0.5, 1], dtype=dtype)
    np_array = np_array + 0.5j * np_array
    input_x = Tensor(np_array)
    output = P.Atan()(input_x)
    print(output)
    expect = np.arctan(np_array)
    assert np.allclose(output.asnumpy(), expect)


def test_atan_forward_tensor_api(nptype):
    """
    Feature: test atan forward tensor api for given input dtype.