[assistant][ops] Fix Atan, AtanGrad, Atan2, FloorDiv and FloorMod GPU and CPU kernels to support additional data types

鄢立川 2022-09-12 15:48:37 +08:00 committed by yanlichuan
parent c03a1aeeff
commit 262faacc09
19 changed files with 522 additions and 46 deletions

View File

@ -21,6 +21,7 @@
#include <memory>
#include <string>
#include <typeinfo>
#include <complex>
#include <unordered_map>
#include <utility>
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
@ -150,6 +151,7 @@ class ArithmeticCpuTypeFunc : public CpuKernelFunc {
void Div(const T *input1, const T *input2, T *out);
void DivNoNan(const T *input1, const T *input2, T *out);
void FloorDiv(const T *input1, const T *input2, T *out);
void FloorDivComplex(const T *input1, const T *input2, T *out);
void Mod(const T *input1, const T *input2, T *out);
void FloorMod(const T *input1, const T *input2, T *out);
void Pow(const T *input1, const T *input2, T *out);
@ -223,6 +225,7 @@ class ArithmeticCpuTypeFunc : public CpuKernelFunc {
arithmeticMathFuncMap = {{kSquaredDifference, &ArithmeticCpuTypeFunc<T>::SquaredDifferenceComplex},
{kSub, &ArithmeticCpuTypeFunc<T>::Sub},
{kDiv, &ArithmeticCpuTypeFunc<T>::DivComplex},
{kFloorDiv, &ArithmeticCpuTypeFunc<T>::FloorDivComplex},
{kRealDiv, &ArithmeticCpuTypeFunc<T>::RealDivComplex},
{kMul, &ArithmeticCpuTypeFunc<T>::Mul},
{kDivNoNan, &ArithmeticCpuTypeFunc<T>::DivNoNan},
@ -612,6 +615,28 @@ void ArithmeticCpuTypeFunc<T>::FloorDiv(const T *input1, const T *input2, T *out
ParallelLaunchAutoSearch(task, output_size_, this, &parallel_search_info_);
}
template <typename T>
void ArithmeticCpuTypeFunc<T>::FloorDivComplex(const T *input1, const T *input2, T *out) {
BroadcastIterator base_iter(input_shape1_, input_shape2_, output_shape_);
auto task = [&input1, &input2, &out, &base_iter](size_t start, size_t end) {
auto iter = base_iter;
iter.SetPos(start);
for (size_t i = start; i < end; i++) {
auto dividend = input1[iter.GetInputPosA()];
auto divisor = input2[iter.GetInputPosB()];
iter.GenNextPos();
auto zero = static_cast<T>(0);
if (divisor == zero) {
out[i] = std::numeric_limits<T>::quiet_NaN();
continue;
}
auto temp = dividend / divisor;
out[i] = static_cast<T>(std::complex<double>(floor(temp.real()), 0));
}
};
ParallelLaunchAutoSearch(task, output_size_, this, &parallel_search_info_);
}
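
As a rough reference for the semantics of FloorDivComplex above: the complex quotient is computed first, the floor of its real part is kept, the imaginary part is zeroed, and a zero divisor yields NaN. A minimal numpy sketch (broadcasting, which the kernel handles via BroadcastIterator, is omitted):

import numpy as np

def floor_div_complex_ref(x, y):
    """Element-wise sketch of FloorDivComplex above (no broadcasting)."""
    x = np.asarray(x, dtype=np.complex128).reshape(-1)
    y = np.asarray(y, dtype=np.complex128).reshape(-1)
    out = np.empty_like(x)
    for i, (a, b) in enumerate(zip(x, y)):
        if b == 0:
            out[i] = np.nan                          # zero divisor -> quiet NaN
        else:
            q = a / b                                # complex quotient
            out[i] = complex(np.floor(q.real), 0.0)  # floor of the real part only
    return out

# e.g. (3+4j)/(1+1j) = 3.5+0.5j, so floor_div_complex_ref([3+4j], [1+1j]) gives [3+0j]
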
template <typename T>
void ArithmeticCpuTypeFunc<T>::Mod(const T *input1, const T *input2, T *out) {
if (!is_init_broadcast_) {
@ -1005,6 +1030,8 @@ static std::map<std::string, std::vector<std::pair<KernelAttr, ArithmeticCpuFunc
{kFloorDiv,
{{KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
SpecializeArithFunc<int8_t>},
{KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
SpecializeArithFunc<int16_t>},
{KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
SpecializeArithFunc<int>},
{KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
@ -1013,12 +1040,26 @@ static std::map<std::string, std::vector<std::pair<KernelAttr, ArithmeticCpuFunc
SpecializeArithFunc<uint8_t>},
{KernelAttr().AddInputAttr(kNumberTypeUInt16).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),
SpecializeArithFunc<uint16_t>},
{KernelAttr().AddInputAttr(kNumberTypeUInt32).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32),
SpecializeArithFunc<uint32_t>},
{KernelAttr().AddInputAttr(kNumberTypeUInt64).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64),
SpecializeArithFunc<uint64_t>},
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
SpecializeArithFunc<float16>},
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
SpecializeArithFunc<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
SpecializeArithFunc<double>}}},
SpecializeArithFunc<double>},
{KernelAttr()
.AddInputAttr(kNumberTypeComplex64)
.AddInputAttr(kNumberTypeComplex64)
.AddOutputAttr(kNumberTypeComplex64),
SpecializeArithFunc<complex64>},
{KernelAttr()
.AddInputAttr(kNumberTypeComplex128)
.AddInputAttr(kNumberTypeComplex128)
.AddOutputAttr(kNumberTypeComplex128),
SpecializeArithFunc<complex128>}}},
{kMod,
{{KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
SpecializeArithFunc<int64_t>},
@ -1035,14 +1076,28 @@ static std::map<std::string, std::vector<std::pair<KernelAttr, ArithmeticCpuFunc
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
SpecializeArithFunc<double>}}},
{kFloorMod,
{{KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
SpecializeArithFunc<int64_t>},
{{KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
SpecializeArithFunc<int8_t>},
{KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
SpecializeArithFunc<int16_t>},
{KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
SpecializeArithFunc<int32_t>},
SpecializeArithFunc<int>},
{KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
SpecializeArithFunc<int64_t>},
{KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
SpecializeArithFunc<uint8_t>},
{KernelAttr().AddInputAttr(kNumberTypeUInt16).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),
SpecializeArithFunc<uint16_t>},
{KernelAttr().AddInputAttr(kNumberTypeUInt32).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32),
SpecializeArithFunc<uint32_t>},
{KernelAttr().AddInputAttr(kNumberTypeUInt64).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64),
SpecializeArithFunc<uint64_t>},
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
SpecializeArithFunc<float16>},
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
SpecializeArithFunc<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
SpecializeArithFunc<float16>}}},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
SpecializeArithFunc<double>}}},
{kAssignAdd,
{{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
SpecializeArithFunc<float>},
@ -1174,7 +1229,9 @@ static std::map<std::string, std::vector<std::pair<KernelAttr, ArithmeticCpuFunc
.AddOutputAttr(kNumberTypeComplex128),
SpecializeArithFunc<complex128>}}},
{kAtan2,
{{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
{{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
SpecializeArithFunc<float16>},
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
SpecializeArithFunc<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
SpecializeArithFunc<double>}}},

View File

@ -261,6 +261,16 @@ void Atan(ArithmeticSelfCpuKernelFunc *content, const T *in, T *out, size_t size
ParallelLaunchAutoSearch(task, size, content, &content->parallel_search_info_);
}
template <typename T>
void ComplexAtan(ArithmeticSelfCpuKernelFunc *content, const T *in, T *out, size_t size) {
auto task = [&in, &out](size_t start, size_t end) {
for (size_t i = start; i < end; i++) {
out[i] = static_cast<T>(atan(in[i]));
}
};
ParallelLaunchAutoSearch(task, size, content, &content->parallel_search_info_);
}
template <typename T>
void Sin(ArithmeticSelfCpuKernelFunc *content, const T *in, T *out, size_t size) {
auto task = [&in, &out](size_t start, size_t end) {
@ -736,6 +746,7 @@ void ArithmeticSelfCpuKernelFunc::LaunchKernelComplex(const std::vector<AddressP
{prim::kPrimReciprocal->name(), Reciprocal<T>},
{prim::kPrimSqrt->name(), Sqrt<T>},
{prim::kPrimTan->name(), Tan<T>},
{prim::kPrimAtan->name(), ComplexAtan<T>},
{prim::kPrimTanh->name(), Tanh<T>},
{prim::kPrimAtanh->name(), Atanh<T>},
{prim::kPrimInv->name(), Inv<T>},
@ -763,7 +774,8 @@ void ArithmeticSelfCpuKernelFunc::LaunchKernelFloat16(const std::vector<AddressP
{prim::kPrimSinh->name(), Sinh<float16>}, {prim::kPrimCosh->name(), Cosh<float16>},
{prim::kPrimAsinh->name(), Asinh<float16>}, {prim::kPrimErfc->name(), Erfc<float16>},
{prim::kPrimRsqrt->name(), Rsqrt<float16>}, {prim::kPrimErf->name(), Erf<float16>},
{prim::kPrimSign->name(), Sign<float16>}, {prim::kPrimRint->name(), Rint<float16>}};
{prim::kPrimSign->name(), Sign<float16>}, {prim::kPrimRint->name(), Rint<float16>},
{prim::kPrimAtan->name(), Atan<float16>}};
const auto func_pair = arithmeticSelfFuncMap.find(kernel_name_);
if (arithmeticSelfFuncMap.find(kernel_name_) == arithmeticSelfFuncMap.end()) {
MS_LOG(EXCEPTION) << "For 'ArithmeticSelf', it does not support " << kernel_name_ << " with float16 as input. ";
@ -966,7 +978,10 @@ static std::map<std::string, std::vector<std::pair<KernelAttr, ArithFuncCreator>
{KernelAttr().AddInputAttr(kNumberTypeComplex128).AddOutputAttr(kNumberTypeComplex128), CreateArithSelfFunc}}},
{kAtan,
{{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), CreateArithSelfFunc},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64), CreateArithSelfFunc}}},
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), CreateArithSelfFunc},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64), CreateArithSelfFunc},
{KernelAttr().AddInputAttr(kNumberTypeComplex64).AddOutputAttr(kNumberTypeComplex64), CreateArithSelfFunc},
{KernelAttr().AddInputAttr(kNumberTypeComplex128).AddOutputAttr(kNumberTypeComplex128), CreateArithSelfFunc}}},
{kSin,
{{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), CreateArithSelfFunc},
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), CreateArithSelfFunc},

View File

@ -69,6 +69,7 @@ class EltWiseGradCpuTypeFunc : public CpuKernelFunc {
void ACosGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
void ComplexACosGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
void AtanGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
void ComplexAtanGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
void AsinhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
void ComplexAsinhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
void InvGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
@ -312,18 +313,20 @@ void EltWiseGradCpuTypeFunc<T>::ComplexACosGrad(const T *input1, const T *input2
template <typename T>
void EltWiseGradCpuTypeFunc<T>::AtanGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
auto one = static_cast<T>(1);
auto zero = static_cast<T>(0);
for (size_t i = start; i < end; i++) {
T dividend = input2[i];
T divisor = 1 + input1[i] * input1[i];
if (std::equal_to<T>()(divisor, 0)) {
if (std::equal_to<T>()(dividend, 0)) {
T divisor = one + input1[i] * input1[i];
if (std::equal_to<T>()(divisor, zero)) {
if (std::equal_to<T>()(dividend, zero)) {
out[i] = std::numeric_limits<T>::quiet_NaN();
continue;
}
if (std::numeric_limits<T>::has_infinity) {
out[i] = dividend > 0 ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
out[i] = dividend > zero ? std::numeric_limits<T>::infinity() : -std::numeric_limits<T>::infinity();
} else {
out[i] = dividend > 0 ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
out[i] = dividend > zero ? std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
}
continue;
}
@ -331,6 +334,20 @@ void EltWiseGradCpuTypeFunc<T>::AtanGrad(const T *input1, const T *input2, T *ou
}
}
template <typename T>
void EltWiseGradCpuTypeFunc<T>::ComplexAtanGrad(const T *input1, const T *input2, T *out, size_t start,
size_t end) const {
for (size_t i = start; i < end; i++) {
T dividend = input2[i];
T divisor = static_cast<T>(1) + input1[i] * input1[i];
if (std::equal_to<T>()(divisor, static_cast<T>(0))) {
out[i] = std::numeric_limits<T>::quiet_NaN();
continue;
}
out[i] = dividend / conj(divisor);
}
}
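
Both AtanGrad paths compute the backward of y = atan(x): for real inputs d/dx atan(x) = 1/(1 + x^2), so the output is dout / (1 + x^2); for complex inputs the kernel divides by the conjugate of that divisor, matching the expectation used in the tests added in this commit. A minimal numpy sketch (the zero-divisor/infinity handling of the real path is omitted):

import numpy as np

def atan_grad_ref(x, dout):
    # Real path: grad = dout / (1 + x^2); complex path: divide by the conjugate.
    divisor = 1 + x * x
    return dout / (np.conjugate(divisor) if np.iscomplexobj(x) else divisor)
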
template <typename T>
void EltWiseGradCpuTypeFunc<T>::AsinhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
for (size_t i = start; i < end; i++) {
@ -450,7 +467,8 @@ void EltWiseGradCpuTypeFunc<T>::InitFunc(const BaseOperatorPtr &base_operator, c
if constexpr (std::is_same_v<T, float16>) {
static const std::map<std::string,
std::function<void(EltWiseGradCpuTypeFunc *, const T *, const T *, T *, size_t, size_t)>>
elt_map{{prim::kPrimReluGrad->name(), &EltWiseGradCpuTypeFunc<T>::ReluGrad},
elt_map{{prim::kPrimAtanGrad->name(), &EltWiseGradCpuTypeFunc<T>::AtanGrad},
{prim::kPrimReluGrad->name(), &EltWiseGradCpuTypeFunc<T>::ReluGrad},
{prim::kPrimReciprocalGrad->name(), &EltWiseGradCpuTypeFunc<T>::ReciprocalGrad},
{prim::kPrimRsqrtGrad->name(), &EltWiseGradCpuTypeFunc<T>::RsqrtGrad}};
if (elt_map.find(kernel_name_) == elt_map.end()) {
@ -462,8 +480,7 @@ void EltWiseGradCpuTypeFunc<T>::InitFunc(const BaseOperatorPtr &base_operator, c
if constexpr (std::is_same_v<T, float>) {
static const std::map<std::string,
std::function<void(EltWiseGradCpuTypeFunc *, const T *, const T *, T *, size_t, size_t)>>
elt_map{{prim::kPrimReluGrad->name(), &EltWiseGradCpuTypeFunc<T>::ReluGrad},
{prim::kPrimRelu6Grad->name(), &EltWiseGradCpuTypeFunc<T>::ReLU6Grad},
elt_map{{prim::kPrimRelu6Grad->name(), &EltWiseGradCpuTypeFunc<T>::ReLU6Grad},
{prim::kPrimSigmoidGrad->name(), &EltWiseGradCpuTypeFunc<T>::SigmoidGrad},
{prim::kPrimAbsGrad->name(), &EltWiseGradCpuTypeFunc<T>::AbsGrad},
{prim::kPrimTanhGrad->name(), &EltWiseGradCpuTypeFunc<T>::TanhGrad},
@ -516,6 +533,7 @@ void EltWiseGradCpuTypeFunc<T>::InitFunc(const BaseOperatorPtr &base_operator, c
{prim::kPrimAsinGrad->name(), &EltWiseGradCpuTypeFunc<T>::ComplexAsinGrad},
{prim::kPrimACosGrad->name(), &EltWiseGradCpuTypeFunc<T>::ComplexACosGrad},
{prim::kPrimTanhGrad->name(), &EltWiseGradCpuTypeFunc<T>::TanhGrad},
{prim::kPrimAtanGrad->name(), &EltWiseGradCpuTypeFunc<T>::ComplexAtanGrad},
{prim::kPrimInvGrad->name(), &EltWiseGradCpuTypeFunc<T>::InvGrad},
{prim::kPrimSqrtGrad->name(), &EltWiseGradCpuTypeFunc<T>::SqrtGrad},
{prim::kPrimReciprocalGrad->name(), &EltWiseGradCpuTypeFunc<T>::ReciprocalGrad},
@ -649,8 +667,22 @@ static std::map<std::string, std::vector<std::pair<KernelAttr, FuncCreator>>> ke
.AddOutputAttr(kNumberTypeComplex128),
&SpecializeEltWiseGradFunc<complex128>}}},
{kAtanGrad,
{{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
&SpecializeEltWiseGradFunc<float>}}},
{{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
&SpecializeEltWiseGradFunc<float16>},
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
&SpecializeEltWiseGradFunc<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
&SpecializeEltWiseGradFunc<double>},
{KernelAttr()
.AddInputAttr(kNumberTypeComplex64)
.AddInputAttr(kNumberTypeComplex64)
.AddOutputAttr(kNumberTypeComplex64),
&SpecializeEltWiseGradFunc<complex64>},
{KernelAttr()
.AddInputAttr(kNumberTypeComplex128)
.AddInputAttr(kNumberTypeComplex128)
.AddOutputAttr(kNumberTypeComplex128),
&SpecializeEltWiseGradFunc<complex128>}}},
{kAsinhGrad,
{{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
&SpecializeEltWiseGradFunc<float>},

View File

@ -394,6 +394,19 @@ HOST_DEVICE inline Complex<T> asinh(const Complex<T> &z) {
return result;
}
template <typename T>
HOST_DEVICE inline Complex<T> atan(const Complex<T> &z) {
Complex<T> result;
#if defined(__CUDACC__)
auto thrust_result = thrust::atan(thrust::complex<T>(z));
result.real(thrust_result.real());
result.imag(thrust_result.imag());
#else
auto std_result = std::atan(std::complex<T>(z));
result.real(std_result.real());
result.imag(std_result.imag());
#endif
return result;
}
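
For reference, the principal complex arctangent this helper is meant to return satisfies atan(z) = ln((1 + iz) / (1 - iz)) / (2i); numpy's arctan follows the same convention, as a quick check shows:

import numpy as np

z = np.array([0.5 + 0.25j, -1.0 + 0.5j], dtype=np.complex128)
# Principal value: atan(z) = ln((1 + i*z) / (1 - i*z)) / (2i)
identity = np.log((1 + 1j * z) / (1 - 1j * z)) / 2j
assert np.allclose(np.arctan(z), identity)
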
template <typename T>
HOST_DEVICE inline Complex<T> atanh(const Complex<T> &z) {
Complex<T> result;

View File

@ -189,6 +189,27 @@ __global__ void AtanGradKernel(const T *input, const T *dout, T *output, const s
return;
}
template <>
__global__ void AtanGradKernel(const double *input, const double *dout, double *output, const size_t count) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {
double one = 1;
double divisor = one + input[i] * input[i];
output[i] = dout[i] / divisor;
}
return;
}
template <typename T>
__global__ void AtanGradKernel(const Complex<T> *input, const Complex<T> *dout, Complex<T> *output,
const size_t count) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {
Complex<T> one = Complex<T>(1);
Complex<T> divisor = one + input[i] * input[i];
output[i] = dout[i] / conj(divisor);
}
return;
}
template <typename T>
__global__ void TanhGradKernel(const T *__restrict__ input, const T *dout, T *output, const size_t count) {
const T one = static_cast<T>(1);
@ -346,6 +367,13 @@ void AtanGrad(const T *input, const T *dout, T *output, const size_t count, cuda
return;
}
template <typename T>
void AtanGrad(const Complex<T> *input, const Complex<T> *dout, Complex<T> *output, const size_t count,
cudaStream_t cuda_stream) {
AtanGradKernel<<<GET_BLOCKS(count), GET_THREADS, 0, cuda_stream>>>(input, dout, output, count);
return;
}
template <typename T>
void TanhGrad(const T *input, const T *dout, T *output, const size_t count, cudaStream_t cuda_stream) {
TanhGradKernel<<<GET_BLOCKS(count), GET_THREADS, 0, cuda_stream>>>(input, dout, output, count);
@ -629,6 +657,13 @@ template CUDA_LIB_EXPORT void InvGrad<Complex<float>>(const Complex<float> *inpu
template CUDA_LIB_EXPORT void RsqrtGrad<Complex<float>>(const Complex<float> *input, const Complex<float> *dout,
Complex<float> *output, const size_t count,
cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void AtanGrad<Complex<float>>(const Complex<float> *input, const Complex<float> *dout,
Complex<float> *output, const size_t count,
cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void AtanGrad<Complex<double>>(const Complex<double> *input, const Complex<double> *dout,
Complex<double> *output, const size_t count,
cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void RsqrtGrad<Complex<double>>(const Complex<double> *input, const Complex<double> *dout,
Complex<double> *output, const size_t count,
cudaStream_t cuda_stream);

View File

@ -535,6 +535,13 @@ __global__ void AtanKernel(const double *input, double *output, const size_t cou
return;
}
template <typename T>
__global__ void AtanKernel(const Complex<T> *input, Complex<T> *output, const size_t count) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {
output[i] = atan(input[i]);
}
return;
}
template <typename T>
__global__ void TanhKernel(const Complex<T> *input, Complex<T> *output, const size_t count) {
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (count); i += blockDim.x * gridDim.x) {
output[i] = tanh(input[i]);
@ -1007,6 +1014,11 @@ void Atan(const T *input, T *output, const size_t count, cudaStream_t cuda_strea
return;
}
template <typename T>
void Atan(const Complex<T> *input, Complex<T> *output, const size_t count, cudaStream_t cuda_stream) {
AtanKernel<<<GET_BLOCKS(count), GET_THREADS, 0, cuda_stream>>>(input, output, count);
return;
}
template <typename T>
void Asinh(const T *input, T *output, const size_t count, cudaStream_t cuda_stream) {
AsinhKernel<<<GET_BLOCKS(count), GET_THREADS, 0, cuda_stream>>>(input, output, count);
return;
@ -1835,6 +1847,8 @@ template CUDA_LIB_EXPORT void Sign<Complex<float>>(const Complex<float> *input,
const size_t count, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void Cosh<Complex<float>>(const Complex<float> *input, Complex<float> *output,
const size_t count, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void Atan<Complex<float>>(const Complex<float> *input, Complex<float> *output,
const size_t count, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void Atanh<Complex<float>>(const Complex<float> *input, Complex<float> *output,
const size_t count, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void Reciprocal<Complex<float>>(const Complex<float> *input, Complex<float> *output,
@ -1877,6 +1891,8 @@ template CUDA_LIB_EXPORT void Asinh<Complex<double>>(const Complex<double> *inpu
const size_t count, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void Tan<Complex<double>>(const Complex<double> *input, Complex<double> *output,
const size_t count, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void Atan<Complex<double>>(const Complex<double> *input, Complex<double> *output,
const size_t count, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void Rsqrt<Complex<double>>(const Complex<double> *input, Complex<double> *output,
const size_t count, cudaStream_t cuda_stream);
template CUDA_LIB_EXPORT void Sign<Complex<double>>(const Complex<double> *input, Complex<double> *output,

View File

@ -327,7 +327,13 @@ std::map<std::string, std::vector<std::pair<KernelAttr, UnaryOpGpuKernelMod::Una
{{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
&UnaryOpGpuKernelMod::LaunchKernel<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
&UnaryOpGpuKernelMod::LaunchKernel<half>}}},
&UnaryOpGpuKernelMod::LaunchKernel<half>},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
&UnaryOpGpuKernelMod::LaunchKernel<double>},
{KernelAttr().AddInputAttr(kNumberTypeComplex64).AddOutputAttr(kNumberTypeComplex64),
&UnaryOpGpuKernelMod::LaunchKernel<utils::Complex<float>>},
{KernelAttr().AddInputAttr(kNumberTypeComplex128).AddOutputAttr(kNumberTypeComplex128),
&UnaryOpGpuKernelMod::LaunchKernel<utils::Complex<double>>}}},
{kAtanh,
{{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
&UnaryOpGpuKernelMod::LaunchKernel<double>},
@ -473,7 +479,7 @@ bool UnaryOpGpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &in
{kInv, Inv<T>}, {kLog, Logarithm<T>}, {kExp, Exponential<T>}, {kNeg, Negative<T>},
{kSin, Sin<T>}, {kCos, Cos<T>}, {kACos, ACos<T>}, {kAcosh, Acosh<T>},
{kAsin, Asin<T>}, {kAsinh, Asinh<T>}, {kSquare, Square<T>}, {kReciprocal, Reciprocal<T>},
{kRsqrt, Rsqrt<T>}, {kSign, Sign<T>}};
{kRsqrt, Rsqrt<T>}, {kSign, Sign<T>}, {kAtan, Atan<T>}};
copy(func_map_complex.begin(), func_map_complex.end(), inserter(func_map, func_map.begin()));
} else {
std::map<std::string, std::function<void(const T *, T *, const size_t, cudaStream_t)>> func_map_normal = {

View File

@ -110,7 +110,19 @@ std::map<std::string, std::vector<std::pair<KernelAttr, UnaryGradOpGpuKernelMod:
{{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
&UnaryGradOpGpuKernelMod::LaunchKernel<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
&UnaryGradOpGpuKernelMod::LaunchKernel<half>}}},
&UnaryGradOpGpuKernelMod::LaunchKernel<half>},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
&UnaryGradOpGpuKernelMod::LaunchKernel<double>},
{KernelAttr()
.AddInputAttr(kNumberTypeComplex64)
.AddInputAttr(kNumberTypeComplex64)
.AddOutputAttr(kNumberTypeComplex64),
&UnaryGradOpGpuKernelMod::LaunchKernel<utils::Complex<float>>},
{KernelAttr()
.AddInputAttr(kNumberTypeComplex128)
.AddInputAttr(kNumberTypeComplex128)
.AddOutputAttr(kNumberTypeComplex128),
&UnaryGradOpGpuKernelMod::LaunchKernel<utils::Complex<double>>}}},
{kAsinhGrad,
{{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
&UnaryGradOpGpuKernelMod::LaunchKernel<float>},
@ -246,7 +258,8 @@ bool UnaryGradOpGpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr>
{kAsinGrad, AsinGrad<T>},
{kAsinhGrad, AsinhGrad<T>},
{kSqrtGrad, SqrtGrad<T>},
{kRsqrtGrad, RsqrtGrad<T>}};
{kRsqrtGrad, RsqrtGrad<T>},
{kAtanGrad, AtanGrad<T>}};
copy(func_map_complex.begin(), func_map_complex.end(), inserter(func_map, func_map.begin()));
} else {
std::map<std::string, std::function<void(const T *, const T *, T *, const size_t, cudaStream_t)>> func_map_normal =

View File

@ -48,7 +48,7 @@ TypePtr AtanInferType(const PrimitivePtr &primitive, const std::vector<AbstractB
auto prim_name = primitive->name();
MS_EXCEPTION_IF_NULL(input_args[0]);
auto x_type = input_args[0]->BuildType();
const std::set valid_types = {kFloat16, kFloat32, kFloat64};
const std::set valid_types = {kFloat16, kFloat32, kFloat64, kComplex64, kComplex128};
(void)CheckAndConvertUtils::CheckTensorTypeValid("input_x", x_type, valid_types, prim_name);
return x_type;
}
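
With kComplex64 and kComplex128 added to the whitelist, Atan passes type checking for complex tensors. A minimal usage sketch on CPU, assuming the same setup as the tests added later in this commit:

import numpy as np
import mindspore.context as context
from mindspore import Tensor
from mindspore.ops import operations as P

context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")

np_array = np.array([-1, -0.5, 0, 0.5, 1], dtype=np.complex64)
np_array = np_array + 0.5j * np_array
output = P.Atan()(Tensor(np_array))     # complex input is now accepted
assert np.allclose(output.asnumpy(), np.arctan(np_array))
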

View File

@ -60,8 +60,8 @@ TypePtr FloorDivInferType(const PrimitivePtr &primitive, const std::vector<Abstr
<< " one of the inputs must be tensor type but got " << input_type01->ToString() << " and "
<< input_type02->ToString() << ".";
}
const std::set<TypePtr> valid_types = {kFloat16, kFloat32, kFloat64, kInt8, kInt16,
kInt32, kInt64, kUInt8, kUInt16, kBool};
const std::set<TypePtr> valid_types = {kFloat16, kFloat32, kFloat64, kInt8, kInt16, kInt32, kInt64,
kUInt8, kUInt16, kUInt32, kUInt64, kBool, kComplex64, kComplex128};
(void)CheckAndConvertUtils::CheckTypeValid("x", input_type01, valid_types, prim_name);
(void)CheckAndConvertUtils::CheckTypeValid("y", input_type02, valid_types, prim_name);
return input_type01;
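
FloorDiv likewise accepts the widened dtype list (uint32, uint64 and the complex types were added). A minimal usage sketch on CPU for one of the new dtypes, assuming the same setup as the new test file below:

import numpy as np
import mindspore.context as context
from mindspore import Tensor
from mindspore.ops import operations as P

context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")

x = np.array([7, 8, 9], dtype=np.uint32)
y = np.array([2, 3, 4], dtype=np.uint32)
output = P.FloorDiv()(Tensor(x), Tensor(y))   # uint32 now passes the type check
assert np.allclose(output.asnumpy(), np.floor_divide(x, y))
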

View File

@ -38,7 +38,7 @@ TypePtr AtanGradInferType(const PrimitivePtr &primitive, const std::vector<Abstr
MS_EXCEPTION_IF_NULL(input_args[0]);
auto x_type = input_args[0]->BuildType();
MS_EXCEPTION_IF_NULL(x_type);
const std::set<TypePtr> valid_types = {kFloat16, kFloat32};
const std::set<TypePtr> valid_types = {kFloat16, kFloat32, kFloat64, kComplex64, kComplex128};
(void)CheckAndConvertUtils::CheckTensorTypeValid("input_x", x_type, valid_types, prim_name);
return x_type;
}

View File

@ -1914,6 +1914,7 @@ def atan2(x, y):
Args:
x (Tensor): The input tensor.
:math:`(N,*)` where :math:`*` means, any number of additional dimensions.
The data type should be one of the following types: float16, float32, float64.
y (Tensor): The input tensor. It has the same shape with `x`.
Returns:

View File

@ -20,7 +20,6 @@ import mindspore.nn as nn
from mindspore import Tensor
from mindspore import context
from mindspore.ops import operations as P
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
@ -36,16 +35,14 @@ class NetAtan2(nn.Cell):
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_atan2():
np_array = np.array([1, 2, 3, 4, 5]).astype('float32')
input_x = Tensor(np_array)
net = NetAtan2()
output = net(input_x, input_x)
print(output)
expect = np.arctan2(np_array, np_array)
assert np.allclose(output.asnumpy(), expect)
np_array = np.array([1, 2, 3, 4, 5], dtype=np.float64)
@pytest.mark.parametrize('dtype', [np.float16, np.float32, np.float64])
def test_atan2(dtype):
"""
Feature: ALL To ALL
Description: test cases for Atan2
Expectation: the result match to numpy
"""
np_array = np.array([1, 2, 3, 4, 5]).astype(dtype)
input_x = Tensor(np_array)
net = NetAtan2()
output = net(input_x, input_x)

View File

@ -20,27 +20,53 @@ import mindspore.nn as nn
from mindspore import Tensor
from mindspore import context
from mindspore.ops.operations import _grad_ops as G
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
class NetAtanGrad(nn.Cell):
def __init__(self):
super(NetAtanGrad, self).__init__()
self.atanGrad = G.AtanGrad()
self.atan_grad = G.AtanGrad()
def construct(self, x, dy):
return self.atanGrad(x, dy)
return self.atan_grad(x, dy)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_atan_grad():
x = np.array([-0.5, 0, 0.5]).astype('float32')
dy = np.array([1, 0, -1]).astype('float32')
@pytest.mark.parametrize('dtype', [np.float16, np.float32, np.float64])
def test_atan_grad_float(dtype):
"""
Feature: ALL To ALL
Description: test cases for AtanGrad
Expectation: the result match to numpy
"""
x = np.array([-0.5, 0, 0.5]).astype(dtype)
dy = np.array([1, 0, -1]).astype(dtype)
atan_grad = NetAtanGrad()
output = atan_grad(Tensor(x), Tensor(dy))
print(output)
expect = dy / (1 + x * x)
assert np.allclose(output.asnumpy(), expect)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.complex64, np.complex128])
def test_atan_grad_complex(dtype):
"""
Feature: ALL To ALL
Description: test cases for AtanGrad
Expectation: the result match to numpy
"""
x = np.array([-0.5, 0, 0.5]).astype(dtype)
x = x + 0.5j * x
dy = np.array([1, 0, -1]).astype(dtype)
dy = dy + 0.3j * dy
atan_grad = NetAtanGrad()
output = atan_grad(Tensor(x), Tensor(dy))
print(output)
expect = dy / np.conjugate(1 + x * x)
assert np.allclose(output.asnumpy(), expect)

View File

@ -20,7 +20,6 @@ import mindspore.nn as nn
from mindspore import Tensor
from mindspore import context
from mindspore.ops import operations as P
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
@ -79,5 +78,44 @@ def test_atan_forward_float32_tensor_api():
test_atan_forward_tensor_api(np.float32)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.float16, np.float32, np.float64])
def test_atan_float(dtype):
"""
Feature: ALL To ALL
Description: test cases for Atan
Expectation: the result match to numpy
"""
np_array = np.array([-1, -0.5, 0, 0.5, 1], dtype=dtype)
input_x = Tensor(np_array)
net = NetAtan()
output = net(input_x)
print(output)
expect = np.arctan(np_array)
assert np.allclose(output.asnumpy(), expect)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.complex64, np.complex128])
def test_atan_complex(dtype):
"""
Feature: ALL To ALL
Description: test cases for Atan
Expectation: the result match to numpy
"""
np_array = np.array([-1, -0.5, 0, 0.5, 1], dtype=dtype)
np_array = np_array + 0.5j * np_array
input_x = Tensor(np_array)
net = NetAtan()
output = net(input_x)
print(output)
expect = np.arctan(np_array)
assert np.allclose(output.asnumpy(), expect)
if __name__ == '__main__':
test_atan_forward_float32_tensor_api()

View File

@ -0,0 +1,75 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
class NetFloorDiv(nn.Cell):
def __init__(self):
super(NetFloorDiv, self).__init__()
self.floordiv = P.FloorDiv()
def construct(self, x, y):
return self.floordiv(x, y)
@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.float16, np.float32, np.float64, np.int8, np.int16, np.int32,
np.int64, np.uint8, np.uint16, np.uint32, np.uint64])
def test_floor_div_int_float(dtype):
"""
Feature: ALL To ALL
Description: test cases for FloorDiv
Expectation: the result match to numpy
"""
x_np = (np.random.rand(1, 5) * 10).astype(dtype)
# scale and offset so the integer dtypes get non-zero values and non-zero divisors
y_np = (np.random.rand(1, 5) * 10 + 1).astype(dtype)
expect = np.floor_divide(x_np, y_np)
x_input = Tensor(x_np)
y_input = Tensor(y_np)
floor_div = NetFloorDiv()
output = floor_div(x_input, y_input)
assert np.allclose(output.asnumpy(), expect)
@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.complex64, np.complex128])
def test_floor_div_complex(dtype):
"""
Feature: ALL To ALL
Description: test cases for FloorDiv
Expectation: the result match to numpy
"""
x_np = np.random.rand(1, 5).astype(dtype)
x_np = x_np + 0.5j * x_np
y_np = np.random.rand(1, 5).astype(dtype)
y_np = y_np + 0.4j * y_np
expect = np.floor_divide(x_np, y_np)
x_input = Tensor(x_np)
y_input = Tensor(y_np)
floor_div = NetFloorDiv()
output = floor_div(x_input, y_input)
assert np.allclose(output.asnumpy(), expect)

View File

@ -0,0 +1,53 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
class FloorModNet(nn.Cell):
def __init__(self):
super(FloorModNet, self).__init__()
self.floor_mod = P.FloorMod()
def construct(self, x, y):
return self.floor_mod(x, y)
@pytest.mark.level1
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.float16, np.float32, np.float64, np.int8, np.int16, np.int32,
np.int64, np.uint8, np.uint16, np.uint32, np.uint64])
def test_floor_mod(dtype):
"""
Feature: ALL To ALL
Description: test cases for FloorMod
Expectation: the result match to numpy
"""
x_np = np.random.rand(1, 5).astype(dtype)
y_np = np.random.rand(1, 5).astype(dtype) + 1
expect = np.mod(x_np, y_np)
x_input = Tensor(x_np)
y_input = Tensor(y_np)
floor_mod = FloorModNet()
output = floor_mod(x_input, y_input)
assert np.allclose(output.asnumpy(), expect)

View File

@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@ -22,22 +22,72 @@ import mindspore.ops.operations._grad_ops as P
context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
np.random.seed(1)
@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_atangrad_fp32():
"""
Feature: ALL To ALL
Description: test cases for AtanGrad float32
Expectation: the result match to numpy
"""
x_np = np.random.rand(4, 2).astype(np.float32) * 10
dout_np = np.random.rand(4, 2).astype(np.float32) * 10
output_ms = P.AtanGrad()(Tensor(x_np), Tensor(dout_np))
output_np = dout_np / (1 + np.square(x_np))
assert np.allclose(output_ms.asnumpy(), output_np, 1e-4, 1e-4)
@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_atangrad_fp16():
"""
Feature: ALL To ALL
Description: test cases for AtanGrad float16
Expectation: the result match to numpy
"""
x_np = np.random.rand(4, 2).astype(np.float16) * 10
dout_np = np.random.rand(4, 2).astype(np.float16) * 10
output_ms = P.AtanGrad()(Tensor(x_np), Tensor(dout_np))
output_np = dout_np.astype(np.float32) / (1 + np.square(x_np.astype(np.float32)))
assert np.allclose(output_ms.asnumpy(), output_np.astype(np.float16), 1e-3, 1e-3)
@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.float16, np.float32, np.float64])
def test_atan_grad_float(dtype):
"""
Feature: ALL To ALL
Description: test cases for AtanGrad
Expectation: the result match to numpy
"""
x = np.array([-0.5, 0, 0.5]).astype(dtype)
dy = np.array([1, 0, -1]).astype(dtype)
output = P.AtanGrad()(Tensor(x), Tensor(dy))
print(output)
expect = dy / (1 + x * x)
assert np.allclose(output.asnumpy(), expect)
@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.complex64, np.complex128])
def test_atan_grad_complex(dtype):
"""
Feature: ALL To ALL
Description: test cases for AtanGrad
Expectation: the result match to numpy
"""
x = np.array([-0.5, 0, 0.5]).astype(dtype)
x = x + 0.5j * x
dy = np.array([1, 0, -1]).astype(dtype)
dy = dy + 0.3j * dy
output = P.AtanGrad()(Tensor(x), Tensor(dy))
print(output)
expect = dy / np.conjugate(1 + x * x)
assert np.allclose(output.asnumpy(), expect)

View File

@ -22,25 +22,74 @@ from mindspore.ops import operations as P
context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
np.random.seed(1)
@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_atan_fp32():
"""
Feature: ALL To ALL
Description: test cases for Atan float32
Expectation: the result match to numpy
"""
x_np = np.random.rand(4, 2).astype(np.float32) * 10
output_ms = P.Atan()(Tensor(x_np))
output_np = np.arctan(x_np)
assert np.allclose(output_ms.asnumpy(), output_np, 1e-4, 1e-4)
@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_atan_fp16():
"""
Feature: ALL To ALL
Description: test cases for Atan float16
Expectation: the result match to numpy
"""
x_np = np.random.rand(4, 2).astype(np.float16) * 10
output_ms = P.Atan()(Tensor(x_np))
output_np = np.arctan(x_np.astype(np.float32)).astype(np.float16)
assert np.allclose(output_ms.asnumpy(), output_np, 1e-3, 1e-3)
@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.float16, np.float32, np.float64])
def test_atan_float(dtype):
"""
Feature: ALL To ALL
Description: test cases for Atan
Expectation: the result match to numpy
"""
np_array = np.array([-1, -0.5, 0, 0.5, 1], dtype=dtype)
input_x = Tensor(np_array)
output = P.Atan()(input_x)
print(output)
expect = np.arctan(np_array)
assert np.allclose(output.asnumpy(), expect)
@pytest.mark.level1
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
@pytest.mark.parametrize('dtype', [np.complex64, np.complex128])
def test_atan_complex(dtype):
"""
Feature: ALL To ALL
Description: test cases for Atan
Expectation: the result match to numpy
"""
np_array = np.array([-1, -0.5, 0, 0.5, 1], dtype=dtype)
np_array = np_array + 0.5j * np_array
input_x = Tensor(np_array)
output = P.Atan()(input_x)
print(output)
expect = np.arctan(np_array)
assert np.allclose(output.asnumpy(), expect)
def test_atan_forward_tensor_api(nptype):
"""
Feature: test atan forward tensor api for given input dtype.