[PowerPC] Provide fastmath sqrt and div functions in altivec.h

This adds the long overdue implementations of these functions
that have been part of the ABI document and are now part of
the "Power Vector Intrinsic Programming Reference" (PVIPR).

The approach is to add new builtins and to emit code with
the fast flag regardless of whether fastmath was specified
on the command line.

Differential revision: https://reviews.llvm.org/D101209
This commit is contained in:
Nemanja Ivanovic 2021-04-30 18:54:44 -05:00
parent 7994615ea0
commit c3da07d216
5 changed files with 83 additions and 0 deletions

View File

@ -600,6 +600,12 @@ BUILTIN(__builtin_truncf128_round_to_odd, "dLLd", "")
BUILTIN(__builtin_vsx_scalar_extract_expq, "ULLiLLd", "") BUILTIN(__builtin_vsx_scalar_extract_expq, "ULLiLLd", "")
BUILTIN(__builtin_vsx_scalar_insert_exp_qp, "LLdLLdULLi", "") BUILTIN(__builtin_vsx_scalar_insert_exp_qp, "LLdLLdULLi", "")
// Fastmath by default builtins
// Code generation emits these with the `fast` flag set on the resulting
// instructions even when fast-math was not requested on the command line.
// Reciprocal square root (1.0 / sqrt(x)) for vector float / vector double.
BUILTIN(__builtin_ppc_rsqrtf, "V4fV4f", "")
BUILTIN(__builtin_ppc_rsqrtd, "V2dV2d", "")
// Division (x / y) for vector float / vector double.
BUILTIN(__builtin_ppc_recipdivf, "V4fV4fV4f", "")
BUILTIN(__builtin_ppc_recipdivd, "V2dV2dV2d", "")
// HTM builtins // HTM builtins
BUILTIN(__builtin_tbegin, "UiUIi", "") BUILTIN(__builtin_tbegin, "UiUIi", "")
BUILTIN(__builtin_tend, "UiUIi", "") BUILTIN(__builtin_tend, "UiUIi", "")

View File

@ -15113,6 +15113,25 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, X); return Builder.CreateCall(F, X);
} }
// Fastmath by default: the instructions produced for these builtins always
// carry the `fast` flag, whether or not fast-math was requested on the
// command line.
case PPC::BI__builtin_ppc_recipdivf:
case PPC::BI__builtin_ppc_recipdivd:
case PPC::BI__builtin_ppc_rsqrtf:
case PPC::BI__builtin_ppc_rsqrtd: {
  llvm::Type *ResultType = ConvertType(E->getType());
  const bool IsRecipDiv = BuiltinID == PPC::BI__builtin_ppc_recipdivf ||
                          BuiltinID == PPC::BI__builtin_ppc_recipdivd;

  // Emit the operands while the builder still has the caller's fast-math
  // flags, so that floating-point arithmetic nested inside an argument
  // expression is not silently promoted to `fast`.
  Value *X = EmitScalarExpr(E->getArg(0));
  Value *Y = IsRecipDiv ? EmitScalarExpr(E->getArg(1)) : nullptr;

  // The builder's fast-math flags are sticky: setting them without restoring
  // them would mark every floating-point instruction emitted later in this
  // function as `fast`. The guard restores the previous flags on scope exit.
  llvm::IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
  Builder.getFastMathFlags().setFast();

  if (IsRecipDiv)
    return Builder.CreateFDiv(X, Y, "recipdiv");

  // rsqrt(x) is emitted as 1.0 / sqrt(x); both instructions are `fast`.
  auto *One = ConstantFP::get(ResultType, 1.0);
  llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
  return Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");
}
// FMA variations // FMA variations
case PPC::BI__builtin_vsx_xvmaddadp: case PPC::BI__builtin_vsx_xvmaddadp:
case PPC::BI__builtin_vsx_xvmaddasp: case PPC::BI__builtin_vsx_xvmaddasp:

View File

@ -8359,6 +8359,16 @@ static __inline__ vector double __ATTRS_o_ai vec_rsqrte(vector double __a) {
} }
#endif #endif
/* vec_rsqrt */

/* Returns 1.0 / sqrt(__a) for a vector of floats. The underlying builtin is
 * always lowered with fast-math semantics, independent of the command-line
 * fast-math setting. */
static __inline__ vector float __ATTRS_o_ai vec_rsqrt(vector float __a) {
  return __builtin_ppc_rsqrtf(__a);
}
#ifdef __VSX__
/* Double-precision overload of vec_rsqrt; only declared when VSX is enabled.
 * Returns 1.0 / sqrt(__a) through the fast-math-by-default builtin. */
static __inline__ vector double __ATTRS_o_ai vec_rsqrt(vector double __a) {
  return __builtin_ppc_rsqrtd(__a);
}
#endif
/* vec_vrsqrtefp */ /* vec_vrsqrtefp */
static __inline__ __vector float __attribute__((__always_inline__)) static __inline__ __vector float __attribute__((__always_inline__))
@ -17897,6 +17907,18 @@ static vector signed char __ATTRS_o_ai vec_nabs(vector signed char __a) {
return __builtin_altivec_vminsb(__a, -__a); return __builtin_altivec_vminsb(__a, -__a);
} }
/* vec_recipdiv */

/* Divides __a by __b for vectors of floats. The underlying builtin is always
 * lowered with fast-math semantics, independent of the command-line
 * fast-math setting. */
static vector float __ATTRS_o_ai vec_recipdiv(vector float __a,
                                              vector float __b) {
  vector float __quot = __builtin_ppc_recipdivf(__a, __b);
  return __quot;
}
#ifdef __VSX__
/* Double-precision overload of vec_recipdiv; only declared when VSX is
 * enabled. Divides __a by __b through the fast-math-by-default builtin. */
static vector double __ATTRS_o_ai vec_recipdiv(vector double __a,
                                               vector double __b) {
  vector double __quot = __builtin_ppc_recipdivd(__a, __b);
  return __quot;
}
#endif
#ifdef __POWER10_VECTOR__ #ifdef __POWER10_VECTOR__
/* vec_extractm */ /* vec_extractm */

View File

@ -9577,3 +9577,21 @@ void test12() {
// CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 1 // CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 1
// CHECK-LE: call void @llvm.ppc.vsx.stxvw4x.be(<4 x i32> %{{[0-9]+}}, i8* %{{[0-9]+}}) // CHECK-LE: call void @llvm.ppc.vsx.stxvw4x.be(<4 x i32> %{{[0-9]+}}, i8* %{{[0-9]+}})
} }
// Exercises vec_rsqrt on vector float. The expected IR is a fast-flagged call
// to llvm.sqrt.v4f32 followed by a fast-flagged fdiv whose first operand is a
// vector of 1.0 constants. The directives are repeated for both prefixes used
// by this file. Parameter `b` is not referenced by the body.
vector float test_rsqrtf(vector float a, vector float b) {
// CHECK-LABEL: test_rsqrtf
// CHECK: call fast <4 x float> @llvm.sqrt.v4f32
// CHECK: fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
// CHECK-LE-LABEL: test_rsqrtf
// CHECK-LE: call fast <4 x float> @llvm.sqrt.v4f32
// CHECK-LE: fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
return vec_rsqrt(a);
}
// Exercises vec_recipdiv on vector float. The expected IR is a single
// fast-flagged fdiv on <4 x float>, matched under both prefixes used by this
// file.
vector float test_recipdivf(vector float a, vector float b) {
// CHECK-LABEL: test_recipdivf
// CHECK: fdiv fast <4 x float>
// CHECK-LE-LABEL: test_recipdivf
// CHECK-LE: fdiv fast <4 x float>
return vec_recipdiv(a, b);
}

View File

@ -2283,3 +2283,21 @@ void test_builtin_xvcpsgndp(vector double a, vector double b) {
// CHECK-NEXT: call <2 x double> @llvm.copysign.v2f64(<2 x double> [[RA]], <2 x double> [[RB]]) // CHECK-NEXT: call <2 x double> @llvm.copysign.v2f64(<2 x double> [[RA]], <2 x double> [[RB]])
__builtin_vsx_xvcpsgndp(a, b); __builtin_vsx_xvcpsgndp(a, b);
} }
// Exercises vec_recipdiv on vector double. The expected IR is a single
// fast-flagged fdiv on <2 x double>, matched under both prefixes used by this
// file.
vector double test_recipdivd(vector double a, vector double b) {
// CHECK-LABEL: test_recipdivd
// CHECK: fdiv fast <2 x double>
// CHECK-LE-LABEL: test_recipdivd
// CHECK-LE: fdiv fast <2 x double>
return vec_recipdiv(a, b);
}
// Exercises vec_rsqrt on vector double. The expected IR is a fast-flagged call
// to llvm.sqrt.v2f64 followed by a fast-flagged fdiv whose first operand is a
// vector of 1.0 constants. The directives are repeated for both prefixes used
// by this file. Parameter `b` is not referenced by the body.
vector double test_rsqrtd(vector double a, vector double b) {
// CHECK-LABEL: test_rsqrtd
// CHECK: call fast <2 x double> @llvm.sqrt.v2f64
// CHECK: fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>
// CHECK-LE-LABEL: test_rsqrtd
// CHECK-LE: call fast <2 x double> @llvm.sqrt.v2f64
// CHECK-LE: fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>
return vec_rsqrt(a);
}