[AArch64] Add support for NEON scalar floating-point compare instructions.

llvm-svn: 193692
This commit is contained in:
Chad Rosier 2013-10-30 15:20:07 +00:00
parent be020d0309
commit 4d55e6e0a4
4 changed files with 304 additions and 15 deletions

View File

@ -169,6 +169,7 @@ class NoTestOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {}
// z: scalar of half width element type, signed
// r: scalar of double width element type, signed
// a: scalar of element type (splat to vector type)
// b: scalar of unsigned integer/long type (int/float args)
// y: scalar of float
// o: scalar of double
// k: default elt width, double num elts
@ -856,6 +857,29 @@ def SCALAR_CMGTZ : SInst<"vcgtz", "ss", "Sl">;
def SCALAR_CMHI : SInst<"vcgt", "sss", "SUl">;
def SCALAR_CMTST : SInst<"vtst", "sss", "SlSUl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Comparison
// All defs in this group use a prototype starting with 'b', i.e. the result is
// a scalar of unsigned integer/long type (see the modifier list at the top of
// this file). The remaining 's' entries describe the operands; the one-operand
// "bs" forms are the compare-against-zero variants.
// NOTE(review): "SfSd" presumably selects the scalar float and scalar double
// variants - confirm against the type-string documentation.
def SCALAR_FCMEQ : IInst<"vceq", "bss", "SfSd">;
def SCALAR_FCMEQZ : IInst<"vceqz", "bs", "SfSd">;
def SCALAR_FCMGE : IInst<"vcge", "bss", "SfSd">;
def SCALAR_FCMGEZ : IInst<"vcgez", "bs", "SfSd">;
def SCALAR_FCMGT : IInst<"vcgt", "bss", "SfSd">;
def SCALAR_FCMGTZ : IInst<"vcgtz", "bs", "SfSd">;
def SCALAR_FCMLE : IInst<"vcle", "bss", "SfSd">;
def SCALAR_FCMLEZ : IInst<"vclez", "bs", "SfSd">;
def SCALAR_FCMLT : IInst<"vclt", "bss", "SfSd">;
def SCALAR_FCMLTZ : IInst<"vcltz", "bs", "SfSd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
// (also covers the "less than or equal" form, vcale)
def SCALAR_FACGE : IInst<"vcage", "bss", "SfSd">;
def SCALAR_FACLE : IInst<"vcale", "bss", "SfSd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Absolute Compare Mask Greater Than
// (also covers the "less than" form, vcalt)
def SCALAR_FACGT : IInst<"vcagt", "bss", "SfSd">;
def SCALAR_FACLT : IInst<"vcalt", "bss", "SfSd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Absolute Value
def SCALAR_ABS : SInst<"vabs", "ss", "Sl">;

View File

@ -1755,6 +1755,7 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
// Extend element of one-element vector
bool ExtendEle = false;
bool OverloadInt = false;
bool OverloadCmpInt = false;
bool OverloadWideInt = false;
bool OverloadNarrowInt = false;
const char *s = NULL;
@ -2011,71 +2012,151 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
case AArch64::BI__builtin_neon_vceqd_s64:
case AArch64::BI__builtin_neon_vceqd_u64:
Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
OverloadInt = false; break;
OverloadCmpInt = true; break;
// Scalar Compare Equal To Zero
case AArch64::BI__builtin_neon_vceqzd_s64:
case AArch64::BI__builtin_neon_vceqzd_u64:
Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
OverloadInt = false; break;
OverloadCmpInt = true; break;
// Scalar Compare Greater Than or Equal
case AArch64::BI__builtin_neon_vcged_s64:
Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
OverloadInt = false; break;
OverloadCmpInt = true; break;
case AArch64::BI__builtin_neon_vcged_u64:
Int = Intrinsic::aarch64_neon_vchs; s = "vcge";
OverloadInt = false; break;
OverloadCmpInt = true; break;
// Scalar Compare Greater Than or Equal To Zero
case AArch64::BI__builtin_neon_vcgezd_s64:
Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
OverloadInt = false; break;
OverloadCmpInt = true; break;
// Scalar Compare Greater Than
case AArch64::BI__builtin_neon_vcgtd_s64:
Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
OverloadInt = false; break;
OverloadCmpInt = true; break;
case AArch64::BI__builtin_neon_vcgtd_u64:
Int = Intrinsic::aarch64_neon_vchi; s = "vcgt";
OverloadInt = false; break;
OverloadCmpInt = true; break;
// Scalar Compare Greater Than Zero
case AArch64::BI__builtin_neon_vcgtzd_s64:
Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
OverloadInt = false; break;
OverloadCmpInt = true; break;
// Scalar Compare Less Than or Equal
case AArch64::BI__builtin_neon_vcled_s64:
Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
OverloadInt = false; std::swap(Ops[0], Ops[1]); break;
OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
case AArch64::BI__builtin_neon_vcled_u64:
Int = Intrinsic::aarch64_neon_vchs; s = "vchs";
OverloadInt = false; std::swap(Ops[0], Ops[1]); break;
OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
// Scalar Compare Less Than or Equal To Zero
case AArch64::BI__builtin_neon_vclezd_s64:
Int = Intrinsic::aarch64_neon_vclez; s = "vcle";
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
OverloadInt = false; break;
OverloadCmpInt = true; break;
// Scalar Compare Less Than
case AArch64::BI__builtin_neon_vcltd_s64:
Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
OverloadInt = false; std::swap(Ops[0], Ops[1]); break;
OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
case AArch64::BI__builtin_neon_vcltd_u64:
Int = Intrinsic::aarch64_neon_vchi; s = "vchi";
OverloadInt = false; std::swap(Ops[0], Ops[1]); break;
OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
// Scalar Compare Less Than Zero
case AArch64::BI__builtin_neon_vcltzd_s64:
Int = Intrinsic::aarch64_neon_vcltz; s = "vclt";
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
OverloadInt = false; break;
OverloadCmpInt = true; break;
// Scalar Floating-point Compare Equal
case AArch64::BI__builtin_neon_vceqs_f32:
case AArch64::BI__builtin_neon_vceqd_f64:
Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
OverloadCmpInt = true; break;
// Scalar Floating-point Compare Equal To Zero
case AArch64::BI__builtin_neon_vceqzs_f32:
case AArch64::BI__builtin_neon_vceqzd_f64:
Int = Intrinsic::aarch64_neon_vceq; s = "vceq";
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
OverloadCmpInt = true; break;
// Scalar Floating-point Compare Greater Than Or Equal
case AArch64::BI__builtin_neon_vcges_f32:
case AArch64::BI__builtin_neon_vcged_f64:
Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
OverloadCmpInt = true; break;
// Scalar Floating-point Compare Greater Than Or Equal To Zero
case AArch64::BI__builtin_neon_vcgezs_f32:
case AArch64::BI__builtin_neon_vcgezd_f64:
Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
OverloadCmpInt = true; break;
// Scalar Floating-point Compare Greater Than
case AArch64::BI__builtin_neon_vcgts_f32:
case AArch64::BI__builtin_neon_vcgtd_f64:
Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
OverloadCmpInt = true; break;
// Scalar Floating-point Compare Greater Than Zero
case AArch64::BI__builtin_neon_vcgtzs_f32:
case AArch64::BI__builtin_neon_vcgtzd_f64:
Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
OverloadCmpInt = true; break;
// Scalar Floating-point Compare Less Than or Equal
case AArch64::BI__builtin_neon_vcles_f32:
case AArch64::BI__builtin_neon_vcled_f64:
Int = Intrinsic::aarch64_neon_vcge; s = "vcge";
OverloadCmpInt = true; break;
// Scalar Floating-point Compare Less Than Or Equal To Zero
case AArch64::BI__builtin_neon_vclezs_f32:
case AArch64::BI__builtin_neon_vclezd_f64:
Int = Intrinsic::aarch64_neon_vclez; s = "vcle";
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
OverloadCmpInt = true; break;
// Scalar Floating-point Compare Less Than
case AArch64::BI__builtin_neon_vclts_f32:
case AArch64::BI__builtin_neon_vcltd_f64:
Int = Intrinsic::aarch64_neon_vcgt; s = "vcgt";
OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
// Scalar Floating-point Compare Less Than Zero
case AArch64::BI__builtin_neon_vcltzs_f32:
case AArch64::BI__builtin_neon_vcltzd_f64:
Int = Intrinsic::aarch64_neon_vcltz; s = "vclt";
// Add implicit zero operand.
Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType()));
OverloadCmpInt = true; break;
// Scalar Floating-point Absolute Compare Greater Than Or Equal
case AArch64::BI__builtin_neon_vcages_f32:
case AArch64::BI__builtin_neon_vcaged_f64:
Int = Intrinsic::aarch64_neon_vcage; s = "vcage";
OverloadCmpInt = true; break;
// Scalar Floating-point Absolute Compare Greater Than
case AArch64::BI__builtin_neon_vcagts_f32:
case AArch64::BI__builtin_neon_vcagtd_f64:
Int = Intrinsic::aarch64_neon_vcagt; s = "vcagt";
OverloadCmpInt = true; break;
// Scalar Floating-point Absolute Compare Less Than Or Equal
case AArch64::BI__builtin_neon_vcales_f32:
case AArch64::BI__builtin_neon_vcaled_f64:
Int = Intrinsic::aarch64_neon_vcage; s = "vcage";
OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
// Scalar Floating-point Absolute Compare Less Than
case AArch64::BI__builtin_neon_vcalts_f32:
case AArch64::BI__builtin_neon_vcaltd_f64:
Int = Intrinsic::aarch64_neon_vcagt; s = "vcalt";
OverloadCmpInt = true; std::swap(Ops[0], Ops[1]); break;
// Scalar Compare Bitwise Test Bits
case AArch64::BI__builtin_neon_vtstd_s64:
case AArch64::BI__builtin_neon_vtstd_u64:
Int = Intrinsic::aarch64_neon_vtstd; s = "vtst";
OverloadInt = false; break;
OverloadCmpInt = true; break;
// Scalar Absolute Value
case AArch64::BI__builtin_neon_vabsd_s64:
Int = Intrinsic::aarch64_neon_vabs;
@ -2187,6 +2268,19 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
llvm::VectorType::getExtendedElementVectorType(VTy) :
llvm::VectorType::getTruncatedElementVectorType(VTy);
F = CGF.CGM.getIntrinsic(Int, RTy);
} else if (OverloadCmpInt) {
// Determine the types of this overloaded AArch64 intrinsic
// Three overload types are supplied: one for the result followed by two
// identical ones for the operands. Each scalar type is wrapped in a
// one-element vector.
SmallVector<llvm::Type *, 3> Tys;
// The operand type is taken from the call's last argument.
const Expr *Arg = E->getArg(E->getNumArgs()-1);
// Tys[0]: one-element vector of the builtin's return type.
llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
llvm::VectorType *VTy = llvm::VectorType::get(Ty, 1);
Tys.push_back(VTy);
// Tys[1] and Tys[2]: one-element vector of the argument type, pushed twice
// (presumably one entry per compare operand).
Ty = CGF.ConvertType(Arg->getType());
VTy = llvm::VectorType::get(Ty, 1);
Tys.push_back(VTy);
Tys.push_back(VTy);
F = CGF.CGM.getIntrinsic(Int, Tys);
} else
F = CGF.CGM.getIntrinsic(Int);

View File

@ -7315,3 +7315,172 @@ int32_t test_vqmovnd_u64(int64_t a) {
// CHECK: uqxtn {{s[0-9]+}}, {{d[0-9]+}}
return (int32_t)vqmovnd_u64(a);
}
// Scalar f32 compare-equal: expects a three-register fcmeq on S registers.
uint32_t test_vceqs_f32(float32_t a, float32_t b) {
// CHECK: test_vceqs_f32
// CHECK: fcmeq {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
return (uint32_t)vceqs_f32(a, b);
}
// Scalar f64 compare-equal: expects a three-register fcmeq on D registers.
uint64_t test_vceqd_f64(float64_t a, float64_t b) {
// CHECK: test_vceqd_f64
// CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
return (uint64_t)vceqd_f64(a, b);
}
// Scalar f32 compare-equal-to-zero: expects fcmeq with the #0.0 immediate form.
uint32_t test_vceqzs_f32(float32_t a) {
// CHECK: test_vceqzs_f32
// CHECK: fcmeq {{s[0-9]+}}, {{s[0-9]+}}, #0.0
return (uint32_t)vceqzs_f32(a);
}
// Scalar f64 compare-equal-to-zero: expects fcmeq with the #0.0 immediate form.
uint64_t test_vceqzd_f64(float64_t a) {
// CHECK: test_vceqzd_f64
// CHECK: fcmeq {{d[0-9]+}}, {{d[0-9]+}}, #0.0
return (uint64_t)vceqzd_f64(a);
}
// Scalar f32 compare greater-than-or-equal: expects a three-register fcmge on
// S registers.
uint32_t test_vcges_f32(float32_t a, float32_t b) {
// CHECK: test_vcges_f32
// CHECK: fcmge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
return (uint32_t)vcges_f32(a, b);
}
// Scalar f64 compare greater-than-or-equal: expects a three-register fcmge on
// D registers.
uint64_t test_vcged_f64(float64_t a, float64_t b) {
// CHECK: test_vcged_f64
// CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
return (uint64_t)vcged_f64(a, b);
}
// Scalar f32 compare greater-than-or-equal-to-zero: expects fcmge with the
// #0.0 immediate form.
uint32_t test_vcgezs_f32(float32_t a) {
// CHECK: test_vcgezs_f32
// CHECK: fcmge {{s[0-9]+}}, {{s[0-9]+}}, #0.0
return (uint32_t)vcgezs_f32(a);
}
// Scalar f64 compare greater-than-or-equal-to-zero: expects fcmge with the
// #0.0 immediate form.
uint64_t test_vcgezd_f64(float64_t a) {
// CHECK: test_vcgezd_f64
// CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, #0.0
return (uint64_t)vcgezd_f64(a);
}
// Scalar f32 compare greater-than: expects a three-register fcmgt on S
// registers.
uint32_t test_vcgts_f32(float32_t a, float32_t b) {
// CHECK: test_vcgts_f32
// CHECK: fcmgt {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
return (uint32_t)vcgts_f32(a, b);
}
// Scalar f64 compare greater-than: expects a three-register fcmgt on D
// registers.
uint64_t test_vcgtd_f64(float64_t a, float64_t b) {
// CHECK: test_vcgtd_f64
// CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
return (uint64_t)vcgtd_f64(a, b);
}
// Scalar f32 compare greater-than-zero: expects fcmgt with the #0.0 immediate
// form.
uint32_t test_vcgtzs_f32(float32_t a) {
// CHECK: test_vcgtzs_f32
// CHECK: fcmgt {{s[0-9]+}}, {{s[0-9]+}}, #0.0
return (uint32_t)vcgtzs_f32(a);
}
// Scalar f64 compare greater-than-zero: expects fcmgt with the #0.0 immediate
// form.
uint64_t test_vcgtzd_f64(float64_t a) {
// CHECK: test_vcgtzd_f64
// CHECK: fcmgt {{d[0-9]+}}, {{d[0-9]+}}, #0.0
return (uint64_t)vcgtzd_f64(a);
}
// Scalar f32 compare less-than-or-equal: expects a three-register fcmge on S
// registers.
// NOTE(review): the pattern accepts any source register order, so it cannot
// tell fcmge(a, b) from fcmge(b, a). a <= b requires the operands reversed;
// consider pinning them the way the vclt tests do (s1, s0).
uint32_t test_vcles_f32(float32_t a, float32_t b) {
// CHECK: test_vcles_f32
// CHECK: fcmge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
return (uint32_t)vcles_f32(a, b);
}
// Scalar f64 compare less-than-or-equal: expects a three-register fcmge on D
// registers.
// NOTE(review): the pattern accepts any source register order, so it cannot
// tell fcmge(a, b) from fcmge(b, a). a <= b requires the operands reversed;
// consider pinning them the way the vclt tests do (d1, d0).
uint64_t test_vcled_f64(float64_t a, float64_t b) {
// CHECK: test_vcled_f64
// CHECK: fcmge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
return (uint64_t)vcled_f64(a, b);
}
// Scalar f32 compare less-than-or-equal-to-zero: expects fcmle with the #0.0
// immediate form.
uint32_t test_vclezs_f32(float32_t a) {
// CHECK: test_vclezs_f32
// CHECK: fcmle {{s[0-9]+}}, {{s[0-9]+}}, #0.0
return (uint32_t)vclezs_f32(a);
}
// Scalar f64 compare less-than-or-equal-to-zero: expects fcmle with the #0.0
// immediate form.
uint64_t test_vclezd_f64(float64_t a) {
// CHECK: test_vclezd_f64
// CHECK: fcmle {{d[0-9]+}}, {{d[0-9]+}}, #0.0
return (uint64_t)vclezd_f64(a);
}
// Scalar f32 compare less-than: expects fcmgt with the source registers pinned
// as s1, s0, i.e. reversed (assuming a and b arrive in s0 and s1).
uint32_t test_vclts_f32(float32_t a, float32_t b) {
// CHECK: test_vclts_f32
// CHECK: fcmgt {{s[0-9]+}}, s1, s0
return (uint32_t)vclts_f32(a, b);
}
// Scalar f64 compare less-than: expects fcmgt with the source registers pinned
// as d1, d0, i.e. reversed (assuming a and b arrive in d0 and d1).
uint64_t test_vcltd_f64(float64_t a, float64_t b) {
// CHECK: test_vcltd_f64
// CHECK: fcmgt {{d[0-9]+}}, d1, d0
return (uint64_t)vcltd_f64(a, b);
}
// Scalar f32 compare less-than-zero: expects fcmlt with the #0.0 immediate
// form.
uint32_t test_vcltzs_f32(float32_t a) {
// CHECK: test_vcltzs_f32
// CHECK: fcmlt {{s[0-9]+}}, {{s[0-9]+}}, #0.0
return (uint32_t)vcltzs_f32(a);
}
// Scalar f64 compare less-than-zero: expects fcmlt with the #0.0 immediate
// form.
uint64_t test_vcltzd_f64(float64_t a) {
// CHECK: test_vcltzd_f64
// CHECK: fcmlt {{d[0-9]+}}, {{d[0-9]+}}, #0.0
return (uint64_t)vcltzd_f64(a);
}
// Scalar f32 absolute compare greater-than-or-equal: expects a three-register
// facge on S registers.
uint32_t test_vcages_f32(float32_t a, float32_t b) {
// CHECK: test_vcages_f32
// CHECK: facge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
return (uint32_t)vcages_f32(a, b);
}
// Scalar f64 absolute compare greater-than-or-equal: expects a three-register
// facge on D registers.
uint64_t test_vcaged_f64(float64_t a, float64_t b) {
// CHECK: test_vcaged_f64
// CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
return (uint64_t)vcaged_f64(a, b);
}
// Scalar f32 absolute compare greater-than: expects a three-register facgt on
// S registers.
uint32_t test_vcagts_f32(float32_t a, float32_t b) {
// CHECK: test_vcagts_f32
// CHECK: facgt {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
return (uint32_t)vcagts_f32(a, b);
}
// Scalar f64 absolute compare greater-than: expects a three-register facgt on
// D registers.
uint64_t test_vcagtd_f64(float64_t a, float64_t b) {
// CHECK: test_vcagtd_f64
// CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
return (uint64_t)vcagtd_f64(a, b);
}
// Scalar f32 absolute compare less-than-or-equal: expects a three-register
// facge on S registers.
// NOTE(review): the pattern does not pin the source register order, so the
// operand reversal needed for "less than or equal" is not verified here;
// consider pinning s1, s0 as the vclt tests do.
uint32_t test_vcales_f32(float32_t a, float32_t b) {
// CHECK: test_vcales_f32
// CHECK: facge {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
return (uint32_t)vcales_f32(a, b);
}
// Scalar f64 absolute compare less-than-or-equal: expects a three-register
// facge on D registers.
// NOTE(review): the pattern does not pin the source register order, so the
// operand reversal needed for "less than or equal" is not verified here;
// consider pinning d1, d0 as the vclt tests do.
uint64_t test_vcaled_f64(float64_t a, float64_t b) {
// CHECK: test_vcaled_f64
// CHECK: facge {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
return (uint64_t)vcaled_f64(a, b);
}
// Scalar f32 absolute compare less-than: expects a three-register facgt on S
// registers.
// NOTE(review): the pattern does not pin the source register order, so the
// operand reversal needed for "less than" is not verified here; consider
// pinning s1, s0 as the vclt tests do.
uint32_t test_vcalts_f32(float32_t a, float32_t b) {
// CHECK: test_vcalts_f32
// CHECK: facgt {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
return (uint32_t)vcalts_f32(a, b);
}
// Scalar f64 absolute compare less-than: expects a three-register facgt on D
// registers.
// NOTE(review): the pattern does not pin the source register order, so the
// operand reversal needed for "less than" is not verified here; consider
// pinning d1, d0 as the vclt tests do.
uint64_t test_vcaltd_f64(float64_t a, float64_t b) {
// CHECK: test_vcaltd_f64
// CHECK: facgt {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
return (uint64_t)vcaltd_f64(a, b);
}

View File

@ -442,6 +442,8 @@ static char ModType(const char mod, char type, bool &quad, bool &poly,
usgn = true;
}
break;
case 'b':
// 'b' marks the type as scalar and then, having no break, falls through into
// the 'u' case below to pick up the unsigned handling.
// NOTE(review): the fall-through is presumably intentional ('b' is documented
// as a scalar of unsigned integer/long type) - confirm, and consider marking
// it explicitly.
scal = true;
case 'u':
usgn = true;
poly = false;