PowerPC: More support for Altivec compare operations

This patch adds more support for vector type comparisons using altivec.
It adds correct support for v16i8, v8i16, v4i32, and v4f32 vector
types for comparison operators ==, !=, >, >=, <, and <=.

llvm-svn: 167015
This commit is contained in:
Adhemerval Zanella 2012-10-30 13:50:19 +00:00
parent 3ce427c039
commit 56775e0f13
3 changed files with 491 additions and 27 deletions

View File

@ -623,6 +623,88 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
}
}
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
//
// The caller (SelectSETCC) is responsible for operand swapping (LT/LE via
// swapped GT/GE) and for combining with VNOR/VOR for NE/GE/LE forms; this
// function only picks the base compare instruction.
//
// NOTE(review): ISD::SETO and ISD::SETONE are not handled here, and the
// lowering hunk in this patch only marks the unordered FP predicates
// (SETUO/SETUEQ/SETUGT/SETUGE/SETULT/SETULE) as Expand. An ordered
// `fcmp one` on v4f32 would therefore reach the llvm_unreachable below —
// confirm SETO/SETONE are also expanded by legalize.
static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
  switch (CC) {
  case ISD::SETEQ:
  case ISD::SETUEQ:
  case ISD::SETNE:
  case ISD::SETUNE:
    // Equality is signedness-agnostic, so the unsigned integer compares
    // serve both; NE/UNE are produced by negating the result at the caller.
    if (VecVT == MVT::v16i8)
      return PPC::VCMPEQUB;
    else if (VecVT == MVT::v8i16)
      return PPC::VCMPEQUH;
    else if (VecVT == MVT::v4i32)
      return PPC::VCMPEQUW;
    // v4f32 != v4f32 can be translated to unordered not equal
    else if (VecVT == MVT::v4f32)
      return PPC::VCMPEQFP;
    break;
  case ISD::SETLT:
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETGE:
    // Signed integer orderings (and the "don't care" FP forms) use the
    // signed greater-than compares; FP uses vcmpgtfp.
    if (VecVT == MVT::v16i8)
      return PPC::VCMPGTSB;
    else if (VecVT == MVT::v8i16)
      return PPC::VCMPGTSH;
    else if (VecVT == MVT::v4i32)
      return PPC::VCMPGTSW;
    else if (VecVT == MVT::v4f32)
      return PPC::VCMPGTFP;
    break;
  case ISD::SETULT:
  case ISD::SETUGT:
  case ISD::SETUGE:
  case ISD::SETULE:
    // Unsigned integer orderings only: the unordered FP forms are marked
    // Expand in PPCTargetLowering, so no v4f32 case is needed here.
    if (VecVT == MVT::v16i8)
      return PPC::VCMPGTUB;
    else if (VecVT == MVT::v8i16)
      return PPC::VCMPGTUH;
    else if (VecVT == MVT::v4i32)
      return PPC::VCMPGTUW;
    break;
  case ISD::SETOEQ:
    if (VecVT == MVT::v4f32)
      return PPC::VCMPEQFP;
    break;
  case ISD::SETOLT:
  case ISD::SETOGT:
  case ISD::SETOLE:
    if (VecVT == MVT::v4f32)
      return PPC::VCMPGTFP;
    break;
  case ISD::SETOGE:
    // vcmpgefp exists natively, so >= needs no eq|gt combination.
    if (VecVT == MVT::v4f32)
      return PPC::VCMPGEFP;
    break;
  default:
    break;
  }
  // Covers both non-altivec vector types and unsupported condition codes
  // (the old message wrongly claimed this was integer-only).
  llvm_unreachable("Invalid vector compare type or condition");
}
// getVCmpEQInst: return the equal compare instruction for the specified vector
// type. Since this is for altivec specific code, only support the altivec
// types (v16i8, v8i16, v4i32, and v4f32).
// getVCmpEQInst: return the equal compare instruction for the specified vector
// type. Since this is for altivec specific code, only support the altivec
// types (v16i8, v8i16, v4i32, and v4f32). Used by SelectSETCC to build the
// eq half of the GE/LE (gt|eq, lt|eq) combinations.
static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) {
  switch (VecVT) {
  case MVT::v16i8:
    return PPC::VCMPEQUB; // equality is signedness-agnostic
  case MVT::v8i16:
    return PPC::VCMPEQUH;
  case MVT::v4i32:
    return PPC::VCMPEQUW;
  case MVT::v4f32:
    return PPC::VCMPEQFP;
  default:
    // Fixed diagnostic: this switch is over the vector *type*, not a
    // condition code, and it accepts v4f32, so the old "Invalid integer
    // vector compare condition" message was misleading on both counts.
    llvm_unreachable("Invalid vector type for equality compare");
  }
}
SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
unsigned Imm;
@ -706,20 +788,58 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// Altivec Vector compare instructions do not set any CR register by default
// Altivec Vector compare instructions do not set any CR register by default and
// vector compare operations return the same type as the operands.
if (LHS.getValueType().isVector()) {
unsigned int Opc;
if (LHS.getValueType() == MVT::v16i8)
Opc = PPC::VCMPEQUB;
else if (LHS.getValueType() == MVT::v4i32)
Opc = PPC::VCMPEQUW;
else if (LHS.getValueType() == MVT::v8i16)
Opc = PPC::VCMPEQUH;
else if (LHS.getValueType() == MVT::v4f32)
Opc = PPC::VCMPEQFP;
else
llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
return CurDAG->SelectNodeTo(N, Opc, LHS.getValueType(), LHS, RHS);
EVT VecVT = LHS.getValueType();
MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
unsigned int VCmpInst = getVCmpInst(VT, CC);
switch (CC) {
case ISD::SETEQ:
case ISD::SETOEQ:
case ISD::SETUEQ:
return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
case ISD::SETNE:
case ISD::SETONE:
case ISD::SETUNE: {
SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
return CurDAG->SelectNodeTo(N, PPC::VNOR, VecVT, VCmp, VCmp);
}
case ISD::SETLT:
case ISD::SETOLT:
case ISD::SETULT:
return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS);
case ISD::SETGT:
case ISD::SETOGT:
case ISD::SETUGT:
return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
case ISD::SETGE:
case ISD::SETOGE:
case ISD::SETUGE: {
// Small optimization: Altivec provides a 'Vector Compare Greater Than
// or Equal To' instruction (vcmpgefp), so in this case there is no
// need for extra logic for the equal compare.
if (VecVT.getSimpleVT().isFloatingPoint()) {
return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
} else {
SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
unsigned int VCmpEQInst = getVCmpEQInst(VT);
SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ);
}
}
case ISD::SETLE:
case ISD::SETOLE:
case ISD::SETULE: {
SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0);
unsigned int VCmpEQInst = getVCmpEQInst(VT);
SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ);
}
default:
llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
}
}
bool Inv;

View File

@ -396,6 +396,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
// Altivec does not contain unordered floating-point compare instructions
setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
}
if (Subtarget->has64BitSupport()) {

View File

@ -1,6 +1,9 @@
; RUN: llc -mattr=+altivec < %s | FileCheck %s
; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
; Check vector comparisons using altivec.
; Check vector comparisons using altivec. For non-native types, just a basic
; comparison instruction check is done. For altivec-supported types (16i8,
; 8i16, 4i32, and 4f32) all the comparison operators (==, !=, >, >=, <, <=)
; are checked.
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
@ -33,13 +36,105 @@ define <8 x i8> @v8si8_cmp(<8 x i8> %x, <8 x i8> %y) nounwind readnone {
; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
define <16 x i8> @v16si8_cmp(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
; Additional tests for v16i8 since it is an altivec native type
define <16 x i8> @v16si8_cmp_eq(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
%cmp = icmp eq <16 x i8> %x, %y
%sext = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %sext
}
; CHECK: v16si8_cmp:
; CHECK: vcmpequb {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: v16si8_cmp_eq:
; CHECK: vcmpequb 2, 2, 3
; ne has no native altivec compare; per SelectSETCC it lowers to vcmpequb
; followed by a vnor of the result with itself.
define <16 x i8> @v16si8_cmp_ne(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
entry:
%cmp = icmp ne <16 x i8> %x, %y
%sext = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %sext
}
; CHECK: v16si8_cmp_ne:
; CHECK: vcmpequb [[RET:[0-9]+]], 2, 3
; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
define <16 x i8> @v16si8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
entry:
%cmp = icmp sle <16 x i8> %x, %y
%sext = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %sext
}
; CHECK: v16si8_cmp_le:
; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
; CHECK-NEXT: vcmpgtsb [[RCMPLE:[0-9]+]], 3, 2
; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
define <16 x i8> @v16ui8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
entry:
%cmp = icmp ule <16 x i8> %x, %y
%sext = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %sext
}
; CHECK: v16ui8_cmp_le:
; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
; CHECK-NEXT: vcmpgtub [[RCMPLE:[0-9]+]], 3, 2
; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
define <16 x i8> @v16si8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
entry:
%cmp = icmp slt <16 x i8> %x, %y
%sext = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %sext
}
; CHECK: v16si8_cmp_lt:
; CHECK: vcmpgtsb 2, 3, 2
define <16 x i8> @v16ui8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
entry:
%cmp = icmp ult <16 x i8> %x, %y
%sext = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %sext
}
; CHECK: v16ui8_cmp_lt:
; CHECK: vcmpgtub 2, 3, 2
define <16 x i8> @v16si8_cmp_gt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
entry:
%cmp = icmp sgt <16 x i8> %x, %y
%sext = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %sext
}
; CHECK: v16si8_cmp_gt:
; CHECK: vcmpgtsb 2, 2, 3
define <16 x i8> @v16ui8_cmp_gt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
entry:
%cmp = icmp ugt <16 x i8> %x, %y
%sext = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %sext
}
; CHECK: v16ui8_cmp_gt:
; CHECK: vcmpgtub 2, 2, 3
define <16 x i8> @v16si8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
entry:
%cmp = icmp sge <16 x i8> %x, %y
%sext = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %sext
}
; CHECK: v16si8_cmp_ge:
; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
; CHECK-NEXT: vcmpgtsb [[RCMPGT:[0-9]+]], 2, 3
; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
define <16 x i8> @v16ui8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
entry:
%cmp = icmp uge <16 x i8> %x, %y
%sext = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %sext
}
; CHECK: v16ui8_cmp_ge:
; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
; CHECK-NEXT: vcmpgtub [[RCMPGT:[0-9]+]], 2, 3
; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
define <32 x i8> @v32si8_cmp(<32 x i8> %x, <32 x i8> %y) nounwind readnone {
@ -70,13 +165,106 @@ define <4 x i16> @v4si16_cmp(<4 x i16> %x, <4 x i16> %y) nounwind readnone {
; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
define <8 x i16> @v8si16_cmp(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
; Additional tests for v8i16 since it is an altivec native type
define <8 x i16> @v8si16_cmp_eq(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
entry:
%cmp = icmp eq <8 x i16> %x, %y
%sext = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %sext
}
; CHECK: v8si16_cmp:
; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: v8si16_cmp_eq:
; CHECK: vcmpequh 2, 2, 3
define <8 x i16> @v8si16_cmp_ne(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
entry:
%cmp = icmp ne <8 x i16> %x, %y
%sext = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %sext
}
; CHECK: v8si16_cmp_ne:
; CHECK: vcmpequh [[RET:[0-9]+]], 2, 3
; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
define <8 x i16> @v8si16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
entry:
%cmp = icmp sle <8 x i16> %x, %y
%sext = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %sext
}
; CHECK: v8si16_cmp_le:
; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
; CHECK-NEXT: vcmpgtsh [[RCMPLE:[0-9]+]], 3, 2
; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
define <8 x i16> @v8ui16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
entry:
%cmp = icmp ule <8 x i16> %x, %y
%sext = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %sext
}
; CHECK: v8ui16_cmp_le:
; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
; CHECK-NEXT: vcmpgtuh [[RCMPLE:[0-9]+]], 3, 2
; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
define <8 x i16> @v8si16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
entry:
%cmp = icmp slt <8 x i16> %x, %y
%sext = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %sext
}
; CHECK: v8si16_cmp_lt:
; CHECK: vcmpgtsh 2, 3, 2
define <8 x i16> @v8ui16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
entry:
%cmp = icmp ult <8 x i16> %x, %y
%sext = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %sext
}
; CHECK: v8ui16_cmp_lt:
; CHECK: vcmpgtuh 2, 3, 2
define <8 x i16> @v8si16_cmp_gt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
entry:
%cmp = icmp sgt <8 x i16> %x, %y
%sext = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %sext
}
; CHECK: v8si16_cmp_gt:
; CHECK: vcmpgtsh 2, 2, 3
define <8 x i16> @v8ui16_cmp_gt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
entry:
%cmp = icmp ugt <8 x i16> %x, %y
%sext = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %sext
}
; CHECK: v8ui16_cmp_gt:
; CHECK: vcmpgtuh 2, 2, 3
define <8 x i16> @v8si16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
entry:
%cmp = icmp sge <8 x i16> %x, %y
%sext = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %sext
}
; CHECK: v8si16_cmp_ge:
; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
; CHECK-NEXT: vcmpgtsh [[RCMPGT:[0-9]+]], 2, 3
; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
define <8 x i16> @v8ui16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
entry:
%cmp = icmp uge <8 x i16> %x, %y
%sext = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %sext
}
; CHECK: v8ui16_cmp_ge:
; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
; CHECK-NEXT: vcmpgtuh [[RCMPGT:[0-9]+]], 2, 3
; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
define <16 x i16> @v16si16_cmp(<16 x i16> %x, <16 x i16> %y) nounwind readnone {
@ -110,13 +298,106 @@ define <2 x i32> @v2si32_cmp(<2 x i32> %x, <2 x i32> %y) nounwind readnone {
; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
define <4 x i32> @v4si32_cmp(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
; Additional tests for v4i32 since it is an altivec native type
define <4 x i32> @v4si32_cmp_eq(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
entry:
%cmp = icmp eq <4 x i32> %x, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %sext
}
; CHECK: v4si32_cmp:
; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: v4si32_cmp_eq:
; CHECK: vcmpequw 2, 2, 3
define <4 x i32> @v4si32_cmp_ne(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
entry:
%cmp = icmp ne <4 x i32> %x, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %sext
}
; CHECK: v4si32_cmp_ne:
; CHECK: vcmpequw [[RCMP:[0-9]+]], 2, 3
; CHECK-NEXT: vnor 2, [[RCMP]], [[RCMP]]
define <4 x i32> @v4si32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
entry:
%cmp = icmp sle <4 x i32> %x, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %sext
}
; CHECK: v4si32_cmp_le:
; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
; CHECK-NEXT: vcmpgtsw [[RCMPLE:[0-9]+]], 3, 2
; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
define <4 x i32> @v4ui32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
entry:
%cmp = icmp ule <4 x i32> %x, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %sext
}
; CHECK: v4ui32_cmp_le:
; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
; CHECK-NEXT: vcmpgtuw [[RCMPLE:[0-9]+]], 3, 2
; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
define <4 x i32> @v4si32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
entry:
%cmp = icmp slt <4 x i32> %x, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %sext
}
; CHECK: v4si32_cmp_lt:
; CHECK: vcmpgtsw 2, 3, 2
define <4 x i32> @v4ui32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
entry:
%cmp = icmp ult <4 x i32> %x, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %sext
}
; CHECK: v4ui32_cmp_lt:
; CHECK: vcmpgtuw 2, 3, 2
define <4 x i32> @v4si32_cmp_gt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
entry:
%cmp = icmp sgt <4 x i32> %x, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %sext
}
; CHECK: v4si32_cmp_gt:
; CHECK: vcmpgtsw 2, 2, 3
define <4 x i32> @v4ui32_cmp_gt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
entry:
%cmp = icmp ugt <4 x i32> %x, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %sext
}
; CHECK: v4ui32_cmp_gt:
; CHECK: vcmpgtuw 2, 2, 3
define <4 x i32> @v4si32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
entry:
%cmp = icmp sge <4 x i32> %x, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %sext
}
; CHECK: v4si32_cmp_ge:
; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
; CHECK-NEXT: vcmpgtsw [[RCMPGT:[0-9]+]], 2, 3
; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
define <4 x i32> @v4ui32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
entry:
%cmp = icmp uge <4 x i32> %x, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %sext
}
; CHECK: v4ui32_cmp_ge:
; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
; CHECK-NEXT: vcmpgtuw [[RCMPGT:[0-9]+]], 2, 3
; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]]
define <8 x i32> @v8si32_cmp(<8 x i32> %x, <8 x i32> %y) nounwind readnone {
@ -168,15 +449,70 @@ entry:
; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
define <4 x float> @v4f32_cmp(<4 x float> %x, <4 x float> %y) nounwind readnone {
; Additional tests for v4f32 since it is an altivec native type
define <4 x float> @v4f32_cmp_eq(<4 x float> %x, <4 x float> %y) nounwind readnone {
entry:
%cmp = fcmp oeq <4 x float> %x, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
%0 = bitcast <4 x i32> %sext to <4 x float>
ret <4 x float> %0
}
; CHECK: v4f32_cmp:
; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK: v4f32_cmp_eq:
; CHECK: vcmpeqfp 2, 2, 3
; une is the exact complement of vcmpeqfp (which tests ordered-equal), so it
; lowers to vcmpeqfp + vnor.
define <4 x float> @v4f32_cmp_ne(<4 x float> %x, <4 x float> %y) nounwind readnone {
entry:
%cmp = fcmp une <4 x float> %x, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
%0 = bitcast <4 x i32> %sext to <4 x float>
ret <4 x float> %0
}
; CHECK: v4f32_cmp_ne:
; CHECK: vcmpeqfp [[RET:[0-9]+]], 2, 3
; CHECK-NEXT: vnor 2, [[RET]], [[RET]]
define <4 x float> @v4f32_cmp_le(<4 x float> %x, <4 x float> %y) nounwind readnone {
entry:
%cmp = fcmp ole <4 x float> %x, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
%0 = bitcast <4 x i32> %sext to <4 x float>
ret <4 x float> %0
}
; CHECK: v4f32_cmp_le:
; CHECK: vcmpeqfp [[RCMPEQ:[0-9]+]], 2, 3
; CHECK-NEXT: vcmpgtfp [[RCMPLE:[0-9]+]], 3, 2
; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]]
define <4 x float> @v4f32_cmp_lt(<4 x float> %x, <4 x float> %y) nounwind readnone {
entry:
%cmp = fcmp olt <4 x float> %x, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
%0 = bitcast <4 x i32> %sext to <4 x float>
ret <4 x float> %0
}
; CHECK: v4f32_cmp_lt:
; CHECK: vcmpgtfp 2, 3, 2
; oge maps directly to the native vcmpgefp, so unlike the integer ge cases
; no eq|gt combination is emitted.
define <4 x float> @v4f32_cmp_ge(<4 x float> %x, <4 x float> %y) nounwind readnone {
entry:
%cmp = fcmp oge <4 x float> %x, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
%0 = bitcast <4 x i32> %sext to <4 x float>
ret <4 x float> %0
}
; CHECK: v4f32_cmp_ge:
; CHECK: vcmpgefp 2, 2, 3
define <4 x float> @v4f32_cmp_gt(<4 x float> %x, <4 x float> %y) nounwind readnone {
entry:
%cmp = fcmp ogt <4 x float> %x, %y
%sext = sext <4 x i1> %cmp to <4 x i32>
%0 = bitcast <4 x i32> %sext to <4 x float>
ret <4 x float> %0
}
; CHECK: v4f32_cmp_gt:
; CHECK: vcmpgtfp 2, 2, 3
define <8 x float> @v8f32_cmp(<8 x float> %x, <8 x float> %y) nounwind readnone {