forked from OSchip/llvm-project
Optimize vector select from all 0s or all 1s
As packed comparisons in AVX/SSE produce all 0s or all 1s in each SIMD lane, a vector select can be simplified to AND/OR, or removed entirely, if one or both of the values being selected are all 0s or all 1s. llvm-svn: 179267
This commit is contained in:
parent
95d9440348
commit
55658d4222
|
@ -15787,6 +15787,51 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
|
|||
if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget))
|
||||
return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS);
|
||||
|
||||
// Simplify vector selection if the selector will be produced by CMPP*/PCMP*.
|
||||
if (!DCI.isBeforeLegalize() && N->getOpcode() == ISD::VSELECT &&
|
||||
Cond.getOpcode() == ISD::SETCC) {
|
||||
|
||||
assert(Cond.getValueType().isVector() &&
|
||||
"vector select expects a vector selector!");
|
||||
|
||||
EVT IntVT = Cond.getValueType();
|
||||
bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
|
||||
bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
|
||||
|
||||
if (!TValIsAllOnes && !FValIsAllZeros) {
|
||||
// Try to invert the condition if the true value is not all 1s and the false value
|
||||
// is not all 0s.
|
||||
bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
|
||||
bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());
|
||||
|
||||
if (TValIsAllZeros || FValIsAllOnes) {
|
||||
SDValue CC = Cond.getOperand(2);
|
||||
ISD::CondCode NewCC =
|
||||
ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
|
||||
Cond.getOperand(0).getValueType().isInteger());
|
||||
Cond = DAG.getSetCC(DL, IntVT, Cond.getOperand(0), Cond.getOperand(1), NewCC);
|
||||
std::swap(LHS, RHS);
|
||||
TValIsAllOnes = FValIsAllOnes;
|
||||
FValIsAllZeros = TValIsAllZeros;
|
||||
}
|
||||
}
|
||||
|
||||
if (TValIsAllOnes || FValIsAllZeros) {
|
||||
SDValue Ret;
|
||||
|
||||
if (TValIsAllOnes && FValIsAllZeros)
|
||||
Ret = Cond;
|
||||
else if (TValIsAllOnes)
|
||||
Ret = DAG.getNode(ISD::OR, DL, IntVT, Cond,
|
||||
DAG.getNode(ISD::BITCAST, DL, IntVT, RHS));
|
||||
else if (FValIsAllZeros)
|
||||
Ret = DAG.getNode(ISD::AND, DL, IntVT, Cond,
|
||||
DAG.getNode(ISD::BITCAST, DL, IntVT, LHS));
|
||||
|
||||
return DAG.getNode(ISD::BITCAST, DL, VT, Ret);
|
||||
}
|
||||
}
|
||||
|
||||
// If we know that this node is legal then we know that it is going to be
|
||||
// matched by one of the SSE/AVX BLEND instructions. These instructions only
|
||||
// depend on the highest bit in each word. Try to use SimplifyDemandedBits
|
||||
|
|
|
@ -0,0 +1,72 @@
|
|||
; RUN: opt < %s -O3 | \
|
||||
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
|
||||
|
||||
; test1: vector select on an unordered-less-than compare where the false value
; is all zeros and the true value is %c. The expectations below require the
; packed compare (cmpnle) to be followed immediately by andps.
define <4 x i32> @test1(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
|
||||
%f = fcmp ult <4 x float> %a, %b
|
||||
%r = select <4 x i1> %f, <4 x i32> %c, <4 x i32> zeroinitializer
|
||||
ret <4 x i32> %r
|
||||
; CHECK: test1
|
||||
; CHECK: cmpnle
|
||||
; CHECK-NEXT: andps
|
||||
; CHECK: ret
|
||||
}
|
||||
|
||||
; test2: vector select on an unordered-less-than compare where the true value
; is all ones and the false value is %c. The expectations below require the
; packed compare (cmpnle) to be followed immediately by orps.
define <4 x i32> @test2(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
|
||||
%f = fcmp ult <4 x float> %a, %b
|
||||
%r = select <4 x i1> %f, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c
|
||||
ret <4 x i32> %r
|
||||
; CHECK: test2
|
||||
; CHECK: cmpnle
|
||||
; CHECK-NEXT: orps
|
||||
; CHECK: ret
|
||||
}
|
||||
|
||||
; test3: same compare as test1, but the all-zeros vector is the TRUE value and
; %c is the false value. The expectations below require the opposite compare
; predicate (cmple instead of cmpnle) followed immediately by andps.
define <4 x i32> @test3(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
|
||||
%f = fcmp ult <4 x float> %a, %b
|
||||
%r = select <4 x i1> %f, <4 x i32> zeroinitializer, <4 x i32> %c
|
||||
ret <4 x i32> %r
|
||||
; CHECK: test3
|
||||
; CHECK: cmple
|
||||
; CHECK-NEXT: andps
|
||||
; CHECK: ret
|
||||
}
|
||||
|
||||
; test4: same compare as test2, but the all-ones vector is the FALSE value and
; %c is the true value. The expectations below require the opposite compare
; predicate (cmple instead of cmpnle) followed immediately by orps.
define <4 x i32> @test4(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
|
||||
%f = fcmp ult <4 x float> %a, %b
|
||||
%r = select <4 x i1> %f, <4 x i32> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
ret <4 x i32> %r
|
||||
; CHECK: test4
|
||||
; CHECK: cmple
|
||||
; CHECK-NEXT: orps
|
||||
; CHECK: ret
|
||||
}
|
||||
|
||||
; test5: select between all ones (true value) and all zeros (false value).
; The expectations below require the packed compare (cmpnle) to be followed
; immediately by the return, i.e. no extra instruction after the compare.
define <4 x i32> @test5(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
|
||||
%f = fcmp ult <4 x float> %a, %b
|
||||
%r = select <4 x i1> %f, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer
|
||||
ret <4 x i32> %r
|
||||
; CHECK: test5
|
||||
; CHECK: cmpnle
|
||||
; CHECK-NEXT: ret
|
||||
}
|
||||
|
||||
; test6: select between all zeros (true value) and all ones (false value),
; the mirror image of test5. The expectations below require the opposite
; compare predicate (cmple) followed immediately by the return.
define <4 x i32> @test6(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
|
||||
%f = fcmp ult <4 x float> %a, %b
|
||||
%r = select <4 x i1> %f, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
ret <4 x i32> %r
|
||||
; CHECK: test6
|
||||
; CHECK: cmple
|
||||
; CHECK-NEXT: ret
|
||||
}
|
||||
|
||||
; test7: no select in the input. The compare result is sign-extended to
; <4 x i32> and ANDed with a vector loaded from %p. The expectations below
; require the same cmpnle + andps sequence that test1 expects.
define <4 x i32> @test7(<4 x float> %a, <4 x float> %b, <4 x i32>* %p) {
|
||||
%f = fcmp ult <4 x float> %a, %b
|
||||
%s = sext <4 x i1> %f to <4 x i32>
|
||||
%l = load <4 x i32>* %p
|
||||
%r = and <4 x i32> %l, %s
|
||||
ret <4 x i32> %r
|
||||
; CHECK: test7
|
||||
; CHECK: cmpnle
|
||||
; CHECK-NEXT: andps
|
||||
; CHECK: ret
|
||||
}
|
Loading…
Reference in New Issue