forked from OSchip/llvm-project
[X86] Allow combineVectorCompareAndMaskUnaryOp to handle 'all-bits' general case
For the sint_to_fp(and(X,C)) -> and(X,sint_to_fp(C)) fold, allow combineVectorCompareAndMaskUnaryOp to match any X that ComputeNumSignBits says is all-bits, not just SETCC. Noticed while investigating mask promotion issues in PR45808.
This commit is contained in:
parent
7425bdbd2f
commit
0e8e731449
llvm
|
@ -45564,10 +45564,11 @@ static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
// TODO: Could we move this to DAGCombine?
|
||||
static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
|
||||
SelectionDAG &DAG) {
|
||||
// Take advantage of vector comparisons producing 0 or -1 in each lane to
|
||||
// optimize away operation when it's from a constant.
|
||||
// Take advantage of vector comparisons (etc.) producing 0 or -1 in each lane
|
||||
// to optimize away operation when it's from a constant.
|
||||
//
|
||||
// The general transformation is:
|
||||
// UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
|
||||
|
@ -45579,10 +45580,10 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
|
|||
// aren't the same.
|
||||
EVT VT = N->getValueType(0);
|
||||
bool IsStrict = N->isStrictFPOpcode();
|
||||
unsigned NumEltBits = VT.getScalarSizeInBits();
|
||||
SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
|
||||
if (!VT.isVector() || Op0->getOpcode() != ISD::AND ||
|
||||
Op0->getOperand(0)->getOpcode() != ISD::SETCC ||
|
||||
VT.getSizeInBits() != Op0.getValueSizeInBits())
|
||||
if (!VT.isVector() || Op0.getOpcode() != ISD::AND ||
|
||||
DAG.ComputeNumSignBits(Op0.getOperand(0)) != NumEltBits)
|
||||
return SDValue();
|
||||
|
||||
// Now check that the other operand of the AND is a constant. We could
|
||||
|
|
|
@ -97,17 +97,16 @@ define void @foo4(<4 x float>* noalias %result) nounwind {
|
|||
ret void
|
||||
}
|
||||
|
||||
; TODO: Test when we're masking against a sign extended setcc.
|
||||
; Test when we're masking against a sign extended setcc.
|
||||
define <4 x float> @foo5(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; CHECK-LABEL: LCPI5_0:
|
||||
; CHECK-NEXT: .long 1 ## 0x1
|
||||
; CHECK-NEXT: .long 1065353216 ## 0x3f800000
|
||||
; CHECK-NEXT: .long 0 ## 0x0
|
||||
; CHECK-NEXT: .long 1 ## 0x1
|
||||
; CHECK-NEXT: .long 1065353216 ## 0x3f800000
|
||||
; CHECK-NEXT: .long 0 ## 0x0
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
|
||||
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%1 = icmp sgt <4 x i32> %a0, %a1
|
||||
%2 = sext <4 x i1> %1 to <4 x i32>
|
||||
|
@ -116,12 +115,12 @@ define <4 x float> @foo5(<4 x i32> %a0, <4 x i32> %a1) {
|
|||
ret <4 x float> %4
|
||||
}
|
||||
|
||||
; TODO: Test when we're masking against mask arithmetic, not the setcc's directly.
|
||||
; Test when we're masking against mask arithmetic, not the setcc's directly.
|
||||
define <4 x float> @foo6(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; CHECK-LABEL: LCPI6_0:
|
||||
; CHECK-NEXT: .long 1 ## 0x1
|
||||
; CHECK-NEXT: .long 1065353216 ## 0x3f800000
|
||||
; CHECK-NEXT: .long 0 ## 0x0
|
||||
; CHECK-NEXT: .long 1 ## 0x1
|
||||
; CHECK-NEXT: .long 1065353216 ## 0x3f800000
|
||||
; CHECK-NEXT: .long 0 ## 0x0
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: movdqa %xmm0, %xmm2
|
||||
|
@ -130,7 +129,6 @@ define <4 x float> @foo6(<4 x i32> %a0, <4 x i32> %a1) {
|
|||
; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
|
||||
; CHECK-NEXT: pand %xmm2, %xmm0
|
||||
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%1 = icmp sgt <4 x i32> %a0, %a1
|
||||
%2 = icmp sgt <4 x i32> %a0, zeroinitializer
|
||||
|
@ -139,4 +137,4 @@ define <4 x float> @foo6(<4 x i32> %a0, <4 x i32> %a1) {
|
|||
%5 = and <4 x i32> %4, <i32 1, i32 0, i32 1, i32 0>
|
||||
%6 = uitofp <4 x i32> %5 to <4 x float>
|
||||
ret <4 x float> %6
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue