Forked from OSchip/llvm-project.

[X86][SSE] combineSetCCMOVMSK - fold MOVMSK(SHUFFLE(X,u)) -> MOVMSK(X)

If we're permuting ALL the elements of a single vector, then for allof/anyof MOVMSK tests we can avoid the shuffle entirely.

This commit is contained in:
parent 584d0d5c17
commit cad2038700
@ -5441,6 +5441,14 @@ static bool isAnyZero(ArrayRef<int> Mask) {
|
|||
return llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
|
||||
}
|
||||
|
||||
/// Return true if the value of any element in Mask is the zero or undef
|
||||
/// sentinel values.
|
||||
/// Return true if any element of Mask is either the zero or the undef
/// sentinel value.
static bool isAnyZeroOrUndef(ArrayRef<int> Mask) {
  // A shuffle mask element is either a real source index (>= 0) or one of
  // the negative sentinels; flag the mask if any sentinel is present.
  auto IsZeroOrUndef = [](int M) {
    return M == SM_SentinelUndef || M == SM_SentinelZero;
  };
  return llvm::any_of(Mask, IsZeroOrUndef);
}
|
||||
|
||||
/// Return true if Val is undef or if its value falls within the
|
||||
/// specified range (L, H].
|
||||
static bool isUndefOrInRange(int Val, int Low, int Hi) {
|
||||
|
@ -40590,6 +40598,31 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
|
|||
}
|
||||
}
|
||||
|
||||
// MOVMSK(SHUFFLE(X,u)) -> MOVMSK(X) iff every element is referenced.
|
||||
SmallVector<int, 32> ShuffleMask;
|
||||
SmallVector<SDValue, 2> ShuffleInputs;
|
||||
if (NumElts == CmpBits &&
|
||||
getTargetShuffleInputs(peekThroughBitcasts(Vec), ShuffleInputs,
|
||||
ShuffleMask, DAG) &&
|
||||
ShuffleInputs.size() == 1 && !isAnyZeroOrUndef(ShuffleMask) &&
|
||||
ShuffleInputs[0].getValueSizeInBits() == VecVT.getSizeInBits()) {
|
||||
unsigned NumShuffleElts = ShuffleMask.size();
|
||||
APInt DemandedElts = APInt::getNullValue(NumShuffleElts);
|
||||
for (int M : ShuffleMask) {
|
||||
assert(0 <= M && M < (int)NumShuffleElts && "Bad unary shuffle index");
|
||||
DemandedElts.setBit(M);
|
||||
}
|
||||
if (DemandedElts.isAllOnesValue()) {
|
||||
SDLoc DL(EFLAGS);
|
||||
SDValue Result = DAG.getBitcast(VecVT, ShuffleInputs[0]);
|
||||
Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
|
||||
Result =
|
||||
DAG.getZExtOrTrunc(Result, DL, EFLAGS.getOperand(0).getValueType());
|
||||
return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
|
||||
EFLAGS.getOperand(1));
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
|
|
@ -530,7 +530,6 @@ define i1 @allones_v32i16_sign(<32 x i16> %arg) {
|
|||
; AVX2-LABEL: allones_v32i16_sign:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
||||
; AVX2-NEXT: cmpl $-1, %eax
|
||||
; AVX2-NEXT: sete %al
|
||||
|
@ -598,7 +597,6 @@ define i1 @allzeros_v32i16_sign(<32 x i16> %arg) {
|
|||
; AVX2-LABEL: allzeros_v32i16_sign:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
||||
; AVX2-NEXT: testl %eax, %eax
|
||||
; AVX2-NEXT: sete %al
|
||||
|
@ -1008,7 +1006,6 @@ define i1 @allones_v8i64_sign(<8 x i64> %arg) {
|
|||
; AVX2-LABEL: allones_v8i64_sign:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||
; AVX2-NEXT: cmpb $-1, %al
|
||||
; AVX2-NEXT: sete %al
|
||||
|
@ -1068,7 +1065,6 @@ define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
|
|||
; AVX2-LABEL: allzeros_v8i64_sign:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||
; AVX2-NEXT: testb %al, %al
|
||||
; AVX2-NEXT: sete %al
|
||||
|
@ -1642,7 +1638,6 @@ define i1 @allones_v32i16_and1(<32 x i16> %arg) {
|
|||
; AVX2-NEXT: vpsllw $15, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
||||
; AVX2-NEXT: cmpl $-1, %eax
|
||||
; AVX2-NEXT: sete %al
|
||||
|
@ -1722,7 +1717,6 @@ define i1 @allzeros_v32i16_and1(<32 x i16> %arg) {
|
|||
; AVX2-NEXT: vpsllw $15, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
||||
; AVX2-NEXT: testl %eax, %eax
|
||||
; AVX2-NEXT: sete %al
|
||||
|
@ -2391,7 +2385,6 @@ define i1 @allones_v8i64_and1(<8 x i64> %arg) {
|
|||
; AVX2-NEXT: vpsllq $63, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||
; AVX2-NEXT: cmpb $-1, %al
|
||||
; AVX2-NEXT: sete %al
|
||||
|
@ -2461,7 +2454,6 @@ define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
|
|||
; AVX2-NEXT: vpsllq $63, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||
; AVX2-NEXT: testb %al, %al
|
||||
; AVX2-NEXT: sete %al
|
||||
|
@ -3035,7 +3027,6 @@ define i1 @allones_v32i16_and4(<32 x i16> %arg) {
|
|||
; AVX2-NEXT: vpsllw $13, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
||||
; AVX2-NEXT: cmpl $-1, %eax
|
||||
; AVX2-NEXT: sete %al
|
||||
|
@ -3115,7 +3106,6 @@ define i1 @allzeros_v32i16_and4(<32 x i16> %arg) {
|
|||
; AVX2-NEXT: vpsllw $13, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpsllw $13, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
||||
; AVX2-NEXT: testl %eax, %eax
|
||||
; AVX2-NEXT: sete %al
|
||||
|
@ -3784,7 +3774,6 @@ define i1 @allones_v8i64_and4(<8 x i64> %arg) {
|
|||
; AVX2-NEXT: vpsllq $61, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||
; AVX2-NEXT: cmpb $-1, %al
|
||||
; AVX2-NEXT: sete %al
|
||||
|
@ -3854,7 +3843,6 @@ define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
|
|||
; AVX2-NEXT: vpsllq $61, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpsllq $61, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||
; AVX2-NEXT: testb %al, %al
|
||||
; AVX2-NEXT: sete %al
|
||||
|
|
|
@ -1443,7 +1443,6 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
|
|||
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||
; AVX2-NEXT: cmpb $-1, %al
|
||||
; AVX2-NEXT: sete %al
|
||||
|
@ -1582,7 +1581,6 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
|
|||
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
||||
; AVX2-NEXT: cmpl $-1, %eax
|
||||
; AVX2-NEXT: sete %al
|
||||
|
|
|
@ -1420,7 +1420,6 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
|
|||
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||
; AVX2-NEXT: testb %al, %al
|
||||
; AVX2-NEXT: setne %al
|
||||
|
@ -1557,7 +1556,6 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
|
|||
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vpmovmskb %ymm0, %eax
|
||||
; AVX2-NEXT: testl %eax, %eax
|
||||
; AVX2-NEXT: setne %al
|
||||
|
|
Loading…
Reference in New Issue