[X86][SSE] Handle basic inversion of PTEST/TESTP operands (PR38522)

PTEST/TESTP sets EFLAGS as:
  TESTZ:   ZF = (Op0 & Op1) == 0
  TESTC:   CF = (~Op0 & Op1) == 0
  TESTNZC: ZF == 0 && CF == 0

If we are inverting the 0'th operand of a PTEST/TESTP instruction, we can
adjust the comparisons to handle the inversion implicitly. Additionally, for
the "TESTZ" (ZF) cases, the allones pattern PTEST(X,-1) can be simplified to
PTEST(X,X).

We can expand this to the TESTZ(X,~Y) pattern and also handle KTEST/KORTEST
in the future.

Differential Revision: https://reviews.llvm.org/D76984
parent 8b8cd150a4
commit 918ccb64b0
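To make the flag algebra behind the combine concrete, here is a minimal scalar model of the PTEST/TESTP semantics (plain C++ written for this note, not part of the commit; the testz/testc helpers and the 64-bit "vectors" are illustrative assumptions):

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Scalar model of the EFLAGS results:
//   ZF = (Op0 & Op1) == 0,  CF = (~Op0 & Op1) == 0.
static bool testz(uint64_t Op0, uint64_t Op1) { return (Op0 & Op1) == 0; }
static bool testc(uint64_t Op0, uint64_t Op1) { return (~Op0 & Op1) == 0; }

int main() {
  for (uint64_t X : {0x0ull, 0xFFull, 0xF0F0ull, ~0ull})
    for (uint64_t Y : {0x0ull, 0x0Full, 0xF0F0ull, ~0ull}) {
      // Inverting Op0 swaps the Z and C predicates...
      assert(testz(~X, Y) == testc(X, Y));
      assert(testc(~X, Y) == testz(X, Y));
      // ...and leaves TESTNZC (ZF == 0 && CF == 0) unchanged.
      assert((!testz(~X, Y) && !testc(~X, Y)) ==
             (!testz(X, Y) && !testc(X, Y)));
    }
  // The allones cases: TESTZ(X,-1) == TESTZ(-1,X) == TESTZ(X,X).
  for (uint64_t X : {0x0ull, 0x1ull, 0xABCDull, ~0ull}) {
    assert(testz(X, ~0ull) == testz(X, X));
    assert(testz(~0ull, X) == testz(X, X));
  }
  return 0;
}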
@@ -39777,6 +39777,75 @@ static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) {
   return SDValue();
 }
 
+/// If we are inverting a PTEST/TESTP operand, attempt to adjust the CC
+/// to avoid the inversion.
+static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
+                              SelectionDAG &DAG) {
+  // TODO: Handle X86ISD::KTEST/X86ISD::KORTEST.
+  if (EFLAGS.getOpcode() != X86ISD::PTEST &&
+      EFLAGS.getOpcode() != X86ISD::TESTP)
+    return SDValue();
+
+  // PTEST/TESTP sets EFLAGS as:
+  // TESTZ: ZF = (Op0 & Op1) == 0
+  // TESTC: CF = (~Op0 & Op1) == 0
+  // TESTNZC: ZF == 0 && CF == 0
+  EVT VT = EFLAGS.getValueType();
+  SDValue Op0 = EFLAGS.getOperand(0);
+  SDValue Op1 = EFLAGS.getOperand(1);
+  EVT OpVT = Op0.getValueType();
+
+  // TEST*(~X,Y) == TEST*(X,Y)
+  if (SDValue NotOp0 = IsNOT(Op0, DAG)) {
+    X86::CondCode InvCC;
+    switch (CC) {
+    case X86::COND_B:
+      // testc -> testz.
+      InvCC = X86::COND_E;
+      break;
+    case X86::COND_AE:
+      // !testc -> !testz.
+      InvCC = X86::COND_NE;
+      break;
+    case X86::COND_E:
+      // testz -> testc.
+      InvCC = X86::COND_B;
+      break;
+    case X86::COND_NE:
+      // !testz -> !testc.
+      InvCC = X86::COND_AE;
+      break;
+    case X86::COND_A:
+    case X86::COND_BE:
+      // testnzc -> testnzc (no change).
+      InvCC = CC;
+      break;
+    default:
+      InvCC = X86::COND_INVALID;
+      break;
+    }
+
+    if (InvCC != X86::COND_INVALID) {
+      CC = InvCC;
+      return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
+                         DAG.getBitcast(OpVT, NotOp0), Op1);
+    }
+  }
+
+  // TODO: TESTZ(X,~Y) == TESTC(Y,X)
+
+  // TESTZ(X,-1) == TESTZ(X,X)
+  // TESTZ(-1,X) == TESTZ(X,X)
+  if (CC == X86::COND_E || CC == X86::COND_NE) {
+    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
+      return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op1, Op1);
+    if (ISD::isBuildVectorAllOnes(Op1.getNode()))
+      return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0);
+  }
+
+  return SDValue();
+}
+
 /// Optimize an EFLAGS definition used according to the condition code \p CC
 /// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
 /// uses of chain values.
@@ -39789,6 +39858,10 @@ static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
   if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
     return R;
 
+  if (SDValue R = combinePTESTCC(EFLAGS, CC, DAG))
+    return R;
+
   return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget);
 }
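At the source level, the pattern this combine targets looks like the following (a hypothetical sketch using real SSE4.1 intrinsics, not code from the commit). Before this change the NOT of the first operand was materialized as pcmpeqd + pxor in front of the vptest; afterwards the vptest consumes C directly and only the cmov/setcc condition flips, since testz(~C, D) is exactly testc(C, D):

#include <smmintrin.h>

int select_on_ptestz_of_not(__m128i C, __m128i D, int A, int B) {
  __m128i NotC = _mm_xor_si128(C, _mm_set1_epi32(-1)); // bitwise ~C
  return _mm_testz_si128(NotC, D) ? A : B;             // ZF of (~C & D)
}

The test diffs below show exactly this effect: the vpcmpeqd/vpxor (or vcmptrueps/vxorps) pairs disappear and the cmov/setcc condition is remapped.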
@@ -9,10 +9,8 @@ define i32 @ptestz_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: ptestz_128_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT:    vptest %xmm1, %xmm0
-; CHECK-NEXT:    cmovnel %esi, %eax
+; CHECK-NEXT:    cmovael %esi, %eax
 ; CHECK-NEXT:    retq
   %t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
   %t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %d)
@@ -25,11 +23,8 @@ define i32 @ptestz_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: ptestz_256_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT:    vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT:    vptest %ymm1, %ymm0
-; CHECK-NEXT:    cmovnel %esi, %eax
+; CHECK-NEXT:    cmovael %esi, %eax
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
@@ -85,10 +80,8 @@ define i32 @ptestc_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: ptestc_128_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT:    vptest %xmm1, %xmm0
-; CHECK-NEXT:    cmovael %esi, %eax
+; CHECK-NEXT:    cmovnel %esi, %eax
 ; CHECK-NEXT:    retq
   %t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
   %t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %d)
@@ -101,11 +94,8 @@ define i32 @ptestc_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: ptestc_256_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT:    vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT:    vptest %ymm1, %ymm0
-; CHECK-NEXT:    cmovael %esi, %eax
+; CHECK-NEXT:    cmovnel %esi, %eax
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
@@ -123,10 +113,8 @@ define i32 @ptestnzc_128_invert0(<2 x i64> %c, <2 x i64> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: ptestnzc_128_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT:    vptest %xmm1, %xmm0
-; CHECK-NEXT:    cmovael %esi, %eax
+; CHECK-NEXT:    cmovnel %esi, %eax
 ; CHECK-NEXT:    retq
   %t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
   %t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %d)
@@ -139,9 +127,6 @@ define i32 @ptestnzc_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: ptestnzc_256_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT:    vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT:    vptest %ymm1, %ymm0
 ; CHECK-NEXT:    cmovbel %esi, %eax
 ; CHECK-NEXT:    vzeroupper
@@ -153,6 +138,21 @@ define i32 @ptestnzc_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
   ret i32 %t4
 }
 
+define i32 @ptestnzc_256_invert0_commute(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
+; CHECK-LABEL: ptestnzc_256_invert0_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    vptest %ymm1, %ymm0
+; CHECK-NEXT:    cmoval %esi, %eax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %t2 = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %t1, <4 x i64> %d)
+  %t3 = icmp eq i32 %t2, 0
+  %t4 = select i1 %t3, i32 %a, i32 %b
+  ret i32 %t4
+}
+
 ;
 ; testz(-1,X) -> testz(X,X)
 ;
@@ -161,8 +161,7 @@ define i32 @ptestz_128_allones0(<2 x i64> %c, i32 %a, i32 %b) {
 ; CHECK-LABEL: ptestz_128_allones0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vptest %xmm0, %xmm1
+; CHECK-NEXT:    vptest %xmm0, %xmm0
 ; CHECK-NEXT:    cmovnel %esi, %eax
 ; CHECK-NEXT:    retq
   %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> <i64 -1, i64 -1>, <2 x i64> %c)
@@ -175,9 +174,7 @@ define i32 @ptestz_256_allones0(<4 x i64> %c, i32 %a, i32 %b) {
 ; CHECK-LABEL: ptestz_256_allones0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
-; CHECK-NEXT:    vptest %ymm0, %ymm1
+; CHECK-NEXT:    vptest %ymm0, %ymm0
 ; CHECK-NEXT:    cmovnel %esi, %eax
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
@@ -195,8 +192,7 @@ define i32 @ptestz_128_allones1(<2 x i64> %c, i32 %a, i32 %b) {
 ; CHECK-LABEL: ptestz_128_allones1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vptest %xmm1, %xmm0
+; CHECK-NEXT:    vptest %xmm0, %xmm0
 ; CHECK-NEXT:    cmovnel %esi, %eax
 ; CHECK-NEXT:    retq
   %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> <i64 -1, i64 -1>)
@@ -209,9 +205,7 @@ define i32 @ptestz_256_allones1(<4 x i64> %c, i32 %a, i32 %b) {
 ; CHECK-LABEL: ptestz_256_allones1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vcmptrueps %ymm1, %ymm1, %ymm1
-; CHECK-NEXT:    vptest %ymm1, %ymm0
+; CHECK-NEXT:    vptest %ymm0, %ymm0
 ; CHECK-NEXT:    cmovnel %esi, %eax
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
@@ -226,10 +220,8 @@ define zeroext i1 @PR38522(<16 x i8>* %x, <16 x i8>* %y) {
 ; CHECK:       # %bb.0: # %start
 ; CHECK-NEXT:    vmovdqa (%rdi), %xmm0
 ; CHECK-NEXT:    vpcmpgtb (%rsi), %xmm0, %xmm0
-; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vptest %xmm1, %xmm0
-; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    vptest %xmm0, %xmm0
+; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
 start:
   %0 = load <16 x i8>, <16 x i8>* %x, align 16
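For reference, a hypothetical C-level reproducer in the spirit of the PR38522 test above (the function name and the use of _mm_test_all_ones are assumptions; the intrinsics themselves are real SSE4.1). A lanewise x <= y is typically lowered as NOT(pcmpgtb), and _mm_test_all_ones is a testc against all-ones, so the old code materialized the NOT with pcmpeqd + pxor. With this combine, testc(~Gt, -1) becomes testz(Gt, -1) and then testz(Gt, Gt): a single vptest of the compare result against itself, plus sete:

#include <smmintrin.h>

bool all_le(const __m128i *x, const __m128i *y) {
  __m128i Gt = _mm_cmpgt_epi8(_mm_load_si128(x), _mm_load_si128(y));
  __m128i Le = _mm_xor_si128(Gt, _mm_set1_epi32(-1)); // lanewise x <= y
  return _mm_test_all_ones(Le);                       // testc(Le, allones)
}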
@@ -9,10 +9,8 @@ define i32 @testpdz_128_invert0(<2 x double> %c, <2 x double> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: testpdz_128_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT:    vtestpd %xmm1, %xmm0
-; CHECK-NEXT:    cmovnel %esi, %eax
+; CHECK-NEXT:    cmovael %esi, %eax
 ; CHECK-NEXT:    retq
   %t0 = bitcast <2 x double> %c to <2 x i64>
   %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
@@ -27,11 +25,8 @@ define i32 @testpdz_256_invert0(<4 x double> %c, <4 x double> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: testpdz_256_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT:    vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT:    vtestpd %ymm1, %ymm0
-; CHECK-NEXT:    cmovnel %esi, %eax
+; CHECK-NEXT:    cmovael %esi, %eax
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %t0 = bitcast <4 x double> %c to <4 x i64>
@@ -93,10 +88,8 @@ define i32 @testpdc_128_invert0(<2 x double> %c, <2 x double> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: testpdc_128_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT:    vtestpd %xmm1, %xmm0
-; CHECK-NEXT:    cmovael %esi, %eax
+; CHECK-NEXT:    cmovnel %esi, %eax
 ; CHECK-NEXT:    retq
   %t0 = bitcast <2 x double> %c to <2 x i64>
   %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
@@ -111,11 +104,8 @@ define i32 @testpdc_256_invert0(<4 x double> %c, <4 x double> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: testpdc_256_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT:    vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT:    vtestpd %ymm1, %ymm0
-; CHECK-NEXT:    cmovael %esi, %eax
+; CHECK-NEXT:    cmovnel %esi, %eax
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %t0 = bitcast <4 x double> %c to <4 x i64>
@@ -135,8 +125,6 @@ define i32 @testpdnzc_128_invert0(<2 x double> %c, <2 x double> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: testpdnzc_128_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT:    vtestpd %xmm1, %xmm0
 ; CHECK-NEXT:    cmovbel %esi, %eax
 ; CHECK-NEXT:    retq
@@ -153,9 +141,6 @@ define i32 @testpdnzc_256_invert0(<4 x double> %c, <4 x double> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: testpdnzc_256_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT:    vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT:    vtestpd %ymm1, %ymm0
 ; CHECK-NEXT:    cmovbel %esi, %eax
 ; CHECK-NEXT:    vzeroupper
@@ -9,10 +9,8 @@ define i32 @testpsz_128_invert0(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: testpsz_128_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT:    vtestps %xmm1, %xmm0
-; CHECK-NEXT:    cmovnel %esi, %eax
+; CHECK-NEXT:    cmovael %esi, %eax
 ; CHECK-NEXT:    retq
   %t0 = bitcast <4 x float> %c to <2 x i64>
   %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
@@ -27,11 +25,8 @@ define i32 @testpsz_256_invert0(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: testpsz_256_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT:    vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT:    vtestps %ymm1, %ymm0
-; CHECK-NEXT:    cmovnel %esi, %eax
+; CHECK-NEXT:    cmovael %esi, %eax
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %t0 = bitcast <8 x float> %c to <4 x i64>
@@ -93,10 +88,8 @@ define i32 @testpsc_128_invert0(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: testpsc_128_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT:    vtestps %xmm1, %xmm0
-; CHECK-NEXT:    cmovael %esi, %eax
+; CHECK-NEXT:    cmovnel %esi, %eax
 ; CHECK-NEXT:    retq
   %t0 = bitcast <4 x float> %c to <2 x i64>
   %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
@@ -111,11 +104,8 @@ define i32 @testpsc_256_invert0(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: testpsc_256_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT:    vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT:    vtestps %ymm1, %ymm0
-; CHECK-NEXT:    cmovael %esi, %eax
+; CHECK-NEXT:    cmovnel %esi, %eax
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %t0 = bitcast <8 x float> %c to <4 x i64>
@@ -135,8 +125,6 @@ define i32 @testpsnzc_128_invert0(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: testpsnzc_128_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT:    vtestps %xmm1, %xmm0
 ; CHECK-NEXT:    cmovbel %esi, %eax
 ; CHECK-NEXT:    retq
@@ -153,9 +141,6 @@ define i32 @testpsnzc_256_invert0(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
 ; CHECK-LABEL: testpsnzc_256_invert0:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT:    vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT:    vtestps %ymm1, %ymm0
 ; CHECK-NEXT:    cmovbel %esi, %eax
 ; CHECK-NEXT:    vzeroupper
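The same CC remapping applies to the floating-point TESTPS/TESTPD forms, with one wrinkle: VTESTPS/VTESTPD inspect only the sign bit of each lane, which a full bitwise NOT also flips. A hypothetical AVX-intrinsics analogue of testpdz_128_invert0 (the function name is illustrative; the intrinsics are real):

#include <immintrin.h>

int testpdz_of_not(__m128d C, __m128d D, int A, int B) {
  // Bitwise ~C, which in particular inverts every lane's sign bit.
  __m128d NotC = _mm_castsi128_pd(
      _mm_xor_si128(_mm_castpd_si128(C), _mm_set1_epi32(-1)));
  return _mm_testz_pd(NotC, D) ? A : B; // vtestpd; CC flips as for vptest
}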