[X86] Add a DAG combine to turn (and (bitcast (vXi1 (concat_vectors (vYi1 setcc), undef,))), C) into (bitcast (vXi1 (concat_vectors (vYi1 setcc), zero,)))
The legalization of v2i1->i2 or v4i1->i4 bitcasts followed by a setcc can create an and after the bitcast. If we're lucky enough that the input to the bitcast is a concat_vectors where the first operand is a setcc that can natively zero all the upper bits of a k-register, then we should replace the other operands of the concat_vectors with zero in order to remove the AND. With the AND removed we might be able to use a kortest on the result.

Differential Revision: https://reviews.llvm.org/D69205
parent 8aa0a785c4
commit 3da269a248
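To see why the AND becomes removable, here is a minimal standalone C++ sketch of the bit-level reasoning (an illustration only; it is not part of the commit and uses no LLVM APIs): once the upper bits of the byte that receives the k-register value are known to be zero, ANDing with the low-bits mask changes nothing, so the all-zeros check can test the whole register (which is what kortest does) and the all-ones check only needs the compare against the mask.

#include <cassert>
#include <cstdint>

int main() {
  // A v4i1 setcc result occupies the low 4 bits; the upper bits of the byte
  // are zero once the other concat_vectors operands are zero instead of undef.
  const uint8_t Mask = 0x0F; // the AND constant for a 4-element setcc
  for (unsigned Bits = 0; Bits < 16; ++Bits) {
    uint8_t Reg = static_cast<uint8_t>(Bits);        // upper 4 bits already zero
    assert((Reg & Mask) == Reg);                     // the AND is a no-op
    assert(((Reg & Mask) == 0) == (Reg == 0));       // all-zeros test unchanged
    assert(((Reg & Mask) == Mask) == (Reg == Mask)); // all-ones test unchanged
  }
  return 0;
}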
@@ -39101,6 +39101,71 @@ static SDValue combineParity(SDNode *N, SelectionDAG &DAG,
  return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0), Setnp);
}

// Look for (and (bitcast (vXi1 (concat_vectors (vYi1 setcc), undef,))), C)
// Where C is a mask containing the same number of bits as the setcc and
// where the setcc will freely 0 upper bits of k-register. We can replace the
// undef in the concat with 0s and remove the AND. This mainly helps with
// v2i1/v4i1 setcc being casted to scalar.
static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG,
                                             const X86Subtarget &Subtarget) {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");

  EVT VT = N->getValueType(0);

  // Make sure this is an AND with constant. We will check the value of the
  // constant later.
  if (!isa<ConstantSDNode>(N->getOperand(1)))
    return SDValue();

  // This is implied by the ConstantSDNode.
  assert(!VT.isVector() && "Expected scalar VT!");

  if (N->getOperand(0).getOpcode() != ISD::BITCAST ||
      !N->getOperand(0).hasOneUse() ||
      !N->getOperand(0).getOperand(0).hasOneUse())
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Src = N->getOperand(0).getOperand(0);
  EVT SrcVT = Src.getValueType();
  if (!SrcVT.isVector() || SrcVT.getVectorElementType() != MVT::i1 ||
      !TLI.isTypeLegal(SrcVT))
    return SDValue();

  if (Src.getOpcode() != ISD::CONCAT_VECTORS)
    return SDValue();

  // We only care about the first subvector of the concat, we expect the
  // other subvectors to be ignored due to the AND if we make the change.
  SDValue SubVec = Src.getOperand(0);
  EVT SubVecVT = SubVec.getValueType();

  // First subvector should be a setcc with a legal result type. The RHS of the
  // AND should be a mask with this many bits.
  if (SubVec.getOpcode() != ISD::SETCC || !TLI.isTypeLegal(SubVecVT) ||
      !N->getConstantOperandAPInt(1).isMask(SubVecVT.getVectorNumElements()))
    return SDValue();

  EVT SetccVT = SubVec.getOperand(0).getValueType();
  if (!TLI.isTypeLegal(SetccVT) ||
      !(Subtarget.hasVLX() || SetccVT.is512BitVector()))
    return SDValue();

  if (!(Subtarget.hasBWI() || SetccVT.getScalarSizeInBits() >= 32))
    return SDValue();

  // We passed all the checks. Rebuild the concat_vectors with zeroes
  // and cast it back to VT.
  SDLoc dl(N);
  SmallVector<SDValue, 4> Ops(Src.getNumOperands(),
                              DAG.getConstant(0, dl, SubVecVT));
  Ops[0] = SubVec;
  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT,
                               Ops);
  return DAG.getBitcast(VT, Concat);
}

static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
                          const X86Subtarget &Subtarget) {

@@ -39150,6 +39215,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
    }
  }

  if (SDValue V = combineScalarAndWithMaskSetcc(N, DAG, Subtarget))
    return V;

  if (DCI.isBeforeLegalizeOps())
    return SDValue();
@@ -675,7 +675,6 @@ define i1 @allones_v4i32_sign(<4 x i32> %arg) {
; SKX: # %bb.0:
; SKX-NEXT: vpmovd2m %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $15, %al
; SKX-NEXT: cmpb $15, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq

@@ -714,8 +713,7 @@ define i1 @allzeros_v4i32_sign(<4 x i32> %arg) {
; SKX-LABEL: allzeros_v4i32_sign:
; SKX: # %bb.0:
; SKX-NEXT: vpmovd2m %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $15, %al
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
  %tmp = icmp slt <4 x i32> %arg, zeroinitializer

@@ -963,7 +961,6 @@ define i1 @allones_v4i64_sign(<4 x i64> %arg) {
; SKX: # %bb.0:
; SKX-NEXT: vpmovq2m %ymm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $15, %al
; SKX-NEXT: cmpb $15, %al
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper

@@ -1005,8 +1002,7 @@ define i1 @allzeros_v4i64_sign(<4 x i64> %arg) {
; SKX-LABEL: allzeros_v4i64_sign:
; SKX: # %bb.0:
; SKX-NEXT: vpmovq2m %ymm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $15, %al
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq

@@ -1971,7 +1967,6 @@ define i1 @allones_v4i32_and1(<4 x i32> %arg) {
; SKX: # %bb.0:
; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $15, %al
; SKX-NEXT: cmpb $15, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq

@@ -2012,8 +2007,7 @@ define i1 @allzeros_v4i32_and1(<4 x i32> %arg) {
; SKX-LABEL: allzeros_v4i32_and1:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $15, %al
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
  %tmp = and <4 x i32> %arg, <i32 1, i32 1, i32 1, i32 1>

@@ -2310,7 +2304,6 @@ define i1 @allones_v2i64_and1(<2 x i64> %arg) {
; SKX: # %bb.0:
; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $3, %al
; SKX-NEXT: cmpb $3, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq

@@ -2352,8 +2345,7 @@ define i1 @allzeros_v2i64_and1(<2 x i64> %arg) {
; SKX-LABEL: allzeros_v2i64_and1:
; SKX: # %bb.0:
; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $3, %al
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
  %tmp = and <2 x i64> %arg, <i64 1, i64 1>

@@ -2410,7 +2402,6 @@ define i1 @allones_v4i64_and1(<4 x i64> %arg) {
; SKX: # %bb.0:
; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $15, %al
; SKX-NEXT: cmpb $15, %al
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper

@@ -2467,8 +2458,7 @@ define i1 @allzeros_v4i64_and1(<4 x i64> %arg) {
; SKX-LABEL: allzeros_v4i64_and1:
; SKX: # %bb.0:
; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $15, %al
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq

@@ -3380,7 +3370,6 @@ define i1 @allones_v4i32_and4(<4 x i32> %arg) {
; SKX: # %bb.0:
; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $15, %al
; SKX-NEXT: cmpb $15, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq

@@ -3421,8 +3410,7 @@ define i1 @allzeros_v4i32_and4(<4 x i32> %arg) {
; SKX-LABEL: allzeros_v4i32_and4:
; SKX: # %bb.0:
; SKX-NEXT: vptestmd {{.*}}(%rip){1to4}, %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $15, %al
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
  %tmp = and <4 x i32> %arg, <i32 4, i32 4, i32 4, i32 4>

@@ -3719,7 +3707,6 @@ define i1 @allones_v2i64_and4(<2 x i64> %arg) {
; SKX: # %bb.0:
; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $3, %al
; SKX-NEXT: cmpb $3, %al
; SKX-NEXT: sete %al
; SKX-NEXT: retq

@@ -3761,8 +3748,7 @@ define i1 @allzeros_v2i64_and4(<2 x i64> %arg) {
; SKX-LABEL: allzeros_v2i64_and4:
; SKX: # %bb.0:
; SKX-NEXT: vptestmq {{.*}}(%rip), %xmm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $3, %al
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: retq
  %tmp = and <2 x i64> %arg, <i64 4, i64 4>

@@ -3819,7 +3805,6 @@ define i1 @allones_v4i64_and4(<4 x i64> %arg) {
; SKX: # %bb.0:
; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andb $15, %al
; SKX-NEXT: cmpb $15, %al
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper

@@ -3876,8 +3861,7 @@ define i1 @allzeros_v4i64_and4(<4 x i64> %arg) {
; SKX-LABEL: allzeros_v4i64_and4:
; SKX: # %bb.0:
; SKX-NEXT: vptestmq {{.*}}(%rip){1to4}, %ymm0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $15, %al
; SKX-NEXT: kortestb %k0, %k0
; SKX-NEXT: sete %al
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
@@ -937,7 +937,6 @@ define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) {
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: andb $3, %al
; AVX512-NEXT: cmpb $3, %al
; AVX512-NEXT: sete %al
; AVX512-NEXT: retq

@@ -969,7 +968,6 @@ define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) {
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: andb $15, %al
; AVX512-NEXT: cmpb $15, %al
; AVX512-NEXT: sete %al
; AVX512-NEXT: retq

@@ -1006,7 +1004,6 @@ define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) {
; AVX512: # %bb.0:
; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: andb $15, %al
; AVX512-NEXT: cmpb $15, %al
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper

@@ -1087,7 +1084,6 @@ define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: andb $3, %al
; AVX512-NEXT: cmpb $3, %al
; AVX512-NEXT: sete %al
; AVX512-NEXT: retq

@@ -1121,7 +1117,6 @@ define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: andb $15, %al
; AVX512-NEXT: cmpb $15, %al
; AVX512-NEXT: sete %al
; AVX512-NEXT: retq

@@ -1244,7 +1239,6 @@ define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: andb $15, %al
; AVX512-NEXT: cmpb $15, %al
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
@@ -857,7 +857,7 @@ define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) {
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: testb $3, %al
; AVX512-NEXT: testb %al, %al
; AVX512-NEXT: setne %al
; AVX512-NEXT: retq
  %a = fcmp ogt <2 x double> %x, %y

@@ -888,7 +888,7 @@ define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) {
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: testb $15, %al
; AVX512-NEXT: testb %al, %al
; AVX512-NEXT: setne %al
; AVX512-NEXT: retq
  %a = fcmp oeq <4 x float> %x, %y

@@ -924,7 +924,7 @@ define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) {
; AVX512: # %bb.0:
; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: testb $15, %al
; AVX512-NEXT: testb %al, %al
; AVX512-NEXT: setne %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq

@@ -1004,7 +1004,7 @@ define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: testb $3, %al
; AVX512-NEXT: testb %al, %al
; AVX512-NEXT: setne %al
; AVX512-NEXT: retq
  %a = icmp ugt <2 x i64> %x, %y

@@ -1035,7 +1035,7 @@ define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpneqd %xmm1, %xmm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: testb $15, %al
; AVX512-NEXT: testb %al, %al
; AVX512-NEXT: setne %al
; AVX512-NEXT: retq
  %a = icmp ne <4 x i32> %x, %y

@@ -1157,7 +1157,7 @@ define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: testb $15, %al
; AVX512-NEXT: testb %al, %al
; AVX512-NEXT: setne %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -55,7 +55,6 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) {
; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: andb $3, %al
; AVX512VL-NEXT: cmpb $3, %al
; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: retq

@@ -108,7 +107,6 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: andb $15, %al
; AVX512VL-NEXT: cmpb $15, %al
; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: retq

@@ -259,7 +257,6 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) {
; AVX512VL-NEXT: vpsllq $63, %ymm0, %ymm0
; AVX512VL-NEXT: vptestmq %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: andb $15, %al
; AVX512VL-NEXT: cmpb $15, %al
; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: vzeroupper

@@ -943,7 +940,6 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: andb $3, %al
; AVX512VL-NEXT: cmpb $3, %al
; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: retq

@@ -997,7 +993,6 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmd %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: andb $15, %al
; AVX512VL-NEXT: cmpb $15, %al
; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: retq

@@ -1198,7 +1193,6 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmq %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: andb $15, %al
; AVX512VL-NEXT: cmpb $15, %al
; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: vzeroupper
@@ -53,7 +53,7 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) {
; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb $3, %al
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: retq
  %a = trunc <2 x i64> %0 to <2 x i1>

@@ -103,7 +103,7 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb $15, %al
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: retq
  %a = trunc <4 x i32> %0 to <4 x i1>

@@ -251,7 +251,7 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) {
; AVX512VL-NEXT: vpsllq $63, %ymm0, %ymm0
; AVX512VL-NEXT: vptestmq %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb $15, %al
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq

@@ -932,7 +932,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmq %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb $3, %al
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: retq
  %a = icmp eq <2 x i64> %0, zeroinitializer

@@ -983,7 +983,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmd %xmm0, %xmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb $15, %al
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: retq
  %a = icmp eq <4 x i32> %0, zeroinitializer

@@ -1181,7 +1181,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vptestnmq %ymm0, %ymm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb $15, %al
; AVX512VL-NEXT: testb %al, %al
; AVX512VL-NEXT: setne %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq