forked from OSchip/llvm-project
[X86][AVX] combineBitcastvxi1 - improve handling of vectors truncated to vXi1
If we're truncating to vXi1 from a wider type, then prefer the original wider vector as it simplifies folding the separate truncations + extensions. On AVX1, this is only worth it for v8i1 cases, not v4i1 where we're always better off truncating down to v4i32 for movmsk. Helps with some regressions encountered in D96609.
This commit is contained in:
parent
338d162755
commit
7920527796
|
@ -39201,17 +39201,22 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
|
||||||
Op, DemandedBits, DemandedElts, DAG, Depth);
|
Op, DemandedBits, DemandedElts, DAG, Depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper to peek through bitops/setcc to determine size of source vector.
|
// Helper to peek through bitops/trunc/setcc to determine size of source vector.
|
||||||
// Allows combineBitcastvxi1 to determine what size vector generated a <X x i1>.
|
// Allows combineBitcastvxi1 to determine what size vector generated a <X x i1>.
|
||||||
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size) {
|
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
|
||||||
|
bool AllowTruncate) {
|
||||||
switch (Src.getOpcode()) {
|
switch (Src.getOpcode()) {
|
||||||
|
case ISD::TRUNCATE:
|
||||||
|
if (!AllowTruncate)
|
||||||
|
return false;
|
||||||
|
LLVM_FALLTHROUGH;
|
||||||
case ISD::SETCC:
|
case ISD::SETCC:
|
||||||
return Src.getOperand(0).getValueSizeInBits() == Size;
|
return Src.getOperand(0).getValueSizeInBits() == Size;
|
||||||
case ISD::AND:
|
case ISD::AND:
|
||||||
case ISD::XOR:
|
case ISD::XOR:
|
||||||
case ISD::OR:
|
case ISD::OR:
|
||||||
return checkBitcastSrcVectorSize(Src.getOperand(0), Size) &&
|
return checkBitcastSrcVectorSize(Src.getOperand(0), Size, AllowTruncate) &&
|
||||||
checkBitcastSrcVectorSize(Src.getOperand(1), Size);
|
checkBitcastSrcVectorSize(Src.getOperand(1), Size, AllowTruncate);
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -39266,6 +39271,7 @@ static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
|
||||||
SDValue Src, const SDLoc &DL) {
|
SDValue Src, const SDLoc &DL) {
|
||||||
switch (Src.getOpcode()) {
|
switch (Src.getOpcode()) {
|
||||||
case ISD::SETCC:
|
case ISD::SETCC:
|
||||||
|
case ISD::TRUNCATE:
|
||||||
return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
|
return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
|
||||||
case ISD::AND:
|
case ISD::AND:
|
||||||
case ISD::XOR:
|
case ISD::XOR:
|
||||||
|
@ -39349,7 +39355,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
|
||||||
SExtVT = MVT::v4i32;
|
SExtVT = MVT::v4i32;
|
||||||
// For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
|
// For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
|
||||||
// sign-extend to a 256-bit operation to avoid truncation.
|
// sign-extend to a 256-bit operation to avoid truncation.
|
||||||
if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256)) {
|
if (Subtarget.hasAVX() &&
|
||||||
|
checkBitcastSrcVectorSize(Src, 256, Subtarget.hasAVX2())) {
|
||||||
SExtVT = MVT::v4i64;
|
SExtVT = MVT::v4i64;
|
||||||
PropagateSExt = true;
|
PropagateSExt = true;
|
||||||
}
|
}
|
||||||
|
@ -39361,8 +39368,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
|
||||||
// If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
|
// If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
|
||||||
// 256-bit because the shuffle is cheaper than sign extending the result of
|
// 256-bit because the shuffle is cheaper than sign extending the result of
|
||||||
// the compare.
|
// the compare.
|
||||||
if (Subtarget.hasAVX() && (checkBitcastSrcVectorSize(Src, 256) ||
|
if (Subtarget.hasAVX() && (checkBitcastSrcVectorSize(Src, 256, true) ||
|
||||||
checkBitcastSrcVectorSize(Src, 512))) {
|
checkBitcastSrcVectorSize(Src, 512, true))) {
|
||||||
SExtVT = MVT::v8i32;
|
SExtVT = MVT::v8i32;
|
||||||
PropagateSExt = true;
|
PropagateSExt = true;
|
||||||
}
|
}
|
||||||
|
@ -39387,7 +39394,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// Split if this is a <64 x i8> comparison result.
|
// Split if this is a <64 x i8> comparison result.
|
||||||
if (checkBitcastSrcVectorSize(Src, 512)) {
|
if (checkBitcastSrcVectorSize(Src, 512, false)) {
|
||||||
SExtVT = MVT::v64i8;
|
SExtVT = MVT::v64i8;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -219,16 +219,25 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) {
|
||||||
; SSE-NEXT: sete %al
|
; SSE-NEXT: sete %al
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: trunc_v4i64_v4i1:
|
; AVX1-LABEL: trunc_v4i64_v4i1:
|
||||||
; AVX: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||||
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
|
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
|
||||||
; AVX-NEXT: vpslld $31, %xmm0, %xmm0
|
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
|
||||||
; AVX-NEXT: vmovmskps %xmm0, %eax
|
; AVX1-NEXT: vmovmskps %xmm0, %eax
|
||||||
; AVX-NEXT: cmpb $15, %al
|
; AVX1-NEXT: cmpb $15, %al
|
||||||
; AVX-NEXT: sete %al
|
; AVX1-NEXT: sete %al
|
||||||
; AVX-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: trunc_v4i64_v4i1:
|
||||||
|
; AVX2: # %bb.0:
|
||||||
|
; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
|
||||||
|
; AVX2-NEXT: vmovmskpd %ymm0, %eax
|
||||||
|
; AVX2-NEXT: cmpb $15, %al
|
||||||
|
; AVX2-NEXT: sete %al
|
||||||
|
; AVX2-NEXT: vzeroupper
|
||||||
|
; AVX2-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: trunc_v4i64_v4i1:
|
; AVX512F-LABEL: trunc_v4i64_v4i1:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
|
@ -296,14 +305,11 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: trunc_v8i32_v8i1:
|
; AVX1-LABEL: trunc_v8i32_v8i1:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
|
||||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u>
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||||
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
|
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||||
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX1-NEXT: cmpb $-1, %al
|
; AVX1-NEXT: cmpb $-1, %al
|
||||||
; AVX1-NEXT: sete %al
|
; AVX1-NEXT: sete %al
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
|
@ -311,11 +317,8 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: trunc_v8i32_v8i1:
|
; AVX2-LABEL: trunc_v8i32_v8i1:
|
||||||
; AVX2: # %bb.0:
|
; AVX2: # %bb.0:
|
||||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
|
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX2-NEXT: cmpb $-1, %al
|
; AVX2-NEXT: cmpb $-1, %al
|
||||||
; AVX2-NEXT: sete %al
|
; AVX2-NEXT: sete %al
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
|
@ -536,17 +539,14 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: trunc_v8i64_v8i1:
|
; AVX1-LABEL: trunc_v8i64_v8i1:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535]
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||||
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
|
|
||||||
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
|
|
||||||
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||||
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
|
||||||
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
|
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
|
||||||
; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX1-NEXT: cmpb $-1, %al
|
; AVX1-NEXT: cmpb $-1, %al
|
||||||
; AVX1-NEXT: sete %al
|
; AVX1-NEXT: sete %al
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
|
@ -557,11 +557,8 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) {
|
||||||
; AVX2-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
|
; AVX2-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
|
||||||
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
|
; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
|
||||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
|
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX2-NEXT: cmpb $-1, %al
|
; AVX2-NEXT: cmpb $-1, %al
|
||||||
; AVX2-NEXT: sete %al
|
; AVX2-NEXT: sete %al
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
|
|
|
@ -212,16 +212,25 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) {
|
||||||
; SSE-NEXT: setne %al
|
; SSE-NEXT: setne %al
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: trunc_v4i64_v4i1:
|
; AVX1-LABEL: trunc_v4i64_v4i1:
|
||||||
; AVX: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||||
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
|
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
|
||||||
; AVX-NEXT: vpslld $31, %xmm0, %xmm0
|
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
|
||||||
; AVX-NEXT: vmovmskps %xmm0, %eax
|
; AVX1-NEXT: vmovmskps %xmm0, %eax
|
||||||
; AVX-NEXT: testb %al, %al
|
; AVX1-NEXT: testb %al, %al
|
||||||
; AVX-NEXT: setne %al
|
; AVX1-NEXT: setne %al
|
||||||
; AVX-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: trunc_v4i64_v4i1:
|
||||||
|
; AVX2: # %bb.0:
|
||||||
|
; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
|
||||||
|
; AVX2-NEXT: vmovmskpd %ymm0, %eax
|
||||||
|
; AVX2-NEXT: testb %al, %al
|
||||||
|
; AVX2-NEXT: setne %al
|
||||||
|
; AVX2-NEXT: vzeroupper
|
||||||
|
; AVX2-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: trunc_v4i64_v4i1:
|
; AVX512F-LABEL: trunc_v4i64_v4i1:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
|
@ -285,25 +294,21 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: trunc_v8i32_v8i1:
|
; AVX1-LABEL: trunc_v8i32_v8i1:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
|
||||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u>
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||||
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
|
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||||
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
|
; AVX1-NEXT: testb %al, %al
|
||||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX1-NEXT: testl $43690, %eax # imm = 0xAAAA
|
|
||||||
; AVX1-NEXT: setne %al
|
; AVX1-NEXT: setne %al
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: trunc_v8i32_v8i1:
|
; AVX2-LABEL: trunc_v8i32_v8i1:
|
||||||
; AVX2: # %bb.0:
|
; AVX2: # %bb.0:
|
||||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
|
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
|
; AVX2-NEXT: testb %al, %al
|
||||||
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX2-NEXT: testl $43690, %eax # imm = 0xAAAA
|
|
||||||
; AVX2-NEXT: setne %al
|
; AVX2-NEXT: setne %al
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
|
@ -521,17 +526,15 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: trunc_v8i64_v8i1:
|
; AVX1-LABEL: trunc_v8i64_v8i1:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535]
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||||
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
|
|
||||||
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
|
|
||||||
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||||
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
|
||||||
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
|
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
|
||||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||||
; AVX1-NEXT: testl $43690, %eax # imm = 0xAAAA
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
||||||
|
; AVX1-NEXT: testb %al, %al
|
||||||
; AVX1-NEXT: setne %al
|
; AVX1-NEXT: setne %al
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
@ -541,11 +544,9 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) {
|
||||||
; AVX2-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
|
; AVX2-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
|
||||||
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
|
; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
|
||||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
|
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
|
; AVX2-NEXT: testb %al, %al
|
||||||
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX2-NEXT: testl $43690, %eax # imm = 0xAAAA
|
|
||||||
; AVX2-NEXT: setne %al
|
; AVX2-NEXT: setne %al
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
|
|
|
@ -215,16 +215,25 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) {
|
||||||
; SSE-NEXT: setnp %al
|
; SSE-NEXT: setnp %al
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: trunc_v4i64_v4i1:
|
; AVX1-LABEL: trunc_v4i64_v4i1:
|
||||||
; AVX: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||||
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
|
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
|
||||||
; AVX-NEXT: vpslld $31, %xmm0, %xmm0
|
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
|
||||||
; AVX-NEXT: vmovmskps %xmm0, %eax
|
; AVX1-NEXT: vmovmskps %xmm0, %eax
|
||||||
; AVX-NEXT: testb %al, %al
|
; AVX1-NEXT: testb %al, %al
|
||||||
; AVX-NEXT: setnp %al
|
; AVX1-NEXT: setnp %al
|
||||||
; AVX-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: trunc_v4i64_v4i1:
|
||||||
|
; AVX2: # %bb.0:
|
||||||
|
; AVX2-NEXT: vpsllq $63, %ymm0, %ymm0
|
||||||
|
; AVX2-NEXT: vmovmskpd %ymm0, %eax
|
||||||
|
; AVX2-NEXT: testb %al, %al
|
||||||
|
; AVX2-NEXT: setnp %al
|
||||||
|
; AVX2-NEXT: vzeroupper
|
||||||
|
; AVX2-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: trunc_v4i64_v4i1:
|
; AVX512F-LABEL: trunc_v4i64_v4i1:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
|
@ -290,14 +299,11 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: trunc_v8i32_v8i1:
|
; AVX1-LABEL: trunc_v8i32_v8i1:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
|
||||||
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u>
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||||
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
|
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||||
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX1-NEXT: testb %al, %al
|
; AVX1-NEXT: testb %al, %al
|
||||||
; AVX1-NEXT: setnp %al
|
; AVX1-NEXT: setnp %al
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
|
@ -305,11 +311,8 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: trunc_v8i32_v8i1:
|
; AVX2-LABEL: trunc_v8i32_v8i1:
|
||||||
; AVX2: # %bb.0:
|
; AVX2: # %bb.0:
|
||||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
|
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX2-NEXT: testb %al, %al
|
; AVX2-NEXT: testb %al, %al
|
||||||
; AVX2-NEXT: setnp %al
|
; AVX2-NEXT: setnp %al
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
|
@ -548,17 +551,14 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: trunc_v8i64_v8i1:
|
; AVX1-LABEL: trunc_v8i64_v8i1:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [65535,65535,65535,65535]
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||||
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
|
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
|
|
||||||
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
|
|
||||||
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||||
; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
|
||||||
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
|
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
|
||||||
; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX1-NEXT: testb %al, %al
|
; AVX1-NEXT: testb %al, %al
|
||||||
; AVX1-NEXT: setnp %al
|
; AVX1-NEXT: setnp %al
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
|
@ -569,11 +569,8 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) {
|
||||||
; AVX2-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
|
; AVX2-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
|
||||||
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
|
; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
|
||||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
|
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX2-NEXT: testb %al, %al
|
; AVX2-NEXT: testb %al, %al
|
||||||
; AVX2-NEXT: setnp %al
|
; AVX2-NEXT: setnp %al
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
|
|
Loading…
Reference in New Issue