forked from OSchip/llvm-project
[X86][AVX] Push sign extensions of comparison bool results through bitops (PR42025)
As discussed on PR42025, with more complex boolean math we can end up with many truncations/extensions of the comparison results through each bitop. This patch handles the cases introduced in combineBitcastvxi1 by pushing the sign extension through the AND/OR/XOR ops so it's just the original SETCC ops that get extended. Differential Revision: https://reviews.llvm.org/D68226 llvm-svn: 373834
This commit is contained in:
parent
e2321bb448
commit
8815be04ec
|
@ -35121,6 +35121,23 @@ static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Helper to push sign extension of vXi1 SETCC result through bitops.
// Rebuilds the expression rooted at Src with result type SExtVT: a SETCC node
// is wrapped in a SIGN_EXTEND to SExtVT, while an AND/XOR/OR node is re-created
// with the same opcode at SExtVT over its two recursively extended operands.
// The net effect is that only the SETCC nodes themselves get extended.
// Any other opcode falls out of the switch into llvm_unreachable.
// NOTE(review): callers presumably pre-validate Src (combineBitcastvxi1 only
// requests propagation after checkBitcastSrcVectorSize succeeds) - confirm that
// check admits nothing but SETCC/AND/XOR/OR.
|
||||||
|
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
|
||||||
|
SDValue Src, const SDLoc &DL) {
|
||||||
|
switch (Src.getOpcode()) {
|
||||||
|
case ISD::SETCC:
|
||||||
|
return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src); // leaf: extend the compare itself
|
||||||
|
case ISD::AND:
|
||||||
|
case ISD::XOR:
|
||||||
|
case ISD::OR:
|
||||||
|
return DAG.getNode( // same bitop, now producing SExtVT
|
||||||
|
Src.getOpcode(), DL, SExtVT,
|
||||||
|
signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
|
||||||
|
signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
|
||||||
|
}
|
||||||
|
llvm_unreachable("Unexpected node type for vXi1 sign extension"); // no other opcodes are handled
|
||||||
|
}
|
||||||
|
|
||||||
// Try to match patterns such as
|
// Try to match patterns such as
|
||||||
// (i16 bitcast (v16i1 x))
|
// (i16 bitcast (v16i1 x))
|
||||||
// ->
|
// ->
|
||||||
|
@ -35159,6 +35176,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
|
||||||
// For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
|
// For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
|
||||||
// (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
|
// (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
|
||||||
MVT SExtVT;
|
MVT SExtVT;
|
||||||
|
bool PropagateSExt = false;
|
||||||
switch (SrcVT.getSimpleVT().SimpleTy) {
|
switch (SrcVT.getSimpleVT().SimpleTy) {
|
||||||
default:
|
default:
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
@ -35169,8 +35187,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
|
||||||
SExtVT = MVT::v4i32;
|
SExtVT = MVT::v4i32;
|
||||||
// For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
|
// For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
|
||||||
// sign-extend to a 256-bit operation to avoid truncation.
|
// sign-extend to a 256-bit operation to avoid truncation.
|
||||||
if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256))
|
if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256)) {
|
||||||
SExtVT = MVT::v4i64;
|
SExtVT = MVT::v4i64;
|
||||||
|
PropagateSExt = true;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case MVT::v8i1:
|
case MVT::v8i1:
|
||||||
SExtVT = MVT::v8i16;
|
SExtVT = MVT::v8i16;
|
||||||
|
@ -35179,11 +35199,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
|
||||||
// If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
|
// If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
|
||||||
// 256-bit because the shuffle is cheaper than sign extending the result of
|
// 256-bit because the shuffle is cheaper than sign extending the result of
|
||||||
// the compare.
|
// the compare.
|
||||||
// TODO : use checkBitcastSrcVectorSize
|
if (Subtarget.hasAVX() && (checkBitcastSrcVectorSize(Src, 256) ||
|
||||||
if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() &&
|
checkBitcastSrcVectorSize(Src, 512))) {
|
||||||
(Src.getOperand(0).getValueType().is256BitVector() ||
|
|
||||||
Src.getOperand(0).getValueType().is512BitVector())) {
|
|
||||||
SExtVT = MVT::v8i32;
|
SExtVT = MVT::v8i32;
|
||||||
|
PropagateSExt = true;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case MVT::v16i1:
|
case MVT::v16i1:
|
||||||
|
@ -35206,7 +35225,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
|
||||||
return SDValue();
|
return SDValue();
|
||||||
};
|
};
|
||||||
|
|
||||||
SDValue V = DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
|
SDValue V = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
|
||||||
|
: DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
|
||||||
|
|
||||||
if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8 || SExtVT == MVT::v64i8) {
|
if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8 || SExtVT == MVT::v64i8) {
|
||||||
V = getPMOVMSKB(DL, V, DAG, Subtarget);
|
V = getPMOVMSKB(DL, V, DAG, Subtarget);
|
||||||
|
|
|
@ -55,17 +55,17 @@ define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: v4i64:
|
; AVX1-LABEL: v4i64:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
|
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
|
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
|
||||||
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
|
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
|
||||||
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
|
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1
|
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
|
|
||||||
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1
|
|
||||||
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
|
||||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
|
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
|
||||||
; AVX1-NEXT: vandpd %ymm1, %ymm0, %ymm0
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||||
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
|
||||||
|
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
|
||||||
|
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
|
||||||
|
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||||
|
; AVX1-NEXT: vandpd %ymm2, %ymm0, %ymm0
|
||||||
; AVX1-NEXT: vmovmskpd %ymm0, %eax
|
; AVX1-NEXT: vmovmskpd %ymm0, %eax
|
||||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
|
@ -73,9 +73,9 @@ define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: v4i64:
|
; AVX2-LABEL: v4i64:
|
||||||
; AVX2: # %bb.0:
|
; AVX2: # %bb.0:
|
||||||
|
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
|
||||||
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
|
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
|
||||||
; AVX2-NEXT: vmovmskpd %ymm0, %eax
|
; AVX2-NEXT: vmovmskpd %ymm0, %eax
|
||||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
|
@ -121,9 +121,9 @@ define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double>
|
||||||
;
|
;
|
||||||
; AVX12-LABEL: v4f64:
|
; AVX12-LABEL: v4f64:
|
||||||
; AVX12: # %bb.0:
|
; AVX12: # %bb.0:
|
||||||
|
; AVX12-NEXT: vcmpltpd %ymm2, %ymm3, %ymm2
|
||||||
; AVX12-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
|
; AVX12-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
|
||||||
; AVX12-NEXT: vcmpltpd %ymm2, %ymm3, %ymm1
|
; AVX12-NEXT: vandpd %ymm2, %ymm0, %ymm0
|
||||||
; AVX12-NEXT: vandpd %ymm1, %ymm0, %ymm0
|
|
||||||
; AVX12-NEXT: vmovmskpd %ymm0, %eax
|
; AVX12-NEXT: vmovmskpd %ymm0, %eax
|
||||||
; AVX12-NEXT: # kill: def $al killed $al killed $eax
|
; AVX12-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX12-NEXT: vzeroupper
|
; AVX12-NEXT: vzeroupper
|
||||||
|
@ -241,32 +241,28 @@ define i8 @v8i32_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: v8i32_and:
|
; AVX1-LABEL: v8i32_and:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm4
|
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
|
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4
|
||||||
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
|
|
||||||
; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1
|
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
|
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
|
|
||||||
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
|
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
|
||||||
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
|
||||||
; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||||
; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
|
||||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm3
|
||||||
|
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
||||||
|
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||||
|
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
|
||||||
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: v8i32_and:
|
; AVX2-LABEL: v8i32_and:
|
||||||
; AVX2: # %bb.0:
|
; AVX2: # %bb.0:
|
||||||
|
; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm2
|
||||||
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
|
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm1
|
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
||||||
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
|
@ -314,32 +310,28 @@ define i8 @v8i32_or(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: v8i32_or:
|
; AVX1-LABEL: v8i32_or:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm4
|
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
|
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4
|
||||||
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
|
|
||||||
; AVX1-NEXT: vpor %xmm1, %xmm4, %xmm1
|
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
|
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
|
|
||||||
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
|
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
|
||||||
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
|
||||||
; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||||
; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
|
||||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm3
|
||||||
|
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
||||||
|
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||||
|
; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
|
||||||
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: v8i32_or:
|
; AVX2-LABEL: v8i32_or:
|
||||||
; AVX2: # %bb.0:
|
; AVX2: # %bb.0:
|
||||||
|
; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm2
|
||||||
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
|
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm1
|
; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
|
||||||
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
|
@ -395,42 +387,36 @@ define i8 @v8i32_or_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d,
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: v8i32_or_and:
|
; AVX1-LABEL: v8i32_or_and:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm6
|
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
|
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm7
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
; AVX1-NEXT: vpcmpgtd %xmm6, %xmm7, %xmm6
|
||||||
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm1
|
|
||||||
; AVX1-NEXT: vpor %xmm1, %xmm6, %xmm1
|
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
|
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
|
|
||||||
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
|
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
|
||||||
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm2, %ymm2
|
||||||
; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||||
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
|
||||||
|
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm6, %xmm3
|
||||||
|
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
|
||||||
|
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||||
|
; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm1
|
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm1
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm2
|
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm2
|
||||||
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
|
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
|
||||||
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm2
|
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm2
|
||||||
; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
|
||||||
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
|
||||||
; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: v8i32_or_and:
|
; AVX2-LABEL: v8i32_or_and:
|
||||||
; AVX2: # %bb.0:
|
; AVX2: # %bb.0:
|
||||||
|
; AVX2-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm2
|
||||||
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
|
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm1
|
; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
|
|
||||||
; AVX2-NEXT: vpcmpeqd %ymm5, %ymm4, %ymm1
|
; AVX2-NEXT: vpcmpeqd %ymm5, %ymm4, %ymm1
|
||||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
|
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
|
|
||||||
; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
|
@ -482,13 +468,10 @@ define i8 @v8f32_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>
|
||||||
;
|
;
|
||||||
; AVX12-LABEL: v8f32_and:
|
; AVX12-LABEL: v8f32_and:
|
||||||
; AVX12: # %bb.0:
|
; AVX12: # %bb.0:
|
||||||
|
; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm2
|
||||||
; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
|
; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
|
||||||
; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm1
|
; AVX12-NEXT: vandps %ymm2, %ymm0, %ymm0
|
||||||
; AVX12-NEXT: vandps %ymm1, %ymm0, %ymm0
|
; AVX12-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
||||||
; AVX12-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX12-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX12-NEXT: # kill: def $al killed $al killed $eax
|
; AVX12-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX12-NEXT: vzeroupper
|
; AVX12-NEXT: vzeroupper
|
||||||
; AVX12-NEXT: retq
|
; AVX12-NEXT: retq
|
||||||
|
@ -536,13 +519,10 @@ define i8 @v8f32_xor(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>
|
||||||
;
|
;
|
||||||
; AVX12-LABEL: v8f32_xor:
|
; AVX12-LABEL: v8f32_xor:
|
||||||
; AVX12: # %bb.0:
|
; AVX12: # %bb.0:
|
||||||
|
; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm2
|
||||||
; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
|
; AVX12-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
|
||||||
; AVX12-NEXT: vcmpltps %ymm2, %ymm3, %ymm1
|
; AVX12-NEXT: vxorps %ymm2, %ymm0, %ymm0
|
||||||
; AVX12-NEXT: vxorps %ymm1, %ymm0, %ymm0
|
; AVX12-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm1
|
|
||||||
; AVX12-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX12-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX12-NEXT: # kill: def $al killed $al killed $eax
|
; AVX12-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX12-NEXT: vzeroupper
|
; AVX12-NEXT: vzeroupper
|
||||||
; AVX12-NEXT: retq
|
; AVX12-NEXT: retq
|
||||||
|
@ -604,17 +584,12 @@ define i8 @v8f32_xor_and(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x fl
|
||||||
;
|
;
|
||||||
; AVX12-LABEL: v8f32_xor_and:
|
; AVX12-LABEL: v8f32_xor_and:
|
||||||
; AVX12: # %bb.0:
|
; AVX12: # %bb.0:
|
||||||
|
; AVX12-NEXT: vcmpeq_uqps %ymm3, %ymm2, %ymm2
|
||||||
; AVX12-NEXT: vcmpnleps %ymm1, %ymm0, %ymm0
|
; AVX12-NEXT: vcmpnleps %ymm1, %ymm0, %ymm0
|
||||||
; AVX12-NEXT: vcmpeq_uqps %ymm3, %ymm2, %ymm1
|
; AVX12-NEXT: vxorps %ymm2, %ymm0, %ymm0
|
||||||
; AVX12-NEXT: vxorps %ymm1, %ymm0, %ymm0
|
|
||||||
; AVX12-NEXT: vcmpltps %ymm4, %ymm5, %ymm1
|
; AVX12-NEXT: vcmpltps %ymm4, %ymm5, %ymm1
|
||||||
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
|
; AVX12-NEXT: vandps %ymm1, %ymm0, %ymm0
|
||||||
; AVX12-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
; AVX12-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
|
|
||||||
; AVX12-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
|
||||||
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX12-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX12-NEXT: # kill: def $al killed $al killed $eax
|
; AVX12-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX12-NEXT: vzeroupper
|
; AVX12-NEXT: vzeroupper
|
||||||
; AVX12-NEXT: retq
|
; AVX12-NEXT: retq
|
||||||
|
|
|
@ -50,56 +50,45 @@ define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
|
||||||
;
|
;
|
||||||
; AVX1-LABEL: v8i64:
|
; AVX1-LABEL: v8i64:
|
||||||
; AVX1: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8
|
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm8
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
|
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm9
|
||||||
; AVX1-NEXT: vpcmpgtq %xmm8, %xmm9, %xmm8
|
; AVX1-NEXT: vpcmpgtq %xmm8, %xmm9, %xmm8
|
||||||
|
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
|
||||||
|
; AVX1-NEXT: vpackssdw %xmm8, %xmm5, %xmm8
|
||||||
|
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm7
|
||||||
|
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm5
|
||||||
|
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
|
||||||
|
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm4
|
||||||
|
; AVX1-NEXT: vpackssdw %xmm5, %xmm4, %xmm4
|
||||||
|
; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm4, %ymm4
|
||||||
|
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
|
||||||
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
|
||||||
|
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
|
||||||
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
|
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
|
||||||
; AVX1-NEXT: vpackssdw %xmm8, %xmm1, %xmm8
|
; AVX1-NEXT: vpackssdw %xmm5, %xmm1, %xmm1
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
|
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
|
||||||
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
|
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm5, %xmm3
|
||||||
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
|
||||||
; AVX1-NEXT: vpackssdw %xmm8, %xmm0, %xmm0
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1
|
; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm1
|
|
||||||
; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm2
|
|
||||||
; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1
|
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2
|
|
||||||
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
|
|
||||||
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
|
|
||||||
; AVX1-NEXT: vpcmpgtq %xmm6, %xmm4, %xmm3
|
|
||||||
; AVX1-NEXT: vpackssdw %xmm2, %xmm3, %xmm2
|
|
||||||
; AVX1-NEXT: vpackssdw %xmm1, %xmm2, %xmm1
|
|
||||||
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX1-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX1-NEXT: vzeroupper
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: v8i64:
|
; AVX2-LABEL: v8i64:
|
||||||
; AVX2: # %bb.0:
|
; AVX2: # %bb.0:
|
||||||
|
; AVX2-NEXT: vpcmpgtq %ymm7, %ymm5, %ymm5
|
||||||
|
; AVX2-NEXT: vpcmpgtq %ymm6, %ymm4, %ymm4
|
||||||
|
; AVX2-NEXT: vpackssdw %ymm5, %ymm4, %ymm4
|
||||||
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm1
|
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm1
|
||||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
|
|
||||||
; AVX2-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
|
|
||||||
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
|
; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||||
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||||
; AVX2-NEXT: vpcmpgtq %ymm7, %ymm5, %ymm1
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
|
|
||||||
; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
|
||||||
; AVX2-NEXT: vpcmpgtq %ymm6, %ymm4, %ymm2
|
|
||||||
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
|
|
||||||
; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
|
|
||||||
; AVX2-NEXT: vpackssdw %xmm1, %xmm2, %xmm1
|
|
||||||
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
|
||||||
; AVX2-NEXT: vpmovmskb %xmm0, %eax
|
|
||||||
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX2-NEXT: vzeroupper
|
; AVX2-NEXT: vzeroupper
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
|
@ -171,29 +160,42 @@ define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double>
|
||||||
; SSE-NEXT: # kill: def $al killed $al killed $eax
|
; SSE-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX12-LABEL: v8f64:
|
; AVX1-LABEL: v8f64:
|
||||||
; AVX12: # %bb.0:
|
; AVX1: # %bb.0:
|
||||||
; AVX12-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1
|
; AVX1-NEXT: vcmpltpd %ymm5, %ymm7, %ymm5
|
||||||
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
|
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm7
|
||||||
; AVX12-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
|
; AVX1-NEXT: vpackssdw %xmm7, %xmm5, %xmm5
|
||||||
; AVX12-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0
|
; AVX1-NEXT: vcmpltpd %ymm4, %ymm6, %ymm4
|
||||||
; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
|
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm6
|
||||||
; AVX12-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
; AVX1-NEXT: vpackssdw %xmm6, %xmm4, %xmm4
|
||||||
; AVX12-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm4, %ymm4
|
||||||
; AVX12-NEXT: vcmpltpd %ymm5, %ymm7, %ymm1
|
; AVX1-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1
|
||||||
; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
|
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
|
||||||
; AVX12-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
|
; AVX1-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
|
||||||
; AVX12-NEXT: vcmpltpd %ymm4, %ymm6, %ymm2
|
; AVX1-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0
|
||||||
; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm3
|
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||||
; AVX12-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
|
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
|
||||||
; AVX12-NEXT: vpackssdw %xmm1, %xmm2, %xmm1
|
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||||
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
|
; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0
|
||||||
; AVX12-NEXT: vpsllw $15, %xmm0, %xmm0
|
; AVX1-NEXT: vmovmskps %ymm0, %eax
|
||||||
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
|
; AVX1-NEXT: # kill: def $al killed $al killed $eax
|
||||||
; AVX12-NEXT: vpmovmskb %xmm0, %eax
|
; AVX1-NEXT: vzeroupper
|
||||||
; AVX12-NEXT: # kill: def $al killed $al killed $eax
|
; AVX1-NEXT: retq
|
||||||
; AVX12-NEXT: vzeroupper
|
;
|
||||||
; AVX12-NEXT: retq
|
; AVX2-LABEL: v8f64:
|
||||||
|
; AVX2: # %bb.0:
|
||||||
|
; AVX2-NEXT: vcmpltpd %ymm5, %ymm7, %ymm5
|
||||||
|
; AVX2-NEXT: vcmpltpd %ymm4, %ymm6, %ymm4
|
||||||
|
; AVX2-NEXT: vpackssdw %ymm5, %ymm4, %ymm4
|
||||||
|
; AVX2-NEXT: vcmpltpd %ymm1, %ymm3, %ymm1
|
||||||
|
; AVX2-NEXT: vcmpltpd %ymm0, %ymm2, %ymm0
|
||||||
|
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
|
||||||
|
; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
|
||||||
|
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||||
|
; AVX2-NEXT: vmovmskps %ymm0, %eax
|
||||||
|
; AVX2-NEXT: # kill: def $al killed $al killed $eax
|
||||||
|
; AVX2-NEXT: vzeroupper
|
||||||
|
; AVX2-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX512F-LABEL: v8f64:
|
; AVX512F-LABEL: v8f64:
|
||||||
; AVX512F: # %bb.0:
|
; AVX512F: # %bb.0:
|
||||||
|
|
Loading…
Reference in New Issue