[DAGCombiner] Fold xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) anytime before LegalizeVectorOprs

The fold xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) was only being performed at the AfterLegalizeTypes stage; this patch permits the combine to occur at any stage before then as well.
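
For illustration, the pattern being folded looks like this at the IR level (a hypothetical example; the combine itself runs on the equivalent SelectionDAG nodes):

  ; Both xor operands are bitcasts from the same type, so the xor can be
  ; performed directly on the <4 x i32> operands and only the result bitcast,
  ; i.e. bitcast(xor(A, B)) instead of xor(bitcast(A), bitcast(B)).
  define <2 x i64> @xor_of_bitcasts(<4 x i32> %a, <4 x i32> %b) {
    %1 = bitcast <4 x i32> %a to <2 x i64>
    %2 = bitcast <4 x i32> %b to <2 x i64>
    %3 = xor <2 x i64> %1, %2
    ret <2 x i64> %3
  }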

The main aim of this is to improve the ability to recognise bitmasks that can be converted to shuffles.
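
As a concrete case, the mask_v4f32_0127 test updated below has this shape. A sketch of that pattern follows; the second mask constant is inferred as the complement of the first, and the function name is illustrative:

  ; Bitmasks select elements 0-2 from %b and element 3 from %a. With the
  ; fold applied before legalization this is now recognised as a blend
  ; (e.g. a single vblendps on AVX, per the updated checks below).
  define <4 x float> @bitmask_as_blend(<4 x float> %a, <4 x float> %b) {
    %1 = bitcast <4 x float> %a to <2 x i64>
    %2 = bitcast <4 x float> %b to <2 x i64>
    %3 = and <2 x i64> %1, <i64 0, i64 -4294967296>  ; keep only element 3 of %a
    %4 = and <2 x i64> %2, <i64 -1, i64 4294967295>  ; keep elements 0-2 of %b
    %5 = or <2 x i64> %3, %4
    %6 = bitcast <2 x i64> %5 to <4 x float>
    ret <4 x float> %6
  }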

I had to modify a number of AVX512 mask tests as the basic bitcast to/from scalar pattern was being stripped out, preventing testing of the mmask bitops. By replacing the bitcasts with loads we can get almost the same result.
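
A compact illustration, mirroring the mand8/mand8_mem tests in the diff below: with register arguments the bitcasts now fold away and the body lowers to scalar andl/xorl/orl, whereas loading the <8 x i1> operands from memory keeps the kandb/kxorb/korb sequence under test.

  ; Register arguments: after this patch the mask bitops fold back to scalar
  ; integer ops, so the k-register instructions are no longer exercised.
  define i8 @mand8(i8 %x, i8 %y) {
    %ma = bitcast i8 %x to <8 x i1>
    %mb = bitcast i8 %y to <8 x i1>
    %mc = and <8 x i1> %ma, %mb
    %md = xor <8 x i1> %ma, %mb
    %me = or <8 x i1> %mc, %md
    %ret = bitcast <8 x i1> %me to i8
    ret i8 %ret
  }

  ; Memory operands: the loads keep the values as <8 x i1>, so kandb/kxorb/korb
  ; are still generated and can be checked.
  define i8 @mand8_mem(<8 x i1>* %x, <8 x i1>* %y) {
    %ma = load <8 x i1>, <8 x i1>* %x
    %mb = load <8 x i1>, <8 x i1>* %y
    %mc = and <8 x i1> %ma, %mb
    %md = xor <8 x i1> %ma, %mb
    %me = or <8 x i1> %mc, %md
    %ret = bitcast <8 x i1> %me to i8
    ret i8 %ret
  }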

Differential Revision: http://reviews.llvm.org/D18944

llvm-svn: 265998
Simon Pilgrim 2016-04-11 21:10:33 +00:00
parent 5751814eda
commit 82e54871d0
9 changed files with 305 additions and 2423 deletions


@@ -2765,7 +2765,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
}
// Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
// Only perform this optimization after type legalization and before
// Only perform this optimization up until type legalization, before
// LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
// adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
// we don't want to undo this promotion.
@@ -2773,7 +2773,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// on scalars.
if ((N0.getOpcode() == ISD::BITCAST ||
N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
Level == AfterLegalizeTypes) {
Level <= AfterLegalizeTypes) {
SDValue In0 = N0.getOperand(0);
SDValue In1 = N1.getOperand(0);
EVT In0Ty = In0.getValueType();


@@ -77,15 +77,33 @@ define void @mask8_mem(i8* %ptr) {
define i16 @mand16(i16 %x, i16 %y) {
; CHECK-LABEL: mand16:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: andl %esi, %edi
; CHECK-NEXT: orl %eax, %edi
; CHECK-NEXT: movw %di, %ax
; CHECK-NEXT: retq
%ma = bitcast i16 %x to <16 x i1>
%mb = bitcast i16 %y to <16 x i1>
%mc = and <16 x i1> %ma, %mb
%md = xor <16 x i1> %ma, %mb
%me = or <16 x i1> %mc, %md
%ret = bitcast <16 x i1> %me to i16
ret i16 %ret
}
define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
; CHECK-LABEL: mand16_mem:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw (%rdi), %k0
; CHECK-NEXT: kmovw (%rsi), %k1
; CHECK-NEXT: kandw %k1, %k0, %k2
; CHECK-NEXT: kxorw %k1, %k0, %k0
; CHECK-NEXT: korw %k0, %k2, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%ma = bitcast i16 %x to <16 x i1>
%mb = bitcast i16 %y to <16 x i1>
%ma = load <16 x i1>, <16 x i1>* %x
%mb = load <16 x i1>, <16 x i1>* %y
%mc = and <16 x i1> %ma, %mb
%md = xor <16 x i1> %ma, %mb
%me = or <16 x i1> %mc, %md
@@ -265,13 +283,13 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
; KNL: ## BB#0:
; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: jg LBB14_1
; KNL-NEXT: jg LBB15_1
; KNL-NEXT: ## BB#2:
; KNL-NEXT: vpcmpltud %zmm2, %zmm1, %k1
; KNL-NEXT: jmp LBB14_3
; KNL-NEXT: LBB14_1:
; KNL-NEXT: jmp LBB15_3
; KNL-NEXT: LBB15_1:
; KNL-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
; KNL-NEXT: LBB14_3:
; KNL-NEXT: LBB15_3:
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: retq
@@ -280,12 +298,12 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
; SKX: ## BB#0:
; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: jg LBB14_1
; SKX-NEXT: jg LBB15_1
; SKX-NEXT: ## BB#2:
; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: retq
; SKX-NEXT: LBB14_1:
; SKX-NEXT: LBB15_1:
; SKX-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: retq
@@ -300,13 +318,13 @@ define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test9:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: jg LBB15_1
; KNL-NEXT: jg LBB16_1
; KNL-NEXT: ## BB#2:
; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
; KNL-NEXT: jmp LBB15_3
; KNL-NEXT: LBB15_1:
; KNL-NEXT: jmp LBB16_3
; KNL-NEXT: LBB16_1:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: LBB15_3:
; KNL-NEXT: LBB16_3:
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
@@ -316,13 +334,13 @@ define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
; SKX-LABEL: test9:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: jg LBB15_1
; SKX-NEXT: jg LBB16_1
; SKX-NEXT: ## BB#2:
; SKX-NEXT: vpsllw $7, %xmm1, %xmm0
; SKX-NEXT: jmp LBB15_3
; SKX-NEXT: LBB15_1:
; SKX-NEXT: jmp LBB16_3
; SKX-NEXT: LBB16_1:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: LBB15_3:
; SKX-NEXT: LBB16_3:
; SKX-NEXT: vpmovb2m %xmm0, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: retq
@@ -339,22 +357,22 @@ define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test11:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: jg LBB17_2
; KNL-NEXT: jg LBB18_2
; KNL-NEXT: ## BB#1:
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: LBB17_2:
; KNL-NEXT: LBB18_2:
; KNL-NEXT: retq
;
; SKX-LABEL: test11:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: jg LBB17_1
; SKX-NEXT: jg LBB18_1
; SKX-NEXT: ## BB#2:
; SKX-NEXT: vpslld $31, %xmm1, %xmm0
; SKX-NEXT: jmp LBB17_3
; SKX-NEXT: LBB17_1:
; SKX-NEXT: jmp LBB18_3
; SKX-NEXT: LBB18_1:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: LBB17_3:
; SKX-NEXT: LBB18_3:
; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: retq
@@ -794,11 +812,11 @@ define void @ktest_1(<8 x double> %in, double * %base) {
; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: je LBB38_2
; KNL-NEXT: je LBB39_2
; KNL-NEXT: ## BB#1: ## %L1
; KNL-NEXT: vmovapd %zmm0, (%rdi)
; KNL-NEXT: retq
; KNL-NEXT: LBB38_2: ## %L2
; KNL-NEXT: LBB39_2: ## %L2
; KNL-NEXT: vmovapd %zmm0, 8(%rdi)
; KNL-NEXT: retq
;
@@ -809,11 +827,11 @@ define void @ktest_1(<8 x double> %in, double * %base) {
; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
; SKX-NEXT: ktestb %k0, %k0
; SKX-NEXT: je LBB38_2
; SKX-NEXT: je LBB39_2
; SKX-NEXT: ## BB#1: ## %L1
; SKX-NEXT: vmovapd %zmm0, (%rdi)
; SKX-NEXT: retq
; SKX-NEXT: LBB38_2: ## %L2
; SKX-NEXT: LBB39_2: ## %L2
; SKX-NEXT: vmovapd %zmm0, 8(%rdi)
; SKX-NEXT: retq
%addr1 = getelementptr double, double * %base, i64 0
@@ -859,12 +877,12 @@ define void @ktest_2(<32 x float> %in, float * %base) {
; SKX-NEXT: kunpckwd %k1, %k2, %k1
; SKX-NEXT: kord %k1, %k0, %k0
; SKX-NEXT: ktestd %k0, %k0
; SKX-NEXT: je LBB39_2
; SKX-NEXT: je LBB40_2
; SKX-NEXT: ## BB#1: ## %L1
; SKX-NEXT: vmovaps %zmm0, (%rdi)
; SKX-NEXT: vmovaps %zmm1, 64(%rdi)
; SKX-NEXT: retq
; SKX-NEXT: LBB39_2: ## %L2
; SKX-NEXT: LBB40_2: ## %L2
; SKX-NEXT: vmovaps %zmm0, 4(%rdi)
; SKX-NEXT: vmovaps %zmm1, 68(%rdi)
; SKX-NEXT: retq


@@ -71,10 +71,8 @@ define <16 x double> @select04(<16 x double> %a, <16 x double> %b) {
define i8 @select05(i8 %a.0, i8 %m) {
; CHECK-LABEL: select05:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k0
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: orl %esi, %edi
; CHECK-NEXT: movb %dil, %al
; CHECK-NEXT: retq
%mask = bitcast i8 %m to <8 x i1>
%a = bitcast i8 %a.0 to <8 x i1>
@@ -83,13 +81,28 @@ define i8 @select05(i8 %a.0, i8 %m) {
ret i8 %res;
}
define i8 @select05_mem(<8 x i1>* %a.0, <8 x i1>* %m) {
; CHECK-LABEL: select05_mem:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbw (%rsi), %ax
; CHECK-NEXT: kmovw %eax, %k0
; CHECK-NEXT: movzbw (%rdi), %ax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%mask = load <8 x i1> , <8 x i1>* %m
%a = load <8 x i1> , <8 x i1>* %a.0
%r = select <8 x i1> %mask, <8 x i1> <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>, <8 x i1> %a
%res = bitcast <8 x i1> %r to i8
ret i8 %res;
}
define i8 @select06(i8 %a.0, i8 %m) {
; CHECK-LABEL: select06:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k0
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: andl %esi, %edi
; CHECK-NEXT: movb %dil, %al
; CHECK-NEXT: retq
%mask = bitcast i8 %m to <8 x i1>
%a = bitcast i8 %a.0 to <8 x i1>
@@ -98,6 +111,22 @@ define i8 @select06(i8 %a.0, i8 %m) {
ret i8 %res;
}
define i8 @select06_mem(<8 x i1>* %a.0, <8 x i1>* %m) {
; CHECK-LABEL: select06_mem:
; CHECK: ## BB#0:
; CHECK-NEXT: movzbw (%rsi), %ax
; CHECK-NEXT: kmovw %eax, %k0
; CHECK-NEXT: movzbw (%rdi), %ax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: retq
%mask = load <8 x i1> , <8 x i1>* %m
%a = load <8 x i1> , <8 x i1>* %a.0
%r = select <8 x i1> %mask, <8 x i1> %a, <8 x i1> zeroinitializer
%res = bitcast <8 x i1> %r to i8
ret i8 %res;
}
define i8 @select07(i8 %a.0, i8 %b.0, i8 %m) {
; CHECK-LABEL: select07:
; CHECK: ## BB#0:


@@ -80,12 +80,11 @@ define void @mask64_mem(i64* %ptr) {
define i32 @mand32(i32 %x, i32 %y) {
; CHECK-LABEL: mand32:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k0
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: kandd %k1, %k0, %k2
; CHECK-NEXT: kxord %k1, %k0, %k0
; CHECK-NEXT: kord %k0, %k2, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: andl %esi, %eax
; CHECK-NEXT: xorl %esi, %edi
; CHECK-NEXT: orl %eax, %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%ma = bitcast i32 %x to <32 x i1>
%mb = bitcast i32 %y to <32 x i1>
@@ -96,15 +95,33 @@ define i32 @mand32(i32 %x, i32 %y) {
ret i32 %ret
}
define i32 @mand32_mem(<32 x i1>* %x, <32 x i1>* %y) {
; CHECK-LABEL: mand32_mem:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd (%rdi), %k0
; CHECK-NEXT: kmovd (%rsi), %k1
; CHECK-NEXT: kandd %k1, %k0, %k2
; CHECK-NEXT: kxord %k1, %k0, %k0
; CHECK-NEXT: kord %k0, %k2, %k0
; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: retq
%ma = load <32 x i1>, <32 x i1>* %x
%mb = load <32 x i1>, <32 x i1>* %y
%mc = and <32 x i1> %ma, %mb
%md = xor <32 x i1> %ma, %mb
%me = or <32 x i1> %mc, %md
%ret = bitcast <32 x i1> %me to i32
ret i32 %ret
}
define i64 @mand64(i64 %x, i64 %y) {
; CHECK-LABEL: mand64:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq %rdi, %k0
; CHECK-NEXT: kmovq %rsi, %k1
; CHECK-NEXT: kandq %k1, %k0, %k2
; CHECK-NEXT: kxorq %k1, %k0, %k0
; CHECK-NEXT: korq %k0, %k2, %k0
; CHECK-NEXT: kmovq %k0, %rax
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: andq %rsi, %rax
; CHECK-NEXT: xorq %rsi, %rdi
; CHECK-NEXT: orq %rax, %rdi
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: retq
%ma = bitcast i64 %x to <64 x i1>
%mb = bitcast i64 %y to <64 x i1>
@@ -114,3 +131,22 @@ define i64 @mand64(i64 %x, i64 %y) {
%ret = bitcast <64 x i1> %me to i64
ret i64 %ret
}
define i64 @mand64_mem(<64 x i1>* %x, <64 x i1>* %y) {
; CHECK-LABEL: mand64_mem:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq (%rdi), %k0
; CHECK-NEXT: kmovq (%rsi), %k1
; CHECK-NEXT: kandq %k1, %k0, %k2
; CHECK-NEXT: kxorq %k1, %k0, %k0
; CHECK-NEXT: korq %k0, %k2, %k0
; CHECK-NEXT: kmovq %k0, %rax
; CHECK-NEXT: retq
%ma = load <64 x i1>, <64 x i1>* %x
%mb = load <64 x i1>, <64 x i1>* %y
%mc = and <64 x i1> %ma, %mb
%md = xor <64 x i1> %ma, %mb
%me = or <64 x i1> %mc, %md
%ret = bitcast <64 x i1> %me to i64
ret i64 %ret
}


@@ -32,12 +32,11 @@ define void @mask8_mem(i8* %ptr) {
define i8 @mand8(i8 %x, i8 %y) {
; CHECK-LABEL: mand8:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k0
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: kandb %k1, %k0, %k2
; CHECK-NEXT: kxorb %k1, %k0, %k0
; CHECK-NEXT: korb %k0, %k2, %k0
; CHECK-NEXT: kmovb %k0, %eax
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: andl %esi, %edi
; CHECK-NEXT: orl %eax, %edi
; CHECK-NEXT: movb %dil, %al
; CHECK-NEXT: retq
%ma = bitcast i8 %x to <8 x i1>
%mb = bitcast i8 %y to <8 x i1>
@@ -47,3 +46,22 @@ define i8 @mand8(i8 %x, i8 %y) {
%ret = bitcast <8 x i1> %me to i8
ret i8 %ret
}
define i8 @mand8_mem(<8 x i1>* %x, <8 x i1>* %y) {
; CHECK-LABEL: mand8_mem:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb (%rdi), %k0
; CHECK-NEXT: kmovb (%rsi), %k1
; CHECK-NEXT: kandb %k1, %k0, %k2
; CHECK-NEXT: kxorb %k1, %k0, %k0
; CHECK-NEXT: korb %k0, %k2, %k0
; CHECK-NEXT: kmovb %k0, %eax
; CHECK-NEXT: retq
%ma = load <8 x i1>, <8 x i1>* %x
%mb = load <8 x i1>, <8 x i1>* %y
%mc = and <8 x i1> %ma, %mb
%md = xor <8 x i1> %ma, %mb
%me = or <8 x i1> %mc, %md
%ret = bitcast <8 x i1> %me to i8
ret i8 %ret
}


@@ -1869,48 +1869,34 @@ define <4 x float> @mask_v4f32_4127(<4 x float> %a, <4 x float> %b) {
define <4 x float> @mask_v4f32_0127(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: mask_v4f32_0127:
; SSE2: # BB#0:
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: andps {{.*}}(%rip), %xmm1
; SSE2-NEXT: orps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: mask_v4f32_0127:
; SSE3: # BB#0:
; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSE3-NEXT: andps {{.*}}(%rip), %xmm1
; SSE3-NEXT: orps %xmm1, %xmm0
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: mask_v4f32_0127:
; SSSE3: # BB#0:
; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT: andps {{.*}}(%rip), %xmm1
; SSSE3-NEXT: orps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: mask_v4f32_0127:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,5],xmm2[6,7]
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: mask_v4f32_0127:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: mask_v4f32_0127:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
; AVX-LABEL: mask_v4f32_0127:
; AVX: # BB#0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; AVX-NEXT: retq
%1 = bitcast <4 x float> %a to <2 x i64>
%2 = bitcast <4 x float> %b to <2 x i64>
%3 = and <2 x i64> %1, <i64 0, i64 -4294967296>
@@ -1923,47 +1909,38 @@ define <4 x float> @mask_v4f32_0127(<4 x float> %a, <4 x float> %b) {
define <4 x i32> @mask_v4i32_0127(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: mask_v4i32_0127:
; SSE2: # BB#0:
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: andps {{.*}}(%rip), %xmm1
; SSE2-NEXT: orps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: mask_v4i32_0127:
; SSE3: # BB#0:
; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSE3-NEXT: andps {{.*}}(%rip), %xmm1
; SSE3-NEXT: orps %xmm1, %xmm0
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: mask_v4i32_0127:
; SSSE3: # BB#0:
; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT: andps {{.*}}(%rip), %xmm1
; SSSE3-NEXT: orps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: mask_v4i32_0127:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,5],xmm2[6,7]
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: mask_v4i32_0127:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: mask_v4i32_0127:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; AVX2-NEXT: retq
%1 = bitcast <4 x i32> %a to <2 x i64>
%2 = bitcast <4 x i32> %b to <2 x i64>


@@ -2140,40 +2140,31 @@ define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
define <8 x i16> @mask_v8i16_012345ef(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: mask_v8i16_012345ef:
; SSE2: # BB#0:
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: andps {{.*}}(%rip), %xmm1
; SSE2-NEXT: orps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: mask_v8i16_012345ef:
; SSSE3: # BB#0:
; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT: andps {{.*}}(%rip), %xmm1
; SSSE3-NEXT: orps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: mask_v8i16_012345ef:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,5],xmm2[6,7]
; SSE41-NEXT: por %xmm2, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: mask_v8i16_012345ef:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: mask_v8i16_012345ef:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; AVX2-NEXT: retq
%1 = bitcast <8 x i16> %a to <2 x i64>
%2 = bitcast <8 x i16> %b to <2 x i64>

File diff suppressed because it is too large

File diff suppressed because it is too large