forked from OSchip/llvm-project
[X86] canonicalizeBitSelect - don't attempt to canonicalize mask registers
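X86ISD::ANDNP is not used for mask registers, so bail out of the canonicalization for vector types whose scalar size is not a multiple of 8 bits. Test case from @craig.topper (Craig Topper). llvm-svn: 356696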
This commit is contained in:
parent
525cd59f5a
commit
c2e4405475
|
@@ -37266,7 +37266,7 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
|
|||
assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
- if (!VT.isVector())
|
||||
+ if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0)
|
||||
return SDValue();
|
||||
|
||||
SDValue N0 = peekThroughBitcasts(N->getOperand(0));
|
||||
|
|
|
@@ -591,3 +591,85 @@ define <8 x i64> @bitselect_v8i64_mm(<8 x i64>* nocapture readonly, <8 x i64>* n
|
|||
%7 = or <8 x i64> %6, %5
|
||||
ret <8 x i64> %7
|
||||
}
|
||||
|
||||
; Check that mask registers don't get canonicalized.
|
||||
define void @bitselect_v4i1_loop(<4 x i32> %a0) {
|
||||
; SSE-LABEL: bitselect_v4i1_loop:
|
||||
; SSE: # %bb.0: # %bb
|
||||
; SSE-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE-NEXT: pcmpeqd %xmm0, %xmm1
|
||||
; SSE-NEXT: pcmpeqd %xmm2, %xmm2
|
||||
; SSE-NEXT: pxor %xmm1, %xmm2
|
||||
; SSE-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: movdqa %xmm1, %xmm3
|
||||
; SSE-NEXT: pandn %xmm0, %xmm3
|
||||
; SSE-NEXT: .p2align 4, 0x90
|
||||
; SSE-NEXT: .LBB12_1: # %bb1
|
||||
; SSE-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; SSE-NEXT: pand %xmm1, %xmm2
|
||||
; SSE-NEXT: por %xmm3, %xmm2
|
||||
; SSE-NEXT: jmp .LBB12_1
|
||||
;
|
||||
; XOP-LABEL: bitselect_v4i1_loop:
|
||||
; XOP: # %bb.0: # %bb
|
||||
; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; XOP-NEXT: vpcomneqd %xmm1, %xmm0, %xmm1
|
||||
; XOP-NEXT: vpcomeqd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; XOP-NEXT: vmovdqa %xmm1, %xmm2
|
||||
; XOP-NEXT: .p2align 4, 0x90
|
||||
; XOP-NEXT: .LBB12_1: # %bb1
|
||||
; XOP-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; XOP-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm2
|
||||
; XOP-NEXT: jmp .LBB12_1
|
||||
;
|
||||
; AVX1-LABEL: bitselect_v4i1_loop:
|
||||
; AVX1: # %bb.0: # %bb
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
|
||||
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
|
||||
; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX1-NEXT: .p2align 4, 0x90
|
||||
; AVX1-NEXT: .LBB12_1: # %bb1
|
||||
; AVX1-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; AVX1-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm2
|
||||
; AVX1-NEXT: jmp .LBB12_1
|
||||
;
|
||||
; AVX2-LABEL: bitselect_v4i1_loop:
|
||||
; AVX2: # %bb.0: # %bb
|
||||
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
|
||||
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
|
||||
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
|
||||
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [12,12,12,12]
|
||||
; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
|
||||
; AVX2-NEXT: .p2align 4, 0x90
|
||||
; AVX2-NEXT: .LBB12_1: # %bb1
|
||||
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; AVX2-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm2
|
||||
; AVX2-NEXT: jmp .LBB12_1
|
||||
;
|
||||
; AVX512F-LABEL: bitselect_v4i1_loop:
|
||||
; AVX512F: # %bb.0: # %bb
|
||||
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
|
||||
; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [12,12,12,12]
|
||||
; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k2
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k2}
|
||||
; AVX512F-NEXT: .p2align 4, 0x90
|
||||
; AVX512F-NEXT: .LBB12_1: # %bb1
|
||||
; AVX512F-NEXT: # =>This Inner Loop Header: Depth=1
|
||||
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1 {%k1}
|
||||
; AVX512F-NEXT: korw %k1, %k0, %k1
|
||||
; AVX512F-NEXT: jmp .LBB12_1
|
||||
bb:
|
||||
%tmp = icmp ne <4 x i32> %a0, zeroinitializer
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb1, %bb
|
||||
%tmp2 = phi <4 x i1> [ %tmp, %bb ], [ %tmp4, %bb1 ]
|
||||
%tmp3 = icmp eq <4 x i32> %a0, <i32 12, i32 12, i32 12, i32 12>
|
||||
%tmp4 = select <4 x i1> %tmp, <4 x i1> %tmp3, <4 x i1> %tmp2
|
||||
%tmp5 = and <4 x i1> %tmp4, %tmp
|
||||
br label %bb1
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue