[X86] Remove call to getZeroVector from materializeVectorConstant. Add isel patterns for zero vectors with all types.

The change to avx512-vec-cmp.ll is a regression, but should be
easy to fix. It occurs because the getZeroVector call canonicalized
both sides of the select to the same node, which then let
SimplifySelect fold it. But since getZeroVector was only called
for some VTs, that wasn't a robust way to get this combine.
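
For reference, the affected pattern is essentially the reduced PR41066 test further
down in this diff (the trailing ret is implied by the return type). Both select arms
are all-zeros build_vectors, one of them with an undef lane:

define <2 x i64> @PR41066(<2 x i64> %t0, <2 x double> %x, <2 x double> %y) {
  %t1 = fcmp ogt <2 x double> %x, %y
  ; Both arms are all-zeros but start out as distinct nodes; getZeroVector used to
  ; canonicalize both to the same v4i32 zero, so the select folded to a plain zero.
  %t2 = select <2 x i1> %t1, <2 x i64> <i64 undef, i64 0>, <2 x i64> zeroinitializer
  ret <2 x i64> %t2
}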

The change to vector-shuffle-combining-ssse3.ll produces more
instructions, but it removes a constant pool load, so it's unclear
whether it's a regression or not.

llvm-svn: 371350
Craig Topper 2019-09-08 20:56:05 +00:00
parent 6e2c5c8710
commit 9c11901256
5 changed files with 31 additions and 13 deletions


@@ -9134,15 +9134,8 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
   MVT VT = Op.getSimpleValueType();
 
   // Vectors containing all zeros can be matched by pxor and xorps.
-  if (ISD::isBuildVectorAllZeros(Op.getNode())) {
-    // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
-    // and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
-    if (VT.isFloatingPoint() ||
-        VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
-      return Op;
-
-    return getZeroVector(VT, Subtarget, DAG, DL);
-  }
+  if (ISD::isBuildVectorAllZeros(Op.getNode()))
+    return Op;
 
   // Vectors containing all ones can be matched by pcmpeqd on 128-bit width
   // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use


@@ -413,6 +413,9 @@ def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
 }
 
 let Predicates = [HasAVX512] in {
+def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
 def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
 def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
 }
@@ -442,8 +445,14 @@ def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
 }
 
 let Predicates = [HasAVX512] in {
+def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
 def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
 def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
+def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
+def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
 def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
 def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
 }
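
As an illustration of why the extra integer-type patterns are needed (a hypothetical
reduced case, not one of this commit's tests): with the DAG-level canonicalization gone,
an all-zeros build_vector of a type like v8i64 reaches isel as-is instead of as a bitcast
of a v16i32 zero, so it has to match its own immAllZerosV pattern:

; A minimal sketch, assuming an AVX-512 target (e.g. llc -mattr=+avx512f); the function
; name is made up. The zeroinitializer should now select AVX512_512_SET0 through the new
; v8i64 pattern and expand to a single register-zeroing idiom rather than a bitcast of a
; v16i32 zero.
define <8 x i64> @zero_v8i64() {
  ret <8 x i64> zeroinitializer
}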


@@ -134,7 +134,10 @@ def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
 }
 
 let Predicates = [NoAVX512] in {
+def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
 def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
 def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
 }
@@ -150,6 +153,9 @@ def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
 }
 
 let Predicates = [NoAVX512] in {
+def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
+def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
+def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
 def : Pat<(v8f32 immAllZerosV), (AVX_SET0)>;
 def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
 }


@@ -1101,12 +1101,20 @@ define i16 @pcmpeq_mem_2(<16 x i32> %a, <16 x i32>* %b) {
 define <2 x i64> @PR41066(<2 x i64> %t0, <2 x double> %x, <2 x double> %y) {
 ; AVX512-LABEL: PR41066:
 ; AVX512: ## %bb.0:
-; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x57,0xc0]
+; AVX512-NEXT: ## kill: def $xmm2 killed $xmm2 def $zmm2
+; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512-NEXT: vcmpltpd %zmm1, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc9,0x01]
+; AVX512-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xc0]
+; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
 ; AVX512-NEXT: retq ## encoding: [0xc3]
 ;
 ; SKX-LABEL: PR41066:
 ; SKX: ## %bb.0:
-; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0]
+; SKX-NEXT: vcmpltpd %xmm1, %xmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x08,0xc2,0xc9,0x01]
+; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; SKX-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
 ; SKX-NEXT: retq ## encoding: [0xc3]
 %t1 = fcmp ogt <2 x double> %x, %y
 %t2 = select <2 x i1> %t1, <2 x i64> <i64 undef, i64 0>, <2 x i64> zeroinitializer


@@ -746,12 +746,14 @@ define <16 x i8> @constant_fold_pshufb() {
 define <16 x i8> @constant_fold_pshufb_2() {
 ; SSE-LABEL: constant_fold_pshufb_2:
 ; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; SSE-NEXT: movl $2, %eax
+; SSE-NEXT: movd %eax, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: constant_fold_pshufb_2:
 ; AVX: # %bb.0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; AVX-NEXT: movl $2, %eax
+; AVX-NEXT: vmovd %eax, %xmm0
 ; AVX-NEXT: retq
 %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> <i8 2, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
 ret <16 x i8> %1