forked from OSchip/llvm-project
[X86][AVX] Split VZEXT_MOVL ymm/zmm if the upper elements are not demanded.
Removes the unnecessary vzeroupper noted in D61806.
llvm-svn: 360543
This commit is contained in:
parent
2de619099a
commit
a7fc763082
|
@ -33593,6 +33593,18 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
|||
ExtSizeInBits = SizeInBits / 4;
|
||||
|
||||
switch (Opc) {
|
||||
// Zero upper elements.
|
||||
case X86ISD::VZEXT_MOVL: {
|
||||
SDLoc DL(Op);
|
||||
SDValue Ext0 =
|
||||
extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
SDValue ExtOp =
|
||||
TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0);
|
||||
SDValue UndefVec = TLO.DAG.getUNDEF(VT);
|
||||
SDValue Insert =
|
||||
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
return TLO.CombineTo(Op, Insert);
|
||||
}
|
||||
// Byte shifts by immediate.
|
||||
case X86ISD::VSHLDQ:
|
||||
case X86ISD::VSRLDQ:
|
||||
|
|
|
@ -586,8 +586,6 @@ define <8 x float> @PR41512_v8f32(float %x, float %y) {
|
|||
;
|
||||
; AVX-LABEL: PR41512_v8f32:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
|
||||
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
|
||||
; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
|
||||
; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
|
||||
|
|
|
@ -1449,7 +1449,6 @@ define i32 @sad_unroll_nonzero_initial(<16 x i8>* %arg, <16 x i8>* %arg1, <16 x
|
|||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
|
||||
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
|
||||
; AVX2-NEXT: vmovd %xmm0, %eax
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: sad_unroll_nonzero_initial:
|
||||
|
|
Loading…
Reference in New Issue