forked from OSchip/llvm-project
[X86][AVX] Use X86ISD::VPERM2X128 for blend-with-zero if optimizing for size
Last part of PR22984: when optimizing for size, avoid the zero-register (vxorps) dependency by emitting a single vperm2f128 for the blend-with-zero pattern instead of the xor + vblendps sequence.
This commit is contained in:
parent
24ac6a2d7d
commit
45aa1b8853
|
@ -34062,6 +34062,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
return DAG.getBitcast(RootVT, V1);
|
||||
}
|
||||
|
||||
bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize();
|
||||
unsigned RootSizeInBits = RootVT.getSizeInBits();
|
||||
unsigned NumRootElts = RootVT.getVectorNumElements();
|
||||
unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
|
||||
|
@ -34205,9 +34206,10 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
|
||||
// If we have AVX2, prefer to use VPERMQ/VPERMPD for unary shuffles unless
|
||||
// we need to use the zeroing feature.
|
||||
// Prefer blends for sequential shuffles unless we are optimizing for size.
|
||||
if (UnaryShuffle &&
|
||||
!(Subtarget.hasAVX2() && isUndefOrInRange(BaseMask, 0, 2)) &&
|
||||
!isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
|
||||
(OptForSize || !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0))) {
|
||||
unsigned PermMask = 0;
|
||||
PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
|
||||
PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
|
||||
|
|
|
@ -357,8 +357,8 @@ entry:
|
|||
|
||||
;; Test zero mask generation.
|
||||
;; PR22984: https://llvm.org/bugs/show_bug.cgi?id=22984
|
||||
;; Prefer xor+vblendpd over vperm2f128 because that has better performance.
|
||||
;; TODO: When building for optsize we should use vperm2f128.
|
||||
;; Prefer xor+vblendpd over vperm2f128 because that has better performance,
|
||||
;; unless building for optsize where we should still use vperm2f128.
|
||||
|
||||
define <4 x double> @shuffle_v4f64_zz01(<4 x double> %a) {
|
||||
; ALL-LABEL: shuffle_v4f64_zz01:
|
||||
|
@ -389,8 +389,7 @@ define <4 x double> @shuffle_v4f64_zz23(<4 x double> %a) {
|
|||
define <4 x double> @shuffle_v4f64_zz23_optsize(<4 x double> %a) optsize {
|
||||
; ALL-LABEL: shuffle_v4f64_zz23_optsize:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
|
||||
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[2,3]
|
||||
; ALL-NEXT: retq
|
||||
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
|
||||
ret <4 x double> %s
|
||||
|
@ -425,8 +424,7 @@ define <4 x double> @shuffle_v4f64_zz67(<4 x double> %a) {
|
|||
define <4 x double> @shuffle_v4f64_zz67_optsize(<4 x double> %a) optsize {
|
||||
; ALL-LABEL: shuffle_v4f64_zz67_optsize:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
|
||||
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[2,3]
|
||||
; ALL-NEXT: retq
|
||||
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
||||
ret <4 x double> %s
|
||||
|
|
Loading…
Reference in New Issue