[X86][TLI] SimplifyDemandedVectorEltsForTargetNode(): don't break apart broadcasts from which not just the 0'th elt is demanded

Apparently this has no test coverage before D108382,
but D108382 itself shows a few regressions that this fixes.

It doesn't seem worthwhile to break apart a broadcast
when we want the broadcasted value to be present in several elements,
not just the 0'th one.
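
To make the "demanded elements" notion concrete: the demanded-elts mask is
an APInt with one bit per vector lane, and APInt::isOneValue() holds only
when the mask is exactly 1, i.e. only the 0'th lane is demanded. A minimal
standalone sketch (the 4-lane width and the sample masks are illustrative,
not taken from the patch):

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  llvm::APInt OnlyElt0(/*numBits=*/4, /*val=*/1);  // 0b0001: only lane 0 demanded
  llvm::APInt Elts0And2(/*numBits=*/4, /*val=*/5); // 0b0101: lanes 0 and 2 demanded
  assert(OnlyElt0.isOneValue());   // exactly 1: simplification may proceed
  assert(!Elts0And2.isOneValue()); // several lanes demanded: keep the broadcast
  return 0;
}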

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D108411
Roman Lebedev 2021-09-19 17:38:32 +03:00
parent 07f1d8f0ca
commit 5f2fe48d06
4 changed files with 12 additions and 8 deletions


@@ -39978,6 +39978,12 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
     }
   }
 
+  // For broadcasts, unless we *only* demand the 0'th element,
+  // stop attempts at simplification here, we aren't going to improve things,
+  // this is better than any potential shuffle.
+  if (isTargetShuffleSplat(Op) && !DemandedElts.isOneValue())
+    return false;
+
   // Get target/faux shuffle mask.
   APInt OpUndef, OpZero;
   SmallVector<int, 64> OpMask;
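
For intuition about the guard above: isTargetShuffleSplat recognizes
broadcast-style target shuffles. A hedged, mask-level approximation of the
idea (isSplatOfElt0 is a hypothetical helper, not the real LLVM routine,
which inspects the target node itself rather than a raw mask):

#include "llvm/ADT/ArrayRef.h"

// Hypothetical illustration only: a shuffle mask is a splat of element 0
// when every defined lane reads index 0 (-1 marks an undef lane).
static bool isSplatOfElt0(llvm::ArrayRef<int> Mask) {
  for (int M : Mask)
    if (M != 0 && M != -1)
      return false;
  return true;
}

// e.g. {0,0,0,0} and {0,-1,0,0} are splats of element 0; {0,1,0,1} is not.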


@@ -284,7 +284,7 @@ define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_unary(<4
 define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_binary:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,1]
+; CHECK-NEXT:    vbroadcastsd %xmm1, %ymm1
 ; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
 ; CHECK-NEXT:    retq
   %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 4>


@@ -752,9 +752,9 @@ define <4 x i32> @sequential_sum_v4i32_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i3
 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm4 = xmm2[3,3,3,3]
 ; AVX2-SLOW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} xmm2 = xmm5[0,1],xmm2[2,3]
-; AVX2-SLOW-NEXT:    vpbroadcastd %xmm3, %xmm4
-; AVX2-SLOW-NEXT:    vpbroadcastq %xmm3, %xmm5
-; AVX2-SLOW-NEXT:    vpaddd %xmm4, %xmm5, %xmm4
+; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,1,1]
+; AVX2-SLOW-NEXT:    vpbroadcastd %xmm3, %xmm5
+; AVX2-SLOW-NEXT:    vpaddd %xmm5, %xmm4, %xmm4
 ; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3]
 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[2,2,2,2]
 ; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3]


@@ -645,8 +645,7 @@ define <4 x i32> @pinsrd_from_shufflevector_i32(<4 x i32> %a, <4 x i32>* nocaptu
 ; X86-AVX1-LABEL: pinsrd_from_shufflevector_i32:
 ; X86-AVX1:       ## %bb.0: ## %entry
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT:    vpermilps $0, (%eax), %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0x08,0x00]
-; X86-AVX1-NEXT:    ## xmm1 = mem[0,0,0,0]
+; X86-AVX1-NEXT:    vbroadcastss (%eax), %xmm1 ## encoding: [0xc4,0xe2,0x79,0x18,0x08]
 ; X86-AVX1-NEXT:    vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08]
 ; X86-AVX1-NEXT:    ## xmm0 = xmm0[0,1,2],xmm1[3]
 ; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
@@ -669,8 +668,7 @@ define <4 x i32> @pinsrd_from_shufflevector_i32(<4 x i32> %a, <4 x i32>* nocaptu
 ;
 ; X64-AVX1-LABEL: pinsrd_from_shufflevector_i32:
 ; X64-AVX1:       ## %bb.0: ## %entry
-; X64-AVX1-NEXT:    vpermilps $0, (%rdi), %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0x0f,0x00]
-; X64-AVX1-NEXT:    ## xmm1 = mem[0,0,0,0]
+; X64-AVX1-NEXT:    vbroadcastss (%rdi), %xmm1 ## encoding: [0xc4,0xe2,0x79,0x18,0x0f]
 ; X64-AVX1-NEXT:    vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08]
 ; X64-AVX1-NEXT:    ## xmm0 = xmm0[0,1,2],xmm1[3]
 ; X64-AVX1-NEXT:    retq ## encoding: [0xc3]