forked from OSchip/llvm-project
[X86][TLI] SimplifyDemandedVectorEltsForTargetNode(): don't break apart broadcasts from which not just the 0'th elt is demanded
Apparently this has no test coverage before D108382, but D108382 itself shows a few regressions that this fixes. It doesn't seem worthwhile breaking apart broadcasts: we presumably want the broadcasted value to be present in several elements, not just the 0'th one. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D108411
This commit is contained in:
parent
07f1d8f0ca
commit
5f2fe48d06
|
@ -39978,6 +39978,12 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
|||
}
|
||||
}
|
||||
|
||||
// For broadcasts, unless we *only* demand the 0'th element,
|
||||
// stop attempts at simplification here, we aren't going to improve things,
|
||||
// this is better than any potential shuffle.
|
||||
if (isTargetShuffleSplat(Op) && !DemandedElts.isOneValue())
|
||||
return false;
|
||||
|
||||
// Get target/faux shuffle mask.
|
||||
APInt OpUndef, OpZero;
|
||||
SmallVector<int, 64> OpMask;
|
||||
|
|
|
@ -284,7 +284,7 @@ define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_unary(<4
|
|||
define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
|
||||
; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_binary:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,1]
|
||||
; CHECK-NEXT: vbroadcastsd %xmm1, %ymm1
|
||||
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
|
||||
; CHECK-NEXT: retq
|
||||
%r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
|
||||
|
|
|
@ -752,9 +752,9 @@ define <4 x i32> @sequential_sum_v4i32_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i3
|
|||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[3,3,3,3]
|
||||
; AVX2-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
|
||||
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm2 = xmm5[0,1],xmm2[2,3]
|
||||
; AVX2-SLOW-NEXT: vpbroadcastd %xmm3, %xmm4
|
||||
; AVX2-SLOW-NEXT: vpbroadcastq %xmm3, %xmm5
|
||||
; AVX2-SLOW-NEXT: vpaddd %xmm4, %xmm5, %xmm4
|
||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,1,1]
|
||||
; AVX2-SLOW-NEXT: vpbroadcastd %xmm3, %xmm5
|
||||
; AVX2-SLOW-NEXT: vpaddd %xmm5, %xmm4, %xmm4
|
||||
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3]
|
||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,2,2,2]
|
||||
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3]
|
||||
|
|
|
@ -645,8 +645,7 @@ define <4 x i32> @pinsrd_from_shufflevector_i32(<4 x i32> %a, <4 x i32>* nocaptu
|
|||
; X86-AVX1-LABEL: pinsrd_from_shufflevector_i32:
|
||||
; X86-AVX1: ## %bb.0: ## %entry
|
||||
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-AVX1-NEXT: vpermilps $0, (%eax), %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0x08,0x00]
|
||||
; X86-AVX1-NEXT: ## xmm1 = mem[0,0,0,0]
|
||||
; X86-AVX1-NEXT: vbroadcastss (%eax), %xmm1 ## encoding: [0xc4,0xe2,0x79,0x18,0x08]
|
||||
; X86-AVX1-NEXT: vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08]
|
||||
; X86-AVX1-NEXT: ## xmm0 = xmm0[0,1,2],xmm1[3]
|
||||
; X86-AVX1-NEXT: retl ## encoding: [0xc3]
|
||||
|
@ -669,8 +668,7 @@ define <4 x i32> @pinsrd_from_shufflevector_i32(<4 x i32> %a, <4 x i32>* nocaptu
|
|||
;
|
||||
; X64-AVX1-LABEL: pinsrd_from_shufflevector_i32:
|
||||
; X64-AVX1: ## %bb.0: ## %entry
|
||||
; X64-AVX1-NEXT: vpermilps $0, (%rdi), %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0x0f,0x00]
|
||||
; X64-AVX1-NEXT: ## xmm1 = mem[0,0,0,0]
|
||||
; X64-AVX1-NEXT: vbroadcastss (%rdi), %xmm1 ## encoding: [0xc4,0xe2,0x79,0x18,0x0f]
|
||||
; X64-AVX1-NEXT: vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08]
|
||||
; X64-AVX1-NEXT: ## xmm0 = xmm0[0,1,2],xmm1[3]
|
||||
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
|
||||
|
|
Loading…
Reference in New Issue