forked from OSchip/llvm-project
[DAG] isSplatValue - don't attempt to merge any BITCAST sub elements if they contain UNDEFs
We still haven't found a solution that properly handles 'don't care' sub elements. Given how close it is to the next release branch, I'm making this fail-safe change, and we can revisit this later if we can't find alternatives. NOTE: This isn't a reversion of D128570 - it's the removal of undef handling across bitcasts entirely. Fixes #56520
This commit is contained in:
parent
825df62e8b
commit
ac8be21365
|
@ -2712,16 +2712,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
|
|||
SubDemandedElts &= ScaledDemandedElts;
|
||||
if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1))
|
||||
return false;
|
||||
|
||||
// Here we can't do "MatchAnyBits" operation merge for undef bits.
|
||||
// Because some operation only use part value of the source.
|
||||
// Take llvm.fshl.* for example:
|
||||
// t1: v4i32 = Constant:i32<12>, undef:i32, Constant:i32<12>, undef:i32
|
||||
// t2: v2i64 = bitcast t1
|
||||
// t5: v2i64 = fshl t3, t4, t2
|
||||
// We can not convert t2 to {i64 undef, i64 undef}
|
||||
UndefElts |= APIntOps::ScaleBitMask(SubUndefElts, NumElts,
|
||||
/*MatchAllBits=*/true);
|
||||
// TODO: Add support for merging sub undef elements.
|
||||
if (!SubUndefElts.isZero())
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -20,16 +20,14 @@
|
|||
define void @test_fshl(<8 x i64> %lo, <8 x i64> %hi, <8 x i64>* %arr) {
|
||||
; CHECK-LABEL: test_fshl:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: movl $63, %eax
|
||||
; CHECK-NEXT: vmovd %eax, %xmm2
|
||||
; CHECK-NEXT: movl $12, %eax
|
||||
; CHECK-NEXT: vmovd %eax, %xmm3
|
||||
; CHECK-NEXT: vpand %xmm2, %xmm3, %xmm2
|
||||
; CHECK-NEXT: vpsllq %xmm2, %zmm1, %zmm1
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: vpsrlq $52, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vmovdqa64 %zmm0, (%eax)
|
||||
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm2 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12]
|
||||
; CHECK-NEXT: vpandnq {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm2, %zmm2
|
||||
; CHECK-NEXT: vpsrlq $1, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vpsrlvq %zmm2, %zmm0, %zmm0
|
||||
; CHECK-NEXT: vpsllq $12, %zmm1, %zmm1
|
||||
; CHECK-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm1
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, (%eax)
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: retl
|
||||
entry:
|
||||
|
|
|
@ -3353,7 +3353,7 @@ define <2 x i64> @PR55157(ptr %0) {
|
|||
}
|
||||
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>)
|
||||
|
||||
; FIXME: SelectionDAG::isSplatValue - incorrect handling of undef sub-elements
|
||||
; SelectionDAG::isSplatValue - incorrect handling of undef sub-elements
|
||||
define <2 x i64> @PR56520(<16 x i8> %0) {
|
||||
; SSE-LABEL: PR56520:
|
||||
; SSE: # %bb.0:
|
||||
|
@ -3362,16 +3362,38 @@ define <2 x i64> @PR56520(<16 x i8> %0) {
|
|||
; SSE-NEXT: movd %xmm1, %eax
|
||||
; SSE-NEXT: movsbl %al, %eax
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: PR56520:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
||||
; AVX-NEXT: vmovd %xmm0, %eax
|
||||
; AVX-NEXT: movsbl %al, %eax
|
||||
; AVX-NEXT: vmovd %eax, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
; AVX1-LABEL: PR56520:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vmovd %xmm0, %eax
|
||||
; AVX1-NEXT: movsbl %al, %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-SLOW-LABEL: PR56520:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX2-SLOW-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-SLOW-NEXT: vmovd %xmm0, %eax
|
||||
; AVX2-SLOW-NEXT: movsbl %al, %eax
|
||||
; AVX2-SLOW-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: PR56520:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX2-FAST-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
|
||||
; AVX2-FAST-NEXT: vmovd %xmm0, %eax
|
||||
; AVX2-FAST-NEXT: movsbl %al, %eax
|
||||
; AVX2-FAST-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
|
||||
; AVX2-FAST-NEXT: retq
|
||||
%2 = icmp eq <16 x i8> zeroinitializer, %0
|
||||
%3 = extractelement <16 x i1> %2, i64 0
|
||||
%4 = sext i1 %3 to i32
|
||||
|
|
Loading…
Reference in New Issue