forked from OSchip/llvm-project
[DAG][PowerPC] Enable initial ISD::BITCAST SimplifyDemandedBits/SimplifyMultipleUseDemandedBits big-endian handling
This patch begins extending the handling that peeks through bitcast nodes so that it covers big-endian targets in addition to the existing little-endian case. Differential Revision: https://reviews.llvm.org/D114676
This commit is contained in:
parent
8e757b2383
commit
6803d08c38
|
@ -664,15 +664,15 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
|
|||
Src, DemandedBits, DemandedElts, DAG, Depth + 1))
|
||||
return DAG.getBitcast(DstVT, V);
|
||||
|
||||
// TODO - bigendian once we have test coverage.
|
||||
if (IsLE && SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
|
||||
if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
|
||||
unsigned Scale = NumDstEltBits / NumSrcEltBits;
|
||||
unsigned NumSrcElts = SrcVT.getVectorNumElements();
|
||||
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
|
||||
APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
|
||||
for (unsigned i = 0; i != Scale; ++i) {
|
||||
unsigned Offset = i * NumSrcEltBits;
|
||||
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
|
||||
unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
|
||||
unsigned BitOffset = EltOffset * NumSrcEltBits;
|
||||
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
|
||||
if (!Sub.isZero()) {
|
||||
DemandedSrcBits |= Sub;
|
||||
for (unsigned j = 0; j != NumElts; ++j)
|
||||
|
@ -2166,15 +2166,15 @@ bool TargetLowering::SimplifyDemandedBits(
|
|||
|
||||
// Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
|
||||
// Demand the elt/bit if any of the original elts/bits are demanded.
|
||||
// TODO - bigendian once we have test coverage.
|
||||
if (IsLE && SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
|
||||
if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
|
||||
unsigned Scale = BitWidth / NumSrcEltBits;
|
||||
unsigned NumSrcElts = SrcVT.getVectorNumElements();
|
||||
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
|
||||
APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
|
||||
for (unsigned i = 0; i != Scale; ++i) {
|
||||
unsigned Offset = i * NumSrcEltBits;
|
||||
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
|
||||
unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
|
||||
unsigned BitOffset = EltOffset * NumSrcEltBits;
|
||||
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
|
||||
if (!Sub.isZero()) {
|
||||
DemandedSrcBits |= Sub;
|
||||
for (unsigned j = 0; j != NumElts; ++j)
|
||||
|
@ -2193,6 +2193,7 @@ bool TargetLowering::SimplifyDemandedBits(
|
|||
KnownSrcBits, TLO, Depth + 1))
|
||||
return true;
|
||||
} else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
|
||||
// TODO - bigendian once we have test coverage.
|
||||
unsigned Scale = NumSrcEltBits / BitWidth;
|
||||
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
|
||||
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
|
||||
|
|
|
@ -337,15 +337,14 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret(<8 x float>) %a
|
|||
;
|
||||
; CHECK-BE-LABEL: test8elt_signed:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: xxlxor v3, v3, v3
|
||||
; CHECK-BE-NEXT: vmrglh v3, v3, v2
|
||||
; CHECK-BE-NEXT: vmrghh v2, v2, v2
|
||||
; CHECK-BE-NEXT: vmrghh v3, v2, v2
|
||||
; CHECK-BE-NEXT: vmrglh v2, v2, v2
|
||||
; CHECK-BE-NEXT: vextsh2w v3, v3
|
||||
; CHECK-BE-NEXT: vextsh2w v2, v2
|
||||
; CHECK-BE-NEXT: xvcvsxwsp vs0, v3
|
||||
; CHECK-BE-NEXT: xvcvsxwsp vs1, v2
|
||||
; CHECK-BE-NEXT: stxv vs0, 16(r3)
|
||||
; CHECK-BE-NEXT: stxv vs1, 0(r3)
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = sitofp <8 x i16> %a to <8 x float>
|
||||
|
@ -409,25 +408,24 @@ define void @test16elt_signed(<16 x float>* noalias nocapture sret(<16 x float>)
|
|||
;
|
||||
; CHECK-BE-LABEL: test16elt_signed:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv v2, 16(r4)
|
||||
; CHECK-BE-NEXT: lxv v3, 0(r4)
|
||||
; CHECK-BE-NEXT: xxlxor v4, v4, v4
|
||||
; CHECK-BE-NEXT: vmrglh v5, v4, v3
|
||||
; CHECK-BE-NEXT: vmrglh v4, v4, v2
|
||||
; CHECK-BE-NEXT: vmrghh v3, v3, v3
|
||||
; CHECK-BE-NEXT: vmrghh v2, v2, v2
|
||||
; CHECK-BE-NEXT: vextsh2w v5, v5
|
||||
; CHECK-BE-NEXT: lxv v2, 16(r4)
|
||||
; CHECK-BE-NEXT: vmrghh v4, v3, v3
|
||||
; CHECK-BE-NEXT: vmrglh v3, v3, v3
|
||||
; CHECK-BE-NEXT: vextsh2w v3, v3
|
||||
; CHECK-BE-NEXT: vextsh2w v4, v4
|
||||
; CHECK-BE-NEXT: xvcvsxwsp vs1, v3
|
||||
; CHECK-BE-NEXT: vmrghh v3, v2, v2
|
||||
; CHECK-BE-NEXT: vmrglh v2, v2, v2
|
||||
; CHECK-BE-NEXT: xvcvsxwsp vs0, v4
|
||||
; CHECK-BE-NEXT: vextsh2w v3, v3
|
||||
; CHECK-BE-NEXT: vextsh2w v2, v2
|
||||
; CHECK-BE-NEXT: xvcvsxwsp vs0, v5
|
||||
; CHECK-BE-NEXT: xvcvsxwsp vs1, v4
|
||||
; CHECK-BE-NEXT: xvcvsxwsp vs2, v3
|
||||
; CHECK-BE-NEXT: xvcvsxwsp vs3, v2
|
||||
; CHECK-BE-NEXT: stxv vs1, 48(r3)
|
||||
; CHECK-BE-NEXT: stxv vs3, 32(r3)
|
||||
; CHECK-BE-NEXT: stxv vs0, 16(r3)
|
||||
; CHECK-BE-NEXT: stxv vs2, 0(r3)
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%a = load <16 x i16>, <16 x i16>* %0, align 32
|
||||
|
|
|
@ -190,7 +190,7 @@ define double @test10(<4 x i32> %a, <4 x i32> %b) {
|
|||
; CHECK-BE-LABEL: test10:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: addis 3, 2, .LCPI9_0@toc@ha
|
||||
; CHECK-BE-NEXT: vmrghw 3, 3, 2
|
||||
; CHECK-BE-NEXT: vmrghw 3, 3, 3
|
||||
; CHECK-BE-NEXT: lfs 0, .LCPI9_0@toc@l(3)
|
||||
; CHECK-BE-NEXT: vmrglw 2, 3, 2
|
||||
; CHECK-BE-NEXT: xsadddp 1, 34, 0
|
||||
|
|
Loading…
Reference in New Issue