[PowerPC] Follow-up to r318436 to get the missed CSE opportunities

The last of the three patches that https://reviews.llvm.org/D40348 was
broken up into.
Canonicalize the materialization of constants so that they are more likely
to be CSE'd regardless of the bit-width of the use. If a constant can be
materialized using PPC::LI, materialize it the same way always.
For example:
  li 4, -1
  li 4, 255
  li 4, 65535
are equivalent if every use only reads the low byte. Canonicalize them to the
first form.

Differential Revision: https://reviews.llvm.org/D40348

llvm-svn: 320473
This commit is contained in:
Nemanja Ivanovic 2017-12-12 12:09:34 +00:00
parent 8d0efdd5db
commit b0783cccb7
6 changed files with 218 additions and 9 deletions

View File

@ -786,8 +786,10 @@ static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
// Simple value.
if (isInt<16>(Imm)) {
uint64_t SextImm = SignExtend64(Lo, 16);
SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
// Just the Lo bits.
Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
} else if (Lo) {
// Handle the Hi bits.
unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
@ -892,12 +894,74 @@ static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl,
getI32Imm(64 - RMin), getI32Imm(MaskEnd));
}
/// Compute the widest truncation applied to \p N across all of its uses.
///
/// Every use of \p N must be one of the recognised truncating operations:
///   - a (non-machine) ISD::TRUNCATE, contributing the width of its result;
///   - a (non-machine) ISD::STORE with a memory type narrower than 64 bits;
///   - one of the PPC::STW*/STH*/STB* machine stores listed below, with \p N
///     as operand 0, contributing 32, 16 and 8 bits respectively.
///
/// \param CurDAG  Currently unused by this function.
/// \param N       The node whose uses are inspected.
/// \returns The largest width (in bits) consumed by any use, or 0 if any use
///          is not recognised. 0 is also returned when \p N has no uses.
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
  unsigned MaxTruncation = 0;
  // Cannot use range-based for loop here as we need the actual use (i.e. we
  // need the operand number corresponding to the use). A range-based for
  // will unbox the use and provide an SDNode*.
  for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
       Use != UseEnd; ++Use) {
    // Machine opcodes and ISD opcodes are folded into the one Opc value that
    // drives the switch, so every case has to confirm which kind of node it
    // is actually looking at before trusting the match.
    const bool IsMachineNode = Use->isMachineOpcode();
    unsigned Opc = IsMachineNode ? Use->getMachineOpcode() : Use->getOpcode();
    switch (Opc) {
    default:
      return 0;
    case ISD::TRUNCATE:
      if (IsMachineNode)
        return 0;
      MaxTruncation =
          std::max(MaxTruncation, Use->getValueType(0).getSizeInBits());
      continue;
    case ISD::STORE: {
      if (IsMachineNode)
        return 0;
      StoreSDNode *STN = cast<StoreSDNode>(*Use);
      unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
      // A 64-bit store does not truncate at all.
      // NOTE(review): the stored value of an ISD::STORE is presumably operand
      // 1 (operand 0 being the chain); if so this check always bails out for
      // unselected stores, which is conservative. Confirm the intent.
      if (MemVTSize == 64 || Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, MemVTSize);
      continue;
    }
    case PPC::STW8:
    case PPC::STWX8:
    case PPC::STWU8:
    case PPC::STWUX8:
      // Reject non-machine nodes whose ISD opcode merely collides with one of
      // these machine opcodes, and uses where N is not operand 0.
      if (!IsMachineNode || Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, 32u);
      continue;
    case PPC::STH8:
    case PPC::STHX8:
    case PPC::STHU8:
    case PPC::STHUX8:
      if (!IsMachineNode || Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, 16u);
      continue;
    case PPC::STB8:
    case PPC::STBX8:
    case PPC::STBU8:
    case PPC::STBUX8:
      if (!IsMachineNode || Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, 8u);
      continue;
    }
  }
  return MaxTruncation;
}
// Select a 64-bit constant.
//
// N is cast to ConstantSDNode and its value is read. If allUsesTruncate
// reports a non-zero width (every use of N only consumes that many low bits),
// the value is sign-extended from that width; when the result fits a signed
// 16-bit immediate it is emitted as a single PPC::LI8, so that constants which
// agree in the bits actually used are materialized identically. In every other
// case the general selectI64Imm(CurDAG, dl, Imm) overload is used.
static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
  SDLoc dl(N);

  // Get 64 bit value.
  int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
  if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
    uint64_t SextImm = SignExtend64(Imm, MinSize);
    // Build the immediate operand only once LI8 is known to be usable, so the
    // fallback path does not leave an unused target-constant node behind.
    if (isInt<16>(SextImm)) {
      SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
      return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
    }
  }
  return selectI64Imm(CurDAG, dl, Imm);
}

View File

@ -1,5 +1,13 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -verify-machineinstrs | FileCheck %s
@CVal = external local_unnamed_addr global i8, align 1
@SVal = external local_unnamed_addr global i16, align 2
@IVal = external local_unnamed_addr global i32, align 4
@LVal = external local_unnamed_addr global i64, align 8
@USVal = external local_unnamed_addr global i16, align 2
@arr = external local_unnamed_addr global i64*, align 8
@arri = external local_unnamed_addr global i32*, align 8
; Test the same constant can be used by different stores.
%struct.S = type { i64, i8, i16, i32 }
@ -42,3 +50,142 @@ define void @bar(%struct.S* %p) {
; CHECK: stb 4, 8(3)
}
; Stores the constant -7 at i8, i16, i32 and i64 widths to four globals. The
; directives below expect an `li 7, -7` whose register (7) is the source of
; all four stores, i.e. the constant is shared across the store widths.
; Function Attrs: norecurse nounwind
define void @setSmallNeg() {
entry:
store i8 -7, i8* @CVal, align 1
store i16 -7, i16* @SVal, align 2
store i32 -7, i32* @IVal, align 4
store i64 -7, i64* @LVal, align 8
ret void
; CHECK-LABEL: setSmallNeg
; CHECK: li 7, -7
; CHECK-DAG: stb 7,
; CHECK-DAG: sth 7,
; CHECK-DAG: stw 7,
; CHECK-DAG: std 7,
}
; Stores the constant 8 at i8, i16, i32 and i64 widths to four globals. The
; directives below expect an `li 7, 8` whose register (7) is the source of all
; four stores.
; Function Attrs: norecurse nounwind
define void @setSmallPos() {
entry:
store i8 8, i8* @CVal, align 1
store i16 8, i16* @SVal, align 2
store i32 8, i32* @IVal, align 4
store i64 8, i64* @LVal, align 8
ret void
; CHECK-LABEL: setSmallPos
; CHECK: li 7, 8
; CHECK-DAG: stb 7,
; CHECK-DAG: sth 7,
; CHECK-DAG: stw 7,
; CHECK-DAG: std 7,
}
; Stores -32768 (the smallest signed 16-bit value) at i16, i32 and i64 widths.
; The directives below expect an `li 6, -32768` whose register (6) is the
; source of all three stores.
; Function Attrs: norecurse nounwind
define void @setMaxNeg() {
entry:
store i16 -32768, i16* @SVal, align 2
store i32 -32768, i32* @IVal, align 4
store i64 -32768, i64* @LVal, align 8
ret void
; CHECK-LABEL: setMaxNeg
; CHECK: li 6, -32768
; CHECK-DAG: sth 6,
; CHECK-DAG: stw 6,
; CHECK-DAG: std 6,
}
; Stores 32767 (the largest signed 16-bit value) at i16, i32 and i64 widths.
; The directives below expect an `li 6, 32767` whose register (6) is the
; source of all three stores.
; Function Attrs: norecurse nounwind
define void @setMaxPos() {
entry:
store i16 32767, i16* @SVal, align 2
store i32 32767, i32* @IVal, align 4
store i64 32767, i64* @LVal, align 8
ret void
; CHECK-LABEL: setMaxPos
; CHECK: li 6, 32767
; CHECK-DAG: sth 6,
; CHECK-DAG: stw 6,
; CHECK-DAG: std 6,
}
; Stores -32769, one below the signed 16-bit range, at i32 and i64 widths. The
; directives below expect the two-instruction `lis 5, -1` / `ori 5, 5, 32767`
; sequence, with register 5 as the source of both stores.
; Function Attrs: norecurse nounwind
define void @setExcessiveNeg() {
entry:
store i32 -32769, i32* @IVal, align 4
store i64 -32769, i64* @LVal, align 8
ret void
; CHECK-LABEL: setExcessiveNeg
; CHECK: lis 5, -1
; CHECK: ori 5, 5, 32767
; CHECK-DAG: stw 5,
; CHECK-DAG: std 5,
}
; Stores i16 -32768 to @USVal and 32768 (one above the signed 16-bit range) at
; i32 and i64 widths. The directives below expect the two-instruction
; `li 6, 0` / `ori 6, 6, 32768` sequence, with register 6 as the source of all
; three stores.
; Function Attrs: norecurse nounwind
define void @setExcessivePos() {
entry:
store i16 -32768, i16* @USVal, align 2
store i32 32768, i32* @IVal, align 4
store i64 32768, i64* @LVal, align 8
ret void
; CHECK-LABEL: setExcessivePos
; CHECK: li 6, 0
; CHECK: ori 6, 6, 32768
; CHECK-DAG: sth 6,
; CHECK-DAG: stw 6,
; CHECK-DAG: std 6,
}
; Loop that, for each index below %Len, stores -7 as an i64 into the array
; loaded from @arr and as an i32 into the array loaded from @arri. The
; directives below expect an `li 5, -7` and both update-form stores (stdu and
; stwu) to take their value from register 5.
define void @SetArr(i32 signext %Len) {
entry:
%cmp7 = icmp sgt i32 %Len, 0
br i1 %cmp7, label %for.body.lr.ph, label %for.cond.cleanup
for.body.lr.ph: ; preds = %entry
%0 = load i64*, i64** @arr, align 8
%1 = load i32*, i32** @arri, align 8
%wide.trip.count = zext i32 %Len to i64
br label %for.body
for.cond.cleanup: ; preds = %for.body, %entry
ret void
for.body: ; preds = %for.body, %for.body.lr.ph
%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i64, i64* %0, i64 %indvars.iv
store i64 -7, i64* %arrayidx, align 8
%arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
store i32 -7, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond, label %for.cond.cleanup, label %for.body
; CHECK-LABEL: SetArr
; CHECK: li 5, -7
; CHECK: stdu 5, 8(3)
; CHECK: stwu 5, 4(4)
}
; Stores i32 255 and i8 -1, two constants that agree in their low 8 bits. The
; directives below expect an `li 5, 255` whose register (5) is the source of
; both the byte store and the word store.
define void @setSameValDiffSizeCI() {
entry:
store i32 255, i32* @IVal, align 4
store i8 -1, i8* @CVal, align 1
ret void
; CHECK-LABEL: setSameValDiffSizeCI
; CHECK: li 5, 255
; CHECK-DAG: stb 5,
; CHECK-DAG: stw 5,
}
; Stores i32 65535 and i16 -1, two constants that agree in their low 16 bits.
; The directives below expect the two-instruction `li 5, 0` /
; `ori 5, 5, 65535` sequence, with register 5 as the source of both the
; halfword store and the word store.
define void @setSameValDiffSizeSI() {
entry:
store i32 65535, i32* @IVal, align 4
store i16 -1, i16* @SVal, align 2
ret void
; CHECK-LABEL: setSameValDiffSizeSI
; CHECK: li 5, 0
; CHECK: ori 5, 5, 65535
; CHECK-DAG: sth 5,
; CHECK-DAG: stw 5,
}

View File

@ -106,7 +106,7 @@ entry:
store i8 %conv3, i8* @glob
ret void
; CHECK-LABEL: @test_igeuc_sext_z_store
; CHECK: li [[REG1:r[0-9]+]], 255
; CHECK: li [[REG1:r[0-9]+]], -1
; CHECK: stb [[REG1]]
; CHECK: blr
}

View File

@ -105,9 +105,8 @@ entry:
store i16 %conv3, i16* @glob
ret void
; CHECK-LABEL: @test_igeus_sext_z_store
; CHECK: li [[REG1:r[0-9]+]], 0
; CHECK: ori [[REG2:r[0-9]+]], [[REG1]], 65535
; CHECK: sth [[REG2]]
; CHECK: li [[REG1:r[0-9]+]], -1
; CHECK: sth [[REG1]]
; CHECK: blr
}

View File

@ -105,7 +105,7 @@ entry:
store i8 %conv1, i8* @glob
ret void
; CHECK-LABEL: @test_llgeuc_sext_z_store
; CHECK: li [[REG1:r[0-9]+]], 255
; CHECK: li [[REG1:r[0-9]+]], -1
; CHECK: stb [[REG1]]
; CHECK: blr
}

View File

@ -105,9 +105,8 @@ entry:
store i16 %conv1, i16* @glob
ret void
; CHECK-LABEL: @test_llgeus_sext_z_store
; CHECK: li [[REG1:r[0-9]+]], 0
; CHECK: ori [[REG2:r[0-9]+]], [[REG1]], 65535
; CHECK: sth [[REG2]]
; CHECK: li [[REG1:r[0-9]+]], -1
; CHECK: sth [[REG1]]
; CHECK: blr
}