[PPC][DAGCombine] Convert SETCC to subtract when the result is zero extended

When we see a SETCC whose only users are zero extend operations, we can replace
it with a subtraction. This results in doing all calculations in GPRs and
avoids CR use.

Currently we do this only for ULT, ULE, UGT and UGE condition codes. There are
ways that this can be extended. For example for signed condition codes. In that
case we will be introducing additional sign extend instructions, so more careful
profitability analysis may be required.

Another direction to extend this is for equal, not equal conditions. Also when
users of SETCC are any_ext or sign_ext, we might be able to do something 
similar.

llvm-svn: 287329
This commit is contained in:
Ehsan Amiri 2016-11-18 10:41:44 +00:00
parent c00c9a9f61
commit 85818684c6
3 changed files with 184 additions and 1 deletions

View File

@ -9976,6 +9976,87 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
return false;
}
/// This function is called when we have proved that a SETCC node can be replaced
/// by subtraction (and other supporting instructions) so that the result of
/// comparison is kept in a GPR instead of CR. This function is purely for
/// codegen purposes and has some flags to guide the codegen process.
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
bool Swap, SDLoc &DL, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
// Zero extend the operands to the largest legal integer. Originally, they
// must be of a strictly smaller size.
auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
DAG.getConstant(Size, DL, MVT::i32));
auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
DAG.getConstant(Size, DL, MVT::i32));
// Swap if needed. Depends on the condition code.
if (Swap)
std::swap(Op0, Op1);
// Subtract extended integers.
auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
// Move the sign bit to the least significant position and zero out the rest.
// Now the least significant bit carries the result of original comparison.
auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
DAG.getConstant(Size - 1, DL, MVT::i32));
auto Final = Shifted;
// Complement the result if needed. Based on the condition code.
if (Complement)
Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
DAG.getConstant(1, DL, MVT::i64));
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
}
SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
// Size of integers being compared has a critical role in the following
// analysis, so we prefer to do this when all types are legal.
if (!DCI.isAfterLegalizeVectorOps())
return SDValue();
// If all users of SETCC extend its value to a legal integer type
// then we replace SETCC with a subtraction
for (SDNode::use_iterator UI = N->use_begin(),
UE = N->use_end(); UI != UE; ++UI) {
if (UI->getOpcode() != ISD::ZERO_EXTEND)
return SDValue();
}
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
auto OpSize = N->getOperand(0).getValueSizeInBits();
unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
if (OpSize < Size) {
switch (CC) {
default: break;
case ISD::SETULT:
return generateEquivalentSub(N, Size, false, false, DL, DAG);
case ISD::SETULE:
return generateEquivalentSub(N, Size, true, true, DL, DAG);
case ISD::SETUGT:
return generateEquivalentSub(N, Size, false, true, DL, DAG);
case ISD::SETUGE:
return generateEquivalentSub(N, Size, true, false, DL, DAG);
}
}
return SDValue();
}
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@ -10017,7 +10098,8 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
APInt::getHighBitsSet(OpBits, OpBits-1)) ||
!DAG.MaskedValueIsZero(N->getOperand(1),
APInt::getHighBitsSet(OpBits, OpBits-1)))
return SDValue();
return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
: SDValue());
} else {
// This is neither a signed nor an unsigned comparison, just make sure
// that the high bits are equal.

View File

@ -977,6 +977,11 @@ namespace llvm {
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
/// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
/// SETCC with integer subtraction when (1) there is a legal way of doing it
/// (2) keeping the result of comparison in GPR has performance benefit.
SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps, bool &UseOneConstNR,
bool Reciprocal) const override;

View File

@ -0,0 +1,96 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 < %s | FileCheck %s
%class.PB2 = type { [1 x i32], %class.PB1* }
%class.PB1 = type { [1 x i32], i64, i64, i32 }
; Function Attrs: norecurse nounwind readonly
define zeroext i1 @test1(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
entry:
%arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
%0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
%and.i = and i32 %0, 8
%arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
%1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
%and.i4 = and i32 %1, 8
%cmp.i5 = icmp ult i32 %and.i, %and.i4
ret i1 %cmp.i5
; CHECK-LABEL: @test1
; CHECK: rlwinm [[REG1:[0-9]*]]
; CHECK-NEXT: rlwinm [[REG2:[0-9]*]]
; CHECK-NEXT: sub [[REG3:[0-9]*]], [[REG1]], [[REG2]]
; CHECK-NEXT: rldicl 3, [[REG3]]
; CHECK: blr
}
; Function Attrs: norecurse nounwind readonly
define zeroext i1 @test2(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
entry:
%arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
%0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
%and.i = and i32 %0, 8
%arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
%1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
%and.i4 = and i32 %1, 8
%cmp.i5 = icmp ule i32 %and.i, %and.i4
ret i1 %cmp.i5
; CHECK-LABEL: @test2
; CHECK: rlwinm [[REG1:[0-9]*]]
; CHECK-NEXT: rlwinm [[REG2:[0-9]*]]
; CHECK-NEXT: sub [[REG3:[0-9]*]], [[REG2]], [[REG1]]
; CHECK-NEXT: rldicl [[REG4:[0-9]*]], [[REG3]]
; CHECK-NEXT: xori 3, [[REG4]], 1
; CHECK: blr
}
; Function Attrs: norecurse nounwind readonly
define zeroext i1 @test3(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
entry:
%arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
%0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
%and.i = and i32 %0, 8
%arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
%1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
%and.i4 = and i32 %1, 8
%cmp.i5 = icmp ugt i32 %and.i, %and.i4
ret i1 %cmp.i5
; CHECK-LABEL: @test3
; CHECK: rlwinm [[REG1:[0-9]*]]
; CHECK-NEXT: rlwinm [[REG2:[0-9]*]]
; CHECK-NEXT: sub [[REG3:[0-9]*]], [[REG2]], [[REG1]]
; CHECK-NEXT: rldicl 3, [[REG3]]
; CHECK: blr
}
; Function Attrs: norecurse nounwind readonly
define zeroext i1 @test4(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr #0 {
entry:
%arrayidx.i6 = bitcast %class.PB2* %s_a to i32*
%0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1
%and.i = and i32 %0, 8
%arrayidx.i37 = bitcast %class.PB2* %s_b to i32*
%1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1
%and.i4 = and i32 %1, 8
%cmp.i5 = icmp uge i32 %and.i, %and.i4
ret i1 %cmp.i5
; CHECK-LABEL: @test4
; CHECK: rlwinm [[REG1:[0-9]*]]
; CHECK-NEXT: rlwinm [[REG2:[0-9]*]]
; CHECK-NEXT: sub [[REG3:[0-9]*]], [[REG1]], [[REG2]]
; CHECK-NEXT: rldicl [[REG4:[0-9]*]], [[REG3]]
; CHECK-NEXT: xori 3, [[REG4]], 1
; CHECK: blr
}
!1 = !{!2, !2, i64 0}
!2 = !{!"int", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C++ TBAA"}