forked from OSchip/llvm-project
[PowerPC] Combine ADD to ADDZE
On the ppc64le platform, if the IR has the following form:

define i64 @addze1(i64 %x, i64 %z) local_unnamed_addr #0 {
entry:
  %cmp = icmp ne i64 %z, CONSTANT      ; with -32767 <= CONSTANT <= 32768
  %conv1 = zext i1 %cmp to i64
  %add = add nsw i64 %conv1, %x
  ret i64 %add
}

we can optimize it to the form below:

                                when C == 0
                            --> addze X, (addic Z, -1)
                           /
add X, (zext(setne Z, C)) --
                           \    when -32768 <= -C <= 32767 && C != 0
                            --> addze X, (addic (addi Z, -C), -1)

Patch By: HLJ2009 (Li Jia He)
Differential Revision: https://reviews.llvm.org/D51403
Reviewed By: Nemanjai
llvm-svn: 341634
This commit is contained in:
parent
9e6845d8e1
commit
abbb894ff5
|
@ -1055,6 +1055,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
|||
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
|
||||
|
||||
// We have target-specific dag combine patterns for the following nodes:
|
||||
setTargetDAGCombine(ISD::ADD);
|
||||
setTargetDAGCombine(ISD::SHL);
|
||||
setTargetDAGCombine(ISD::SRA);
|
||||
setTargetDAGCombine(ISD::SRL);
|
||||
|
@ -12470,6 +12471,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
SDLoc dl(N);
|
||||
switch (N->getOpcode()) {
|
||||
default: break;
|
||||
case ISD::ADD:
|
||||
return combineADD(N, DCI);
|
||||
case ISD::SHL:
|
||||
return combineSHL(N, DCI);
|
||||
case ISD::SRA:
|
||||
|
@ -14176,6 +14179,100 @@ SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
|
||||
// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
|
||||
// When C is zero, the equation (addi Z, -C) can be simplified to Z
|
||||
// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
|
||||
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
|
||||
const PPCSubtarget &Subtarget) {
|
||||
if (!Subtarget.isPPC64())
|
||||
return SDValue();
|
||||
|
||||
SDValue LHS = N->getOperand(0);
|
||||
SDValue RHS = N->getOperand(1);
|
||||
|
||||
auto isZextOfCompareWithConstant = [](SDValue Op) {
|
||||
if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
|
||||
Op.getValueType() != MVT::i64)
|
||||
return false;
|
||||
|
||||
SDValue Cmp = Op.getOperand(0);
|
||||
if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
|
||||
Cmp.getOperand(0).getValueType() != MVT::i64)
|
||||
return false;
|
||||
|
||||
if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
|
||||
int64_t NegConstant = 0 - Constant->getSExtValue();
|
||||
// Due to the limitations of the addi instruction,
|
||||
// -C is required to be [-32768, 32767].
|
||||
return isInt<16>(NegConstant);
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
|
||||
bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
|
||||
|
||||
// If there is a pattern, canonicalize a zext operand to the RHS.
|
||||
if (LHSHasPattern && !RHSHasPattern)
|
||||
std::swap(LHS, RHS);
|
||||
else if (!LHSHasPattern && !RHSHasPattern)
|
||||
return SDValue();
|
||||
|
||||
SDLoc DL(N);
|
||||
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i64);
|
||||
SDValue Cmp = RHS.getOperand(0);
|
||||
SDValue Z = Cmp.getOperand(0);
|
||||
auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
|
||||
|
||||
assert(Constant && "Constant Should not be a null pointer.");
|
||||
int64_t NegConstant = 0 - Constant->getSExtValue();
|
||||
|
||||
switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
|
||||
default: break;
|
||||
case ISD::SETNE: {
|
||||
// when C == 0
|
||||
// --> addze X, (addic Z, -1).carry
|
||||
// /
|
||||
// add X, (zext(setne Z, C))--
|
||||
// \ when -32768 <= -C <= 32767 && C != 0
|
||||
// --> addze X, (addic (addi Z, -C), -1).carry
|
||||
SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
|
||||
DAG.getConstant(NegConstant, DL, MVT::i64));
|
||||
SDValue AddOrZ = NegConstant != 0 ? Add : Z;
|
||||
SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
|
||||
AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
|
||||
return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
|
||||
SDValue(Addc.getNode(), 1));
|
||||
}
|
||||
case ISD::SETEQ: {
|
||||
// when C == 0
|
||||
// --> addze X, (subfic Z, 0).carry
|
||||
// /
|
||||
// add X, (zext(sete Z, C))--
|
||||
// \ when -32768 <= -C <= 32767 && C != 0
|
||||
// --> addze X, (subfic (addi Z, -C), 0).carry
|
||||
SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
|
||||
DAG.getConstant(NegConstant, DL, MVT::i64));
|
||||
SDValue AddOrZ = NegConstant != 0 ? Add : Z;
|
||||
SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
|
||||
DAG.getConstant(0, DL, MVT::i64), AddOrZ);
|
||||
return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
|
||||
SDValue(Subc.getNode(), 1));
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Target-specific DAG combine entry point for ISD::ADD nodes. Currently the
// only rewrite attempted is the ADD -> ADDZE combine; an empty SDValue is
// returned when it does not apply.
SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
  SDValue Combined = combineADDToADDZE(N, DCI.DAG, Subtarget);
  if (Combined.getNode())
    return Combined;

  return SDValue();
}
|
||||
|
||||
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
|
||||
// Only duplicate to increase tail-calls for the 64bit SysV ABIs.
|
||||
if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
|
||||
|
|
|
@ -1092,6 +1092,7 @@ namespace llvm {
|
|||
SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
|
||||
/// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
|
||||
/// SETCC with integer subtraction when (1) there is a legal way of doing it
|
||||
|
|
|
@ -0,0 +1,172 @@
|
|||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
|
||||
; RUN: -ppc-asm-full-reg-names -mcpu=pwr9 < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
|
||||
; RUN: -ppc-asm-full-reg-names -mcpu=pwr9 < %s | FileCheck %s
|
||||
|
||||
; X + zext(Z != 0). The constant is 0, so no addi is expected: the sequence is
; addic Z, -1 followed by addze.
define i64 @addze1(i64 %X, i64 %Z) {
|
||||
; CHECK-LABEL: addze1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addic [[REG1:r[0-9]+]], [[REG1]], -1
|
||||
; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
|
||||
; CHECK-NEXT: blr
|
||||
%cmp = icmp ne i64 %Z, 0
|
||||
%conv1 = zext i1 %cmp to i64
|
||||
%add = add nsw i64 %conv1, %X
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
; X + zext(Z == 0). The constant is 0, so no addi is expected: the sequence is
; subfic Z, 0 followed by addze.
define i64 @addze2(i64 %X, i64 %Z) {
|
||||
; CHECK-LABEL: addze2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: subfic [[REG1:r[0-9]+]], [[REG1]], 0
|
||||
; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
|
||||
; CHECK-NEXT: blr
|
||||
%cmp = icmp eq i64 %Z, 0
|
||||
%conv1 = zext i1 %cmp to i64
|
||||
%add = add nsw i64 %conv1, %X
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
; X + zext(Z != 32768). The negated constant is -32768, the lowest value the
; combine accepts: expects addi Z, -32768, then addic and addze.
define i64 @addze3(i64 %X, i64 %Z) {
|
||||
; CHECK-LABEL: addze3:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG1]], -32768
|
||||
; CHECK-NEXT: addic [[REG1]], [[REG1]], -1
|
||||
; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
|
||||
; CHECK-NEXT: blr
|
||||
%cmp = icmp ne i64 %Z, 32768
|
||||
%conv1 = zext i1 %cmp to i64
|
||||
%add = add nsw i64 %conv1, %X
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
; X + zext(Z == 32768). The negated constant is -32768, the lowest value the
; combine accepts: expects addi Z, -32768, then subfic and addze.
define i64 @addze4(i64 %X, i64 %Z) {
|
||||
; CHECK-LABEL: addze4:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG1]], -32768
|
||||
; CHECK-NEXT: subfic [[REG1]], [[REG1]], 0
|
||||
; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
|
||||
; CHECK-NEXT: blr
|
||||
%cmp = icmp eq i64 %Z, 32768
|
||||
%conv1 = zext i1 %cmp to i64
|
||||
%add = add nsw i64 %conv1, %X
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
; X + zext(Z != -32767). The negated constant is 32767, the highest value the
; combine accepts: expects addi Z, 32767, then addic and addze.
define i64 @addze5(i64 %X, i64 %Z) {
|
||||
; CHECK-LABEL: addze5:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG1]], 32767
|
||||
; CHECK-NEXT: addic [[REG1]], [[REG1]], -1
|
||||
; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
|
||||
; CHECK-NEXT: blr
|
||||
%cmp = icmp ne i64 %Z, -32767
|
||||
%conv1 = zext i1 %cmp to i64
|
||||
%add = add nsw i64 %conv1, %X
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
; X + zext(Z == -32767). The negated constant is 32767, the highest value the
; combine accepts: expects addi Z, 32767, then subfic and addze.
define i64 @addze6(i64 %X, i64 %Z) {
|
||||
; CHECK-LABEL: addze6:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi [[REG1:r[0-9]+]], [[REG1]], 32767
|
||||
; CHECK-NEXT: subfic [[REG1]], [[REG1]], 0
|
||||
; CHECK-NEXT: addze [[REG2:r[0-9]+]], [[REG2]]
|
||||
; CHECK-NEXT: blr
|
||||
%cmp = icmp eq i64 %Z, -32767
|
||||
%conv1 = zext i1 %cmp to i64
|
||||
%add = add nsw i64 %conv1, %X
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
; The negated constant (-C) is outside [-32768, 32767], so no addze is expected.
|
||||
; X + zext(Z != -32768). The negated constant is 32768, one above the accepted
; range: expects the li/xor/addic/subfe/add sequence with no addze.
define i64 @test1(i64 %X, i64 %Z) {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: li [[REG1:r[0-9]+]], -32768
|
||||
; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1]]
|
||||
; CHECK-NEXT: addic [[REG1]], [[REG2]], -1
|
||||
; CHECK-NEXT: subfe [[REG2]], [[REG1]], [[REG2]]
|
||||
; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
|
||||
; CHECK-NEXT: blr
|
||||
%cmp = icmp ne i64 %Z, -32768
|
||||
%conv1 = zext i1 %cmp to i64
|
||||
%add = add nsw i64 %conv1, %X
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
; X + zext(Z == -32768). The negated constant is 32768, one above the accepted
; range: expects the li/xor/cntlzd/rldicl/add sequence with no addze.
define i64 @test2(i64 %X, i64 %Z) {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: li [[REG1:r[0-9]+]], -32768
|
||||
; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1]]
|
||||
; CHECK-NEXT: cntlzd [[REG2]], [[REG2]]
|
||||
; CHECK-NEXT: rldicl [[REG2]], [[REG2]], 58, 63
|
||||
; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
|
||||
; CHECK-NEXT: blr
|
||||
%cmp = icmp eq i64 %Z, -32768
|
||||
%conv1 = zext i1 %cmp to i64
|
||||
%add = add nsw i64 %conv1, %X
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
; X + zext(Z != 32769). The negated constant is -32769, one below the accepted
; range: expects the li/ori/xor/addic/subfe/add sequence with no addze.
define i64 @test3(i64 %X, i64 %Z) {
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: li [[REG1:r[0-9]+]], 0
|
||||
; CHECK-NEXT: ori [[REG1]], [[REG1]], 32769
|
||||
; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1]]
|
||||
; CHECK-NEXT: addic [[REG1]], [[REG2]], -1
|
||||
; CHECK-NEXT: subfe [[REG2]], [[REG1]], [[REG2]]
|
||||
; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
|
||||
; CHECK-NEXT: blr
|
||||
%cmp = icmp ne i64 %Z, 32769
|
||||
%conv1 = zext i1 %cmp to i64
|
||||
%add = add nsw i64 %conv1, %X
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
; X + zext(Z == 32769). The negated constant is -32769, one below the accepted
; range: expects the li/ori/xor/cntlzd/rldicl/add sequence with no addze.
define i64 @test4(i64 %X, i64 %Z) {
|
||||
; CHECK-LABEL: test4:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: li [[REG1:r[0-9]+]], 0
|
||||
; CHECK-NEXT: ori [[REG1]], [[REG1]], 32769
|
||||
; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1]]
|
||||
; CHECK-NEXT: cntlzd [[REG2]], [[REG2]]
|
||||
; CHECK-NEXT: rldicl [[REG2]], [[REG2]], 58, 63
|
||||
; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
|
||||
; CHECK-NEXT: blr
|
||||
%cmp = icmp eq i64 %Z, 32769
|
||||
%conv1 = zext i1 %cmp to i64
|
||||
%add = add nsw i64 %conv1, %X
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
; Comparison of two registers (no constant operand), so no addze is expected.
|
||||
; X + zext(Y != Z) with both compare operands in registers: expects the
; xor/addic/subfe/add sequence with no addze.
define i64 @test5(i64 %X, i64 %Y, i64 %Z) {
|
||||
; CHECK-LABEL: test5:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1:r[0-9]+]]
|
||||
; CHECK-NEXT: addic [[REG1]], [[REG2]], -1
|
||||
; CHECK-NEXT: subfe [[REG2]], [[REG1]], [[REG2]]
|
||||
; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
|
||||
; CHECK-NEXT: blr
|
||||
%cmp = icmp ne i64 %Y, %Z
|
||||
%conv1 = zext i1 %cmp to i64
|
||||
%add = add nsw i64 %conv1, %X
|
||||
ret i64 %add
|
||||
}
|
||||
|
||||
; X + zext(Y == Z) with both compare operands in registers: expects the
; xor/cntlzd/rldicl/add sequence with no addze.
define i64 @test6(i64 %X, i64 %Y, i64 %Z) {
|
||||
; CHECK-LABEL: test6:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: xor [[REG2:r[0-9]+]], [[REG2]], [[REG1:r[0-9]+]]
|
||||
; CHECK-NEXT: cntlzd [[REG2]], [[REG2]]
|
||||
; CHECK-NEXT: rldicl [[REG2]], [[REG2]], 58, 63
|
||||
; CHECK-NEXT: add [[REG3:r[0-9]+]], [[REG2]], [[REG3]]
|
||||
; CHECK-NEXT: blr
|
||||
%cmp = icmp eq i64 %Y, %Z
|
||||
%conv1 = zext i1 %cmp to i64
|
||||
%add = add nsw i64 %conv1, %X
|
||||
ret i64 %add
|
||||
}
|
Loading…
Reference in New Issue