forked from OSchip/llvm-project
Implement an important entry from README_ALTIVEC:
If an altivec predicate compare is used immediately by a branch, don't use a (serializing) MFCR instruction to read the CR6 register, which then requires a compare to get the value back into a CR. Instead, just branch on CR6 directly. :)

For example, for:

void foo2(vector float *A, vector float *B) {
  if (!vec_any_eq(*A, *B))
    *B = (vector float){0,0,0,0};
}

We now generate:

_foo2:
        mfspr r2, 256
        oris r5, r2, 12288
        mtspr 256, r5
        lvx v2, 0, r4
        lvx v3, 0, r3
        vcmpeqfp. v2, v3, v2
        bne cr6, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
        vxor v2, v2, v2
        stvx v2, 0, r4
        mtspr 256, r2
        blr
LBB1_2: ; UnifiedReturnBlock
        mtspr 256, r2
        blr

instead of:

_foo2:
        mfspr r2, 256
        oris r5, r2, 12288
        mtspr 256, r5
        lvx v2, 0, r4
        lvx v3, 0, r3
        vcmpeqfp. v2, v3, v2
        mfcr r3, 2
        rlwinm r3, r3, 27, 31, 31
        cmpwi cr0, r3, 0
        beq cr0, LBB1_2 ; UnifiedReturnBlock
LBB1_1: ; cond_true
        vxor v2, v2, v2
        stvx v2, 0, r4
        mtspr 256, r2
        blr
LBB1_2: ; UnifiedReturnBlock
        mtspr 256, r2
        blr

This implements CodeGen/PowerPC/vec_br_cmp.ll.

llvm-svn: 27804
This commit is contained in:
parent
11a9ac51e8
commit
9754d142a4
|
@ -246,6 +246,7 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
|
|||
// We have target-specific dag combine patterns for the following nodes:
|
||||
setTargetDAGCombine(ISD::SINT_TO_FP);
|
||||
setTargetDAGCombine(ISD::STORE);
|
||||
setTargetDAGCombine(ISD::BR_CC);
|
||||
|
||||
computeRegisterProperties();
|
||||
}
|
||||
|
@ -1460,18 +1461,17 @@ static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
|
|||
return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
|
||||
}
|
||||
|
||||
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
|
||||
/// lower, do it, otherwise return null.
|
||||
static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
|
||||
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
|
||||
|
||||
// If this is a lowered altivec predicate compare, CompareOpc is set to the
|
||||
// opcode number of the comparison.
|
||||
int CompareOpc = -1;
|
||||
bool isDot = false;
|
||||
switch (IntNo) {
|
||||
default: return SDOperand(); // Don't custom lower most intrinsics.
|
||||
// Comparison predicates.
|
||||
/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
|
||||
/// altivec comparison. If it is, return true and fill in Opc/isDot with
|
||||
/// information about the intrinsic.
|
||||
static bool getAltivecCompareInfo(SDOperand Intrin, int &CompareOpc,
|
||||
bool &isDot) {
|
||||
unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getValue();
|
||||
CompareOpc = -1;
|
||||
isDot = false;
|
||||
switch (IntrinsicID) {
|
||||
default: return false;
|
||||
// Comparison predicates.
|
||||
case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
|
||||
case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
|
||||
case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
|
||||
|
@ -1501,10 +1501,20 @@ static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
|
|||
case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
|
||||
case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
|
||||
/// lower, do it, otherwise return null.
|
||||
static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
|
||||
// If this is a lowered altivec predicate compare, CompareOpc is set to the
|
||||
// opcode number of the comparison.
|
||||
int CompareOpc;
|
||||
bool isDot;
|
||||
if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
|
||||
return SDOperand(); // Don't custom lower most intrinsics.
|
||||
|
||||
assert(CompareOpc>0 && "We only lower altivec predicate compares so far!");
|
||||
|
||||
// If this is a non-dot comparison, make the VCMP node.
|
||||
// If this is a non-dot comparison, make the VCMP node and we are done.
|
||||
if (!isDot) {
|
||||
SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2),
|
||||
|
@ -2198,6 +2208,69 @@ SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
}
|
||||
break;
|
||||
}
|
||||
case ISD::BR_CC: {
|
||||
// If this is a branch on an altivec predicate comparison, lower this so
|
||||
// that we don't have to do a MFCR: instead, branch directly on CR6. This
|
||||
// lowering is done pre-legalize, because the legalizer lowers the predicate
|
||||
// compare down to code that is difficult to reassemble.
|
||||
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
|
||||
SDOperand LHS = N->getOperand(2), RHS = N->getOperand(3);
|
||||
int CompareOpc;
|
||||
bool isDot;
|
||||
|
||||
if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
|
||||
isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
|
||||
getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
|
||||
assert(isDot && "Can't compare against a vector result!");
|
||||
|
||||
// If this is a comparison against something other than 0/1, then we know
|
||||
// that the condition is never/always true.
|
||||
unsigned Val = cast<ConstantSDNode>(RHS)->getValue();
|
||||
if (Val != 0 && Val != 1) {
|
||||
if (CC == ISD::SETEQ) // Cond never true, remove branch.
|
||||
return N->getOperand(0);
|
||||
// Always !=, turn it into an unconditional branch.
|
||||
return DAG.getNode(ISD::BR, MVT::Other,
|
||||
N->getOperand(0), N->getOperand(4));
|
||||
}
|
||||
|
||||
bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
|
||||
|
||||
// Create the PPCISD altivec 'dot' comparison node.
|
||||
std::vector<SDOperand> Ops;
|
||||
std::vector<MVT::ValueType> VTs;
|
||||
Ops.push_back(LHS.getOperand(2)); // LHS of compare
|
||||
Ops.push_back(LHS.getOperand(3)); // RHS of compare
|
||||
Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32));
|
||||
VTs.push_back(LHS.getOperand(2).getValueType());
|
||||
VTs.push_back(MVT::Flag);
|
||||
SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops);
|
||||
|
||||
// Unpack the result based on how the target uses it.
|
||||
unsigned CompOpc;
|
||||
switch (cast<ConstantSDNode>(LHS.getOperand(1))->getValue()) {
|
||||
default: // Can't happen, don't crash on invalid number though.
|
||||
case 0: // Branch on the value of the EQ bit of CR6.
|
||||
CompOpc = BranchOnWhenPredTrue ? PPC::BEQ : PPC::BNE;
|
||||
break;
|
||||
case 1: // Branch on the inverted value of the EQ bit of CR6.
|
||||
CompOpc = BranchOnWhenPredTrue ? PPC::BNE : PPC::BEQ;
|
||||
break;
|
||||
case 2: // Branch on the value of the LT bit of CR6.
|
||||
CompOpc = BranchOnWhenPredTrue ? PPC::BLT : PPC::BGE;
|
||||
break;
|
||||
case 3: // Branch on the inverted value of the LT bit of CR6.
|
||||
CompOpc = BranchOnWhenPredTrue ? PPC::BGE : PPC::BLT;
|
||||
break;
|
||||
}
|
||||
|
||||
return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0),
|
||||
DAG.getRegister(PPC::CR6, MVT::i32),
|
||||
DAG.getConstant(CompOpc, MVT::i32),
|
||||
N->getOperand(4), CompNode.getValue(1));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return SDOperand();
|
||||
|
|
|
@ -96,7 +96,14 @@ namespace llvm {
|
|||
/// altivec VCMP*o instructions. For lack of a better number, we use the
|
||||
/// opcode number encoding for the OPC field to identify the compare. For
|
||||
/// example, 838 is VCMPGTSH.
|
||||
VCMPo
|
||||
VCMPo,
|
||||
|
||||
/// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
|
||||
/// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
|
||||
/// condition register to branch on, OPC is the branch opcode to use (e.g.
|
||||
/// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
|
||||
/// an optional input flag argument.
|
||||
COND_BRANCH
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -34,6 +34,10 @@ def SDT_PPCvcmp : SDTypeProfile<1, 3, [
|
|||
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
|
||||
]>;
|
||||
|
||||
def SDT_PPCcondbr : SDTypeProfile<0, 3, [
|
||||
SDTCisVT<1, i32>, SDTCisVT<2, OtherVT>
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC specific DAG Nodes.
|
||||
//
|
||||
|
@ -74,6 +78,9 @@ def retflag : SDNode<"PPCISD::RET_FLAG", SDT_PPCRetFlag,
|
|||
def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
|
||||
def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>;
|
||||
|
||||
def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
|
||||
[SDNPHasChain, SDNPOptInFlag]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC specific transformation functions and pattern fragments.
|
||||
//
|
||||
|
@ -263,8 +270,9 @@ let Defs = [LR] in
|
|||
|
||||
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1,
|
||||
noResults = 1, PPC970_Unit = 7 in {
|
||||
def COND_BRANCH : Pseudo<(ops CRRC:$crS, u16imm:$opc, target:$true),
|
||||
"; COND_BRANCH", []>;
|
||||
def COND_BRANCH : Pseudo<(ops CRRC:$crS, u16imm:$opc, target:$dst),
|
||||
"; COND_BRANCH $crS, $opc, $dst",
|
||||
[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]>;
|
||||
def B : IForm<18, 0, 0, (ops target:$dst),
|
||||
"b $dst", BrB,
|
||||
[(br bb:$dst)]>;
|
||||
|
|
|
@ -53,12 +53,6 @@ of C1/C2/C3, then a load and vperm of Variable.
|
|||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
Do not generate the MFCR/RLWINM sequence for predicate compares when the
|
||||
predicate compare is used immediately by a branch. Just branch on the right
|
||||
cond code on CR6.
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
We need a way to teach tblgen that some operands of an intrinsic are required to
|
||||
be constants. The verifier should enforce this constraint.
|
||||
|
||||
|
|
Loading…
Reference in New Issue