[ARM] Enable isel of UMAAL

TargetLowering and DAGToDAG are used to combine ADDC, ADDE and UMLAL
dags into UMAAL. Selection is split into the two phases because it
is easier to match the two patterns at those different times.

Differential Revision: http://reviews.llvm.org/D21461

llvm-svn: 273165
This commit is contained in:
Sam Parker 2016-06-20 16:47:09 +00:00
parent 0f89833c31
commit d616cf07b2
6 changed files with 162 additions and 10 deletions

View File

@ -2939,7 +2939,47 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
}
case ARMISD::UMAAL: {
// Select an ARMISD::UMAAL node straight to the machine instruction. The
// Thumb opcode (t2UMAAL) is chosen whenever the subtarget is in Thumb mode,
// otherwise the ARM opcode is used.
unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
// The node's four operands are forwarded unchanged, followed by the value
// returned by getAL() and register 0.
// NOTE(review): presumably these last two are the "always" condition code
// and an empty predicate register -- confirm against getAL's definition.
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3),
getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32) };
// The machine node produces two i32 results and takes the place of N.
ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
return;
}
case ARMISD::UMLAL:{
// UMAAL is similar to UMLAL but it adds two 32-bit values to the
// 64-bit multiplication result.
if (Subtarget->hasV6Ops() && N->getOperand(2).getOpcode() == ARMISD::ADDC &&
N->getOperand(3).getOpcode() == ARMISD::ADDE) {
SDValue Addc = N->getOperand(2);
SDValue Adde = N->getOperand(3);
if (Adde.getOperand(2).getNode() == Addc.getNode()) {
ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0));
ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1));
if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0)
{
// Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm
// RdLo = one operand to be added, lower 32-bits of res
// RdHi = other operand to be added, upper 32-bits of res
// Rn = first multiply operand
// Rm = second multiply operand
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
Addc.getOperand(0), Addc.getOperand(1),
getAL(CurDAG, dl),
CurDAG->getRegister(0, MVT::i32) };
unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops);
return;
}
}
}
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG, dl),

View File

@ -1212,6 +1212,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VTBL2: return "ARMISD::VTBL2";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
case ARMISD::UMAAL: return "ARMISD::UMAAL";
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
@ -8686,11 +8687,6 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
if (Subtarget->isThumb1Only()) return SDValue();
// Only perform the checks after legalize when the pattern is available.
if (DCI.isBeforeLegalize()) return SDValue();
// Look for multiply add opportunities.
// The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
// each add nodes consumes a value from ISD::UMUL_LOHI and there is
@ -8818,14 +8814,97 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
return resNode;
}
/// AddCombineTo64bitUMAAL - Try to fold an ADDC/ADDE pair that adds a second
/// 32-bit value onto the result of an ARMISD::UMLAL into one ARMISD::UMAAL.
///
/// UMAAL computes RdHi:RdLo = Rn * Rm + RdLo + RdHi, i.e. it is a UMLAL whose
/// 64-bit accumulator is replaced by two independent 32-bit addends. The
/// pattern recognised here is
///
///   UMLAL = ARMISD::UMLAL a, b, lo, 0
///   ADDC  = addc UMLAL:0, hi
///   ADDE  = adde UMLAL:1, 0, ADDC:1
///
/// which becomes ARMISD::UMAAL a, b, lo, hi. The mirrored pattern, where a
/// UMLAL consumes an ADDC/ADDE pair, is handled in ISelDAGToDAG.
///
/// \param AddcNode  the ADDC node being combined.
/// \param DCI       combiner state; only DCI.DAG is used directly here, the
///                  rest is forwarded to AddCombineTo64bitMLAL.
/// \param Subtarget used to decide whether UMAAL may be formed at all.
/// \returns SDValue(AddcNode, 0) when the uses of the ADDC/ADDE pair were
///          rewritten to a new UMAAL node, the result of
///          AddCombineTo64bitMLAL when the work is delegated, and an empty
///          SDValue when nothing was changed.
static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const ARMSubtarget *Subtarget) {
  // UMAAL is similar to UMLAL except that it adds two unsigned values.
  // While trying to combine for the other MLAL nodes, first search for the
  // chance to use UMAAL. Check if Addc uses another addc node which can first
  // be combined into a UMLAL. The other pattern is AddcNode being combined
  // into an UMLAL and then using another addc is handled in ISelDAGToDAG.

  // The ARM encoding of UMAAL only needs ARMv6, but the Thumb2 encoding is
  // additionally predicated on the DSP extension. Forming an ARMISD::UMAAL
  // node on a Thumb core without DSP would leave instruction selection with
  // no legal instruction, so fall back to the plain MLAL combine there.
  const bool CanUseUMAAL =
      Subtarget->hasV6Ops() && (!Subtarget->isThumb() || Subtarget->hasDSP());
  if (!CanUseUMAAL)
    return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);

  SDNode *PrevAddc = nullptr;
  if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC)
    PrevAddc = AddcNode->getOperand(0).getNode();
  else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC)
    PrevAddc = AddcNode->getOperand(1).getNode();

  // If there's no addc chains, just return a search for any MLAL.
  if (PrevAddc == nullptr)
    return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);

  // Try to convert the addc operand to an MLAL and if that fails try to
  // combine AddcNode.
  SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget);
  if (MLAL != SDValue(PrevAddc, 0))
    return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);

  // Find the converted UMLAL or quit if it doesn't exist.
  SDValue UmlalLo;
  SDValue AddHi;
  if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
    UmlalLo = AddcNode->getOperand(0);
    AddHi = AddcNode->getOperand(1);
  } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
    UmlalLo = AddcNode->getOperand(1);
    AddHi = AddcNode->getOperand(0);
  } else {
    return SDValue();
  }
  SDNode *UmlalNode = UmlalLo.getNode();

  // The ADDC adds onto the low half of the product, so it has to consume
  // result 0 of the UMLAL; matching on the node alone would also accept its
  // high result.
  if (UmlalLo.getResNo() != 0)
    return SDValue();

  // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
  // the ADDC as well as Zero.
  auto *Zero = dyn_cast<ConstantSDNode>(UmlalNode->getOperand(3));
  if (!Zero || Zero->getZExtValue() != 0)
    return SDValue();

  // Check that we have a glued ADDC node.
  if (AddcNode->getValueType(1) != MVT::Glue)
    return SDValue();

  // Look for the glued ADDE, and make sure it really is one before its
  // operands are interpreted as ADDE operands.
  SDNode *AddeNode = AddcNode->getGluedUser();
  if (!AddeNode || AddeNode->getOpcode() != ISD::ADDE)
    return SDValue();

  // The ADDE must add Zero to the high half (result 1) of the same UMLAL, in
  // either operand order.
  const SDValue UmlalHi(UmlalNode, 1);
  const SDValue AddeOp0 = AddeNode->getOperand(0);
  const SDValue AddeOp1 = AddeNode->getOperand(1);
  const bool AddsZeroToUmlalHi =
      (AddeOp0.getNode() == Zero && AddeOp1 == UmlalHi) ||
      (AddeOp0 == UmlalHi && AddeOp1.getNode() == Zero);
  if (!AddsZeroToUmlalHi)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
                    UmlalNode->getOperand(2), AddHi };
  SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
                              DAG.getVTList(MVT::i32, MVT::i32), Ops);

  // Replace the ADDs' nodes uses by the UMAAL node's values.
  DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
  DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));

  // Return original node to notify the driver to stop replacing.
  return SDValue(AddcNode, 0);
}
/// PerformADDCCombine - Target-specific dag combine transform from
/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or
/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
static SDValue PerformADDCCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
return AddCombineTo64bitMLAL(N, DCI, Subtarget);
if (Subtarget->isThumb1Only()) return SDValue();
// Only perform the checks after legalize when the pattern is available.
if (DCI.isBeforeLegalize()) return SDValue();
return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
}
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with

View File

@ -163,6 +163,7 @@ namespace llvm {
UMLAL, // 64bit Unsigned Accumulate Multiply
SMLAL, // 64bit Signed Accumulate Multiply
UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other

View File

@ -95,6 +95,7 @@ def SDT_ARM64bitmlal : SDTypeProfile<2,4, [ SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<4, i32>, SDTCisVT<5, i32> ] >;
def ARMUmlal : SDNode<"ARMISD::UMLAL", SDT_ARM64bitmlal>;
def ARMSmlal : SDNode<"ARMISD::SMLAL", SDT_ARM64bitmlal>;
def ARMUmaal : SDNode<"ARMISD::UMAAL", SDT_ARM64bitmlal>;
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
@ -3950,9 +3951,10 @@ def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
IIC_iMAC64,
"umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
Requires<[IsARM, HasV6]> {
RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rm;

View File

@ -2593,8 +2593,9 @@ def t2UMLAL : T2MlaLong<0b110, 0b0000,
def t2UMAAL : T2MulLong<0b110, 0b0110,
(outs rGPR:$RdLo, rGPR:$RdHi),
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
"umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
Requires<[IsThumb2, HasDSP]>;
} // hasSideEffects

View File

@ -116,3 +116,32 @@ define i64 @MACLongTest8(i64 %acc, i32 %lhs, i32 %rhs) {
ret i64 %add
}
; Computes (zext(rhs) * zext(lhs) + zext(lo)) + zext(hi) as an i64: the
; product is accumulated with %lo first and %hi is added afterwards.
; The two V7 check prefixes expect a umaal to be emitted; the plain prefix
; expects none (which run lines use which prefix is not visible here).
define i64 @MACLongTest9(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
;CHECK-LABEL: MACLongTest9:
;CHECK-V7-LE:umaal
;CHECK-V7-BE:umaal
;CHECK-NOT:umaal
%conv = zext i32 %lhs to i64
%conv1 = zext i32 %rhs to i64
%mul = mul nuw i64 %conv1, %conv
%conv2 = zext i32 %lo to i64
%add = add i64 %mul, %conv2
%conv3 = zext i32 %hi to i64
%add2 = add i64 %add, %conv3
ret i64 %add2
}
; Computes (zext(lo) + zext(hi)) + zext(rhs) * zext(lhs) as an i64: the two
; 32-bit addends are summed first and the product is added afterwards.
; The two V7 check prefixes expect a umaal to be emitted; the plain prefix
; expects none (which run lines use which prefix is not visible here).
define i64 @MACLongTest10(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
;CHECK-LABEL: MACLongTest10:
;CHECK-V7-LE:umaal
;CHECK-V7-BE:umaal
;CHECK-NOT:umaal
%conv = zext i32 %lhs to i64
%conv1 = zext i32 %rhs to i64
%mul = mul nuw i64 %conv1, %conv
%conv2 = zext i32 %lo to i64
%conv3 = zext i32 %hi to i64
%add = add i64 %conv2, %conv3
%add2 = add i64 %add, %mul
ret i64 %add2
}