forked from OSchip/llvm-project
[SelectionDAG] Expand ADD/SUBCARRY
This patch allows for expansion of ADDCARRY and SUBCARRY when the target does not support them. Differential Revision: https://reviews.llvm.org/D61411 llvm-svn: 360303
This commit is contained in:
parent
2194fb6ed9
commit
95b7abdcc5
|
@ -3273,6 +3273,48 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
|||
case ISD::UMULFIX:
|
||||
Results.push_back(TLI.expandFixedPointMul(Node, DAG));
|
||||
break;
|
||||
case ISD::ADDCARRY:
|
||||
case ISD::SUBCARRY: {
|
||||
SDValue LHS = Node->getOperand(0);
|
||||
SDValue RHS = Node->getOperand(1);
|
||||
SDValue Carry = Node->getOperand(2);
|
||||
|
||||
bool IsAdd = Node->getOpcode() == ISD::ADDCARRY;
|
||||
|
||||
// Initial add (or subtract, for SUBCARRY) of the 2 operands.
|
||||
unsigned Op = IsAdd ? ISD::ADD : ISD::SUB;
|
||||
EVT VT = LHS.getValueType();
|
||||
SDValue Sum = DAG.getNode(Op, dl, VT, LHS, RHS);
|
||||
|
||||
// Initial check for overflow.
|
||||
EVT CarryType = Node->getValueType(1);
|
||||
EVT SetCCType = getSetCCResultType(Node->getValueType(0));
|
||||
ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
|
||||
SDValue Overflow = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
|
||||
|
||||
// Add (or subtract, for SUBCARRY) of the carry to the initial result.
|
||||
SDValue CarryExt =
|
||||
DAG.getZeroExtendInReg(DAG.getZExtOrTrunc(Carry, dl, VT), dl, MVT::i1);
|
||||
SDValue Sum2 = DAG.getNode(Op, dl, VT, Sum, CarryExt);
|
||||
|
||||
// Second check for overflow. If we are adding, we can only overflow if the
|
||||
// initial sum is all 1s and the carry is set, resulting in a new sum of 0.
|
||||
// If we are subtracting, we can only overflow if the initial sum is 0 and
|
||||
// the carry is set, resulting in a new sum of all 1s.
|
||||
SDValue Zero = DAG.getConstant(0, dl, VT);
|
||||
SDValue Overflow2 =
|
||||
IsAdd ? DAG.getSetCC(dl, SetCCType, Sum2, Zero, ISD::SETEQ)
|
||||
: DAG.getSetCC(dl, SetCCType, Sum, Zero, ISD::SETEQ);
|
||||
Overflow2 = DAG.getNode(ISD::AND, dl, SetCCType, Overflow2,
|
||||
DAG.getZExtOrTrunc(Carry, dl, SetCCType));
|
||||
|
||||
SDValue ResultCarry =
|
||||
DAG.getNode(ISD::OR, dl, SetCCType, Overflow, Overflow2);
|
||||
|
||||
Results.push_back(Sum2);
|
||||
Results.push_back(DAG.getBoolExtOrTrunc(ResultCarry, dl, CarryType, VT));
|
||||
break;
|
||||
}
|
||||
case ISD::SADDO:
|
||||
case ISD::SSUBO: {
|
||||
SDValue LHS = Node->getOperand(0);
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefix=RISCV32
|
||||
|
||||
; Test ADDCARRY node expansion on a target that does not currently support ADDCARRY.
|
||||
; Signed fixed point multiplication eventually expands down to an ADDCARRY.
|
||||
|
||||
declare i64 @llvm.smul.fix.i64 (i64, i64, i32)
|
||||
|
||||
define i64 @addcarry(i64 %x, i64 %y) {
|
||||
; RISCV32-LABEL: addcarry:
|
||||
; RISCV32: # %bb.0:
|
||||
; RISCV32-NEXT: mul a4, a0, a3
|
||||
; RISCV32-NEXT: mulhu a5, a0, a2
|
||||
; RISCV32-NEXT: add a4, a5, a4
|
||||
; RISCV32-NEXT: sltu a6, a4, a5
|
||||
; RISCV32-NEXT: mulhu a5, a0, a3
|
||||
; RISCV32-NEXT: add a6, a5, a6
|
||||
; RISCV32-NEXT: mulhu a5, a1, a2
|
||||
; RISCV32-NEXT: add a7, a6, a5
|
||||
; RISCV32-NEXT: mul a5, a1, a2
|
||||
; RISCV32-NEXT: add a6, a4, a5
|
||||
; RISCV32-NEXT: sltu a4, a6, a4
|
||||
; RISCV32-NEXT: add a4, a7, a4
|
||||
; RISCV32-NEXT: mul a5, a1, a3
|
||||
; RISCV32-NEXT: add a5, a4, a5
|
||||
; RISCV32-NEXT: bgez a1, .LBB0_2
|
||||
; RISCV32-NEXT: # %bb.1:
|
||||
; RISCV32-NEXT: sub a5, a5, a2
|
||||
; RISCV32-NEXT: .LBB0_2:
|
||||
; RISCV32-NEXT: bgez a3, .LBB0_4
|
||||
; RISCV32-NEXT: # %bb.3:
|
||||
; RISCV32-NEXT: sub a5, a5, a0
|
||||
; RISCV32-NEXT: .LBB0_4:
|
||||
; RISCV32-NEXT: mul a0, a0, a2
|
||||
; RISCV32-NEXT: srli a0, a0, 2
|
||||
; RISCV32-NEXT: slli a1, a6, 30
|
||||
; RISCV32-NEXT: or a0, a0, a1
|
||||
; RISCV32-NEXT: srli a1, a6, 2
|
||||
; RISCV32-NEXT: slli a2, a5, 30
|
||||
; RISCV32-NEXT: or a1, a1, a2
|
||||
; RISCV32-NEXT: ret
|
||||
%tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 2);
|
||||
ret i64 %tmp;
|
||||
}
|
Loading…
Reference in New Issue