For Thumb1, lower ADDC/ADDE/SUBC/SUBE via the glueless ARMISD nodes,

same as already done for ARM and Thumb2.

Reviewers: jmolloy, rogfer01, efriedma

Subscribers: aemerson, llvm-commits, rengolin

Differential Revision: https://reviews.llvm.org/D30400

llvm-svn: 297443
This commit is contained in:
Artyom Skrobov 2017-03-10 07:40:27 +00:00
parent 1de4792c55
commit 0c93ceb5d8
4 changed files with 273 additions and 36 deletions

View File

@ -2036,6 +2036,16 @@ static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::RSBSrsi, ARM::RSBrsi},
{ARM::RSBSrsr, ARM::RSBrsr},
{ARM::tADDSi3, ARM::tADDi3},
{ARM::tADDSi8, ARM::tADDi8},
{ARM::tADDSrr, ARM::tADDrr},
{ARM::tADCS, ARM::tADC},
{ARM::tSUBSi3, ARM::tSUBi3},
{ARM::tSUBSi8, ARM::tSUBi8},
{ARM::tSUBSrr, ARM::tSUBrr},
{ARM::tSBCS, ARM::tSBC},
{ARM::t2ADDSri, ARM::t2ADDri},
{ARM::t2ADDSrr, ARM::t2ADDrr},
{ARM::t2ADDSrs, ARM::t2ADDrs},

View File

@ -826,13 +826,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
if (!Subtarget->isThumb1Only()) {
// FIXME: We should do this for Thumb1 as well.
setOperationAction(ISD::ADDC, MVT::i32, Custom);
setOperationAction(ISD::ADDE, MVT::i32, Custom);
setOperationAction(ISD::SUBC, MVT::i32, Custom);
setOperationAction(ISD::SUBE, MVT::i32, Custom);
}
setOperationAction(ISD::ADDC, MVT::i32, Custom);
setOperationAction(ISD::ADDE, MVT::i32, Custom);
setOperationAction(ISD::SUBC, MVT::i32, Custom);
setOperationAction(ISD::SUBE, MVT::i32, Custom);
if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
@ -9059,19 +9056,45 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
// Rename pseudo opcodes.
unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
unsigned ccOutIdx;
if (NewOpc) {
const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
MCID = &TII->get(NewOpc);
assert(MCID->getNumOperands() == MI.getDesc().getNumOperands() + 1 &&
"converted opcode should be the same except for cc_out");
assert(MCID->getNumOperands() ==
MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
&& "converted opcode should be the same except for cc_out"
" (and, on Thumb1, pred)");
MI.setDesc(*MCID);
// Add the optional cc_out operand
MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
}
unsigned ccOutIdx = MCID->getNumOperands() - 1;
// On Thumb1, move all input operands to the end, then add the predicate
if (Subtarget->isThumb1Only()) {
for (unsigned c = MCID->getNumOperands() - 4; c--;) {
MI.addOperand(MI.getOperand(1));
MI.RemoveOperand(1);
}
// Restore the ties
for (unsigned i = MI.getNumOperands(); i--;) {
const MachineOperand& op = MI.getOperand(i);
if (op.isReg() && op.isUse()) {
int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
if (DefIdx != -1)
MI.tieOperands(DefIdx, i);
}
}
MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
ccOutIdx = 1;
} else
ccOutIdx = MCID->getNumOperands() - 1;
} else
ccOutIdx = MCID->getNumOperands() - 1;
// Any ARM instruction that sets the 's' bit should specify an optional
// "cc_out" operand in the last operand position.
@ -9102,7 +9125,9 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
if (deadCPSR) {
assert(!MI.getOperand(ccOutIdx).getReg() &&
"expect uninitialized optional cc_out operand");
return;
// Thumb1 instructions must have the S bit even if the CPSR is dead.
if (!Subtarget->isThumb1Only())
return;
}
// If this instruction was defined with an optional CPSR def and its dag node
@ -9649,6 +9674,30 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
return SDValue();
}
/// Target-specific combine for ARMISD::ADDE / ARMISD::SUBE nodes.
///
/// Only acts on Thumb1-only subtargets. When operand 1 of \p N is a constant
/// whose sign-extended value is negative, the node is rebuilt with the
/// opposite opcode (ADDE <-> SUBE) and the bitwise complement of that
/// constant, keeping operand 0 and the carry operand (operand 2) unchanged.
///
/// \param N          the node being combined.
/// \param DAG        the DAG used to create the replacement constant and node.
/// \param Subtarget  queried for isThumb1Only().
/// \returns the replacement node, or an empty SDValue when nothing changes.
static SDValue PerformAddeSubeCombine(SDNode *N, SelectionDAG &DAG,
                                      const ARMSubtarget *Subtarget) {
  // Nothing to do outside of Thumb1.
  if (!Subtarget->isThumb1Only())
    return SDValue();

  // Only a constant right-hand side can be flipped.
  const auto *RHSConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!RHSConst)
    return SDValue();

  // Non-negative immediates are left as they are.
  const int64_t Imm = RHSConst->getSExtValue();
  if (Imm >= 0)
    return SDValue();

  SDLoc DL(N);

  // With a carry-in, the counterpart of the immediate is its bitwise NOT
  // rather than its negation: the opposite reading of the carry flag in the
  // flipped opcode already supplies part of the negation.
  SDValue FlippedRHS = DAG.getConstant(~Imm, DL, MVT::i32);

  // Swap add-with-carry and subtract-with-carry.
  unsigned FlippedOpc = ARMISD::ADDE;
  if (N->getOpcode() == ARMISD::ADDE)
    FlippedOpc = ARMISD::SUBE;

  return DAG.getNode(FlippedOpc, DL, N->getVTList(), N->getOperand(0),
                     FlippedRHS, N->getOperand(2));
}
/// PerformADDCCombine - Target-specific dag combine transform from
/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or
/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
@ -11691,6 +11740,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
case ARMISD::ADDE:
case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
case ARMISD::BFI: return PerformBFICombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);

View File

@ -910,7 +910,7 @@ let isAdd = 1 in {
def tADC : // A8.6.2
T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
"adc", "\t$Rdn, $Rm",
[(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
[]>, Sched<[WriteALU]>;
// Add immediate
def tADDi3 : // A8.6.4 T1
@ -938,6 +938,43 @@ let isAdd = 1 in {
"add", "\t$Rd, $Rn, $Rm",
[(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
/// Similar to the above except these set the 's' bit so the
/// instruction modifies the CPSR register.
///
/// These opcodes will be converted to the real non-S opcodes by
/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
// Thumb1-only flag-setting add pseudos. Every record in this block sets
// hasPostISelHook and lists CPSR in Defs, and each selection pattern produces
// both the tGPR result and CPSR.
let hasPostISelHook = 1, Defs = [CPSR] in {
// tADCS: add with carry. Selected for ARMadde, which takes CPSR as its third
// operand. The brace-less `let isCommutable = 1 in` covers only this def.
let isCommutable = 1 in
def tADCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
2, IIC_iALUr,
[(set tGPR:$Rdn, CPSR, (ARMadde tGPR:$Rn, tGPR:$Rm,
CPSR))]>,
Requires<[IsThumb1Only]>,
Sched<[WriteALU]>;
// tADDSi3: selected for ARMaddc of a register and an imm0_7 immediate.
def tADDSi3 : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
2, IIC_iALUi,
[(set tGPR:$Rd, CPSR, (ARMaddc tGPR:$Rm,
imm0_7:$imm3))]>,
Requires<[IsThumb1Only]>,
Sched<[WriteALU]>;
// tADDSi8: selected for ARMaddc of a register and an imm8_255 immediate.
// NOTE(review): the operand is declared imm0_255 while the pattern matches
// imm8_255 -- presumably so that immediates covered by imm0_7 pick tADDSi3
// instead; confirm this is intentional.
def tADDSi8 : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8),
2, IIC_iALUi,
[(set tGPR:$Rdn, CPSR, (ARMaddc tGPR:$Rn,
imm8_255:$imm8))]>,
Requires<[IsThumb1Only]>,
Sched<[WriteALU]>;
// tADDSrr: selected for ARMaddc of two registers. The brace-less
// `let isCommutable = 1 in` covers only this def.
let isCommutable = 1 in
def tADDSrr : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
2, IIC_iALUr,
[(set tGPR:$Rd, CPSR, (ARMaddc tGPR:$Rn,
tGPR:$Rm))]>,
Requires<[IsThumb1Only]>,
Sched<[WriteALU]>;
}
let hasSideEffects = 0 in
def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
"add", "\t$Rdn, $Rm", []>,
@ -1197,7 +1234,7 @@ def tSBC : // A8.6.151
T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iALUr,
"sbc", "\t$Rdn, $Rm",
[(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>,
[]>,
Sched<[WriteALU]>;
// Subtract immediate
@ -1226,6 +1263,41 @@ def tSUBrr : // A8.6.212
[(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>,
Sched<[WriteALU]>;
/// Similar to the above except these set the 's' bit so the
/// instruction modifies the CPSR register.
///
/// These opcodes will be converted to the real non-S opcodes by
/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
// Thumb1-only flag-setting subtract pseudos. Every record in this block sets
// hasPostISelHook and lists CPSR in Defs, and each selection pattern produces
// both the tGPR result and CPSR. None of them is marked commutable.
let hasPostISelHook = 1, Defs = [CPSR] in {
// tSBCS: subtract with carry. Selected for ARMsube, which takes CPSR as its
// third operand.
def tSBCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
2, IIC_iALUr,
[(set tGPR:$Rdn, CPSR, (ARMsube tGPR:$Rn, tGPR:$Rm,
CPSR))]>,
Requires<[IsThumb1Only]>,
Sched<[WriteALU]>;
// tSUBSi3: selected for ARMsubc of a register and an imm0_7 immediate.
def tSUBSi3 : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
2, IIC_iALUi,
[(set tGPR:$Rd, CPSR, (ARMsubc tGPR:$Rm,
imm0_7:$imm3))]>,
Requires<[IsThumb1Only]>,
Sched<[WriteALU]>;
// tSUBSi8: selected for ARMsubc of a register and an imm8_255 immediate.
// NOTE(review): the operand is declared imm0_255 while the pattern matches
// imm8_255 -- presumably so that immediates covered by imm0_7 pick tSUBSi3
// instead; confirm this is intentional.
def tSUBSi8 : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8),
2, IIC_iALUi,
[(set tGPR:$Rdn, CPSR, (ARMsubc tGPR:$Rn,
imm8_255:$imm8))]>,
Requires<[IsThumb1Only]>,
Sched<[WriteALU]>;
// tSUBSrr: selected for ARMsubc of two registers.
def tSUBSrr : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
2, IIC_iALUr,
[(set tGPR:$Rd, CPSR, (ARMsubc tGPR:$Rn,
tGPR:$Rm))]>,
Requires<[IsThumb1Only]>,
Sched<[WriteALU]>;
}
// Sign-extend byte
def tSXTB : // A8.6.222
T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
@ -1386,21 +1458,11 @@ def : T1Pat<(ARMcmpZ tGPR:$Rn, imm0_255:$imm8),
def : T1Pat<(ARMcmpZ tGPR:$Rn, tGPR:$Rm),
(tCMPr tGPR:$Rn, tGPR:$Rm)>;
// Add with carry
def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs),
(tADDi3 tGPR:$lhs, imm0_7:$rhs)>;
def : T1Pat<(addc tGPR:$lhs, imm8_255:$rhs),
(tADDi8 tGPR:$lhs, imm8_255:$rhs)>;
def : T1Pat<(addc tGPR:$lhs, tGPR:$rhs),
(tADDrr tGPR:$lhs, tGPR:$rhs)>;
// Subtract with carry
def : T1Pat<(addc tGPR:$lhs, imm0_7_neg:$rhs),
(tSUBi3 tGPR:$lhs, imm0_7_neg:$rhs)>;
def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs),
(tSUBi8 tGPR:$lhs, imm8_255_neg:$rhs)>;
def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
(tSUBrr tGPR:$lhs, tGPR:$rhs)>;
def : T1Pat<(ARMaddc tGPR:$lhs, imm0_7_neg:$rhs),
(tSUBSi3 tGPR:$lhs, imm0_7_neg:$rhs)>;
def : T1Pat<(ARMaddc tGPR:$lhs, imm8_255_neg:$rhs),
(tSUBSi8 tGPR:$lhs, imm8_255_neg:$rhs)>;
// Bswap 16 with load/store
def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),

View File

@ -1,33 +1,47 @@
; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s
; RUN: llc -mtriple=thumb-apple-darwin %s -o - | \
; RUN: llc -mtriple=thumb-eabi %s -verify-machineinstrs -o - | FileCheck %s
; RUN: llc -mtriple=thumb-apple-darwin %s -verify-machineinstrs -o - | \
; RUN: FileCheck %s -check-prefix CHECK -check-prefix CHECK-DARWIN
define i64 @f1() {
entry:
ret i64 0
; CHECK-LABEL: f1:
; CHECK: movs r0, #0
; CHECK: movs r1, r0
}
define i64 @f2() {
entry:
ret i64 1
; CHECK-LABEL: f2:
; CHECK: movs r0, #1
; CHECK: movs r1, #0
}
define i64 @f3() {
entry:
ret i64 2147483647
; CHECK-LABEL: f3:
; CHECK: ldr r0,
; CHECK: movs r1, #0
}
define i64 @f4() {
entry:
ret i64 2147483648
; CHECK-LABEL: f4:
; CHECK: movs r0, #1
; CHECK: lsls r0, r0, #31
; CHECK: movs r1, #0
}
define i64 @f5() {
entry:
ret i64 9223372036854775807
; CHECK-LABEL: f5:
; CHECK: mvn
; CHECK-NOT: mvn
; CHECK: movs r0, #0
; CHECK: mvns r0, r0
; CHECK: ldr r1,
}
define i64 @f6(i64 %x, i64 %y) {
@ -35,14 +49,40 @@ entry:
%tmp1 = add i64 %y, 1 ; <i64> [#uses=1]
ret i64 %tmp1
; CHECK-LABEL: f6:
; CHECK: adc
; CHECK-NOT: adc
; CHECK: movs r1, #0
; CHECK: adds r0, r2, #1
; CHECK: adcs r1, r3
}
define i64 @f6a(i64 %x, i64 %y) {
entry:
%tmp1 = add i64 %y, 10
ret i64 %tmp1
; CHECK-LABEL: f6a:
; CHECK: movs r1, #0
; CHECK: adds r2, #10
; CHECK: adcs r1, r3
; CHECK: movs r0, r2
}
define i64 @f6b(i64 %x, i64 %y) {
entry:
%tmp1 = add i64 %y, 1000
ret i64 %tmp1
; CHECK-LABEL: f6b:
; CHECK: movs r0, #125
; CHECK: lsls r0, r0, #3
; CHECK: movs r1, #0
; CHECK: adds r0, r2, r0
; CHECK: adcs r1, r3
}
define void @f7() {
entry:
%tmp = call i64 @f8( ) ; <i64> [#uses=0]
ret void
; CHECK-LABEL: f7:
; CHECK: bl
}
declare i64 @f8()
@ -52,8 +92,59 @@ entry:
%tmp = sub i64 %a, %b ; <i64> [#uses=1]
ret i64 %tmp
; CHECK-LABEL: f9:
; CHECK: sbc
; CHECK-NOT: sbc
; CHECK: subs r0, r0, r2
; CHECK: sbcs r1, r3
}
define i64 @f9a(i64 %x, i64 %y) { ; ADDC with small negative imm => SUBS imm
entry:
%tmp1 = sub i64 %y, 10
ret i64 %tmp1
; CHECK-LABEL: f9a:
; CHECK: movs r0, #0
; CHECK: subs r2, #10
; CHECK: sbcs r3, r0
; CHECK: movs r0, r2
; CHECK: movs r1, r3
}
define i64 @f9b(i64 %x, i64 %y) { ; ADDC with big negative imm => SUBS reg
entry:
%tmp1 = sub i64 1000, %y
ret i64 %tmp1
; CHECK-LABEL: f9b:
; CHECK: movs r0, #125
; CHECK: lsls r0, r0, #3
; CHECK: movs r1, #0
; CHECK: subs r0, r0, r2
; CHECK: sbcs r1, r3
}
define i64 @f9c(i64 %x, i32 %y) { ; SUBS with small positive imm => SUBS imm
entry:
%conv = sext i32 %y to i64
%shl = shl i64 %conv, 32
%or = or i64 %shl, 1
%sub = sub nsw i64 %x, %or
ret i64 %sub
; CHECK-LABEL: f9c:
; CHECK: subs r0, r0, #1
; CHECK: sbcs r1, r2
}
define i64 @f9d(i64 %x, i32 %y) { ; SUBS with small negative imm => SUBS reg
; FIXME: this would be better lowered as an `ADDS imm`
entry:
%conv = sext i32 %y to i64
%shl = shl i64 %conv, 32
%or = or i64 %shl, 4294967295
%sub = sub nsw i64 %x, %or
ret i64 %sub
; CHECK-LABEL: f9d:
; CHECK: movs r3, #0
; CHECK: mvns r3, r3
; CHECK: subs r0, r0, r3
; CHECK: sbcs r1, r2
}
define i64 @f(i32 %a, i32 %b) {
@ -63,6 +154,7 @@ entry:
%tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1]
ret i64 %tmp2
; CHECK-LABEL: f:
; CHECK-V6: bl __aeabi_lmul
; CHECK-DARWIN: __muldi3
}
@ -73,6 +165,7 @@ entry:
%tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1]
ret i64 %tmp2
; CHECK-LABEL: g:
; CHECK-V6: bl __aeabi_lmul
; CHECK-DARWIN: __muldi3
}
@ -81,4 +174,25 @@ entry:
%a = alloca i64, align 8 ; <i64*> [#uses=1]
%retval = load i64, i64* %a ; <i64> [#uses=1]
ret i64 %retval
; CHECK-LABEL: f10:
; CHECK: sub sp, #8
; CHECK: ldr r0, [sp]
; CHECK: ldr r1, [sp, #4]
; CHECK: add sp, #8
}
define i64 @f11(i64 %x, i64 %y) {
entry:
%tmp1 = add i64 -1000, %y
%tmp2 = add i64 %tmp1, -1000
ret i64 %tmp2
; CHECK-LABEL: f11:
; CHECK: movs r1, #0
; CHECK: ldr r0,
; CHECK: adds r2, r2, r0
; CHECK: sbcs r3, r1
; CHECK: adds r0, r2, r0
; CHECK: sbcs r3, r1
; CHECK: movs r1, r3
}