forked from OSchip/llvm-project
Thumb2 and ARM add/subtract with carry fixes.
Fixes Thumb2 ADCS and SBCS lowering: <rdar://problem/9275821>. t2ADCS/t2SBCS are now pseudo instructions, consistent with ARM, so the assembly printer correctly prints the 's' suffix. Fixes Thumb2 adde -> SBC matching to check for live/dead carry flags. Fixes the internal ARM machine opcode mnemonic for ADCS/SBCS. Fixes ARM SBC lowering to check for live carry (potential bug). llvm-svn: 130048
This commit is contained in:
parent
3260c9df4e
commit
0ed5778a1e
|
@ -5040,6 +5040,72 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
|
|||
llvm_unreachable("Expecting a BB with two successors!");
|
||||
}
|
||||
|
||||
// FIXME: This opcode table should obviously be expressed in the target
|
||||
// description. We probably just need a "machine opcode" value in the pseudo
|
||||
// instruction. But the ideal solution maybe to simply remove the "S" version
|
||||
// of the opcode altogether.
|
||||
struct AddSubFlagsOpcodePair {
|
||||
unsigned PseudoOpc;
|
||||
unsigned MachineOpc;
|
||||
};
|
||||
|
||||
static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
|
||||
{ARM::ADCSri, ARM::ADCri},
|
||||
{ARM::ADCSrr, ARM::ADCrr},
|
||||
{ARM::ADCSrs, ARM::ADCrs},
|
||||
{ARM::SBCSri, ARM::SBCri},
|
||||
{ARM::SBCSrr, ARM::SBCrr},
|
||||
{ARM::SBCSrs, ARM::SBCrs},
|
||||
{ARM::RSBSri, ARM::RSBri},
|
||||
{ARM::RSBSrr, ARM::RSBrr},
|
||||
{ARM::RSBSrs, ARM::RSBrs},
|
||||
{ARM::RSCSri, ARM::RSCri},
|
||||
{ARM::RSCSrs, ARM::RSCrs},
|
||||
{ARM::t2ADCSri, ARM::t2ADCri},
|
||||
{ARM::t2ADCSrr, ARM::t2ADCrr},
|
||||
{ARM::t2ADCSrs, ARM::t2ADCrs},
|
||||
{ARM::t2SBCSri, ARM::t2SBCri},
|
||||
{ARM::t2SBCSrr, ARM::t2SBCrr},
|
||||
{ARM::t2SBCSrs, ARM::t2SBCrs},
|
||||
{ARM::t2RSBSri, ARM::t2RSBri},
|
||||
{ARM::t2RSBSrs, ARM::t2RSBrs},
|
||||
};
|
||||
|
||||
// Convert and Add or Subtract with Carry and Flags to a generic opcode with
|
||||
// CPSR<def> operand. e.g. ADCS (...) -> ADC (... CPSR<def>).
|
||||
//
|
||||
// FIXME: Somewhere we should assert that CPSR<def> is in the correct
|
||||
// position to be recognized by the target descrition as the 'S' bit.
|
||||
bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI,
|
||||
MachineBasicBlock *BB) const {
|
||||
unsigned OldOpc = MI->getOpcode();
|
||||
unsigned NewOpc = 0;
|
||||
|
||||
// This is only called for instructions that need remapping, so iterating over
|
||||
// the tiny opcode table is not costly.
|
||||
static const int NPairs =
|
||||
sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
|
||||
for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0],
|
||||
*End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) {
|
||||
if (OldOpc == Pair->PseudoOpc) {
|
||||
NewOpc = Pair->MachineOpc;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!NewOpc)
|
||||
return false;
|
||||
|
||||
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
|
||||
DebugLoc dl = MI->getDebugLoc();
|
||||
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
|
||||
for (unsigned i = 0; i < MI->getNumOperands(); ++i)
|
||||
MIB.addOperand(MI->getOperand(i));
|
||||
AddDefaultPred(MIB);
|
||||
MIB.addReg(ARM::CPSR, RegState::Define); // S bit
|
||||
MI->eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
MachineBasicBlock *
|
||||
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
||||
MachineBasicBlock *BB) const {
|
||||
|
@ -5047,10 +5113,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
|||
DebugLoc dl = MI->getDebugLoc();
|
||||
bool isThumb2 = Subtarget->isThumb2();
|
||||
switch (MI->getOpcode()) {
|
||||
default:
|
||||
default: {
|
||||
if (RemapAddSubWithFlags(MI, BB))
|
||||
return BB;
|
||||
|
||||
MI->dump();
|
||||
llvm_unreachable("Unexpected instr type to insert");
|
||||
|
||||
}
|
||||
case ARM::ATOMIC_LOAD_ADD_I8:
|
||||
return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
|
||||
case ARM::ATOMIC_LOAD_ADD_I16:
|
||||
|
@ -5101,68 +5170,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
|||
case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
|
||||
case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
|
||||
|
||||
case ARM::ADCSSri:
|
||||
case ARM::ADCSSrr:
|
||||
case ARM::ADCSSrs:
|
||||
case ARM::SBCSSri:
|
||||
case ARM::SBCSSrr:
|
||||
case ARM::SBCSSrs:
|
||||
case ARM::RSBSri:
|
||||
case ARM::RSBSrr:
|
||||
case ARM::RSBSrs:
|
||||
case ARM::RSCSri:
|
||||
case ARM::RSCSrs: {
|
||||
unsigned OldOpc = MI->getOpcode();
|
||||
unsigned Opc = 0;
|
||||
switch (OldOpc) {
|
||||
case ARM::ADCSSrr:
|
||||
Opc = ARM::ADCrr;
|
||||
break;
|
||||
case ARM::ADCSSri:
|
||||
Opc = ARM::ADCri;
|
||||
break;
|
||||
case ARM::ADCSSrs:
|
||||
Opc = ARM::ADCrs;
|
||||
break;
|
||||
case ARM::SBCSSrr:
|
||||
Opc = ARM::SBCrr;
|
||||
break;
|
||||
case ARM::SBCSSri:
|
||||
Opc = ARM::SBCri;
|
||||
break;
|
||||
case ARM::SBCSSrs:
|
||||
Opc = ARM::SBCrs;
|
||||
break;
|
||||
case ARM::RSBSri:
|
||||
Opc = ARM::RSBri;
|
||||
break;
|
||||
case ARM::RSBSrr:
|
||||
Opc = ARM::RSBrr;
|
||||
break;
|
||||
case ARM::RSBSrs:
|
||||
Opc = ARM::RSBrs;
|
||||
break;
|
||||
case ARM::RSCSri:
|
||||
Opc = ARM::RSCri;
|
||||
break;
|
||||
case ARM::RSCSrs:
|
||||
Opc = ARM::RSCrs;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Unknown opcode?");
|
||||
}
|
||||
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(Opc));
|
||||
for (unsigned i = 0; i < MI->getNumOperands(); ++i)
|
||||
MIB.addOperand(MI->getOperand(i));
|
||||
AddDefaultPred(MIB);
|
||||
MIB.addReg(ARM::CPSR, RegState::Define); // S bit
|
||||
MI->eraseFromParent();
|
||||
return BB;
|
||||
}
|
||||
|
||||
|
||||
case ARM::tMOVCCr_pseudo: {
|
||||
// To "insert" a SELECT_CC instruction, we actually have to insert the
|
||||
// diamond control-flow pattern. The incoming instruction knows the
|
||||
|
@ -5474,7 +5481,7 @@ static SDValue PerformANDCombine(SDNode *N,
|
|||
|
||||
if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
|
||||
return SDValue();
|
||||
|
||||
|
||||
APInt SplatBits, SplatUndef;
|
||||
unsigned SplatBitSize;
|
||||
bool HasAnyUndefs;
|
||||
|
@ -5510,7 +5517,7 @@ static SDValue PerformORCombine(SDNode *N,
|
|||
|
||||
if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
|
||||
return SDValue();
|
||||
|
||||
|
||||
APInt SplatBits, SplatUndef;
|
||||
unsigned SplatBitSize;
|
||||
bool HasAnyUndefs;
|
||||
|
|
|
@ -485,6 +485,7 @@ namespace llvm {
|
|||
unsigned Size,
|
||||
unsigned BinOpcode) const;
|
||||
|
||||
bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
|
||||
};
|
||||
|
||||
enum NEONModImmType {
|
||||
|
|
|
@ -940,16 +940,16 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
|
|||
// NOTE: CPSR def omitted because it will be handled by the custom inserter.
|
||||
let usesCustomInserter = 1 in {
|
||||
multiclass AI1_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
|
||||
def Sri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
|
||||
Size4Bytes, IIC_iALUi,
|
||||
def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
|
||||
Size4Bytes, IIC_iALUi,
|
||||
[(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>;
|
||||
def Srr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
|
||||
Size4Bytes, IIC_iALUr,
|
||||
def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
|
||||
Size4Bytes, IIC_iALUr,
|
||||
[(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
def Srs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
|
||||
Size4Bytes, IIC_iALUsr,
|
||||
def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
|
||||
Size4Bytes, IIC_iALUsr,
|
||||
[(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>;
|
||||
}
|
||||
}
|
||||
|
@ -2330,8 +2330,10 @@ def : ARMPat<(addc GPR:$src, so_imm_neg:$imm),
|
|||
// The with-carry-in form matches bitwise not instead of the negation.
|
||||
// Effectively, the inverse interpretation of the carry flag already accounts
|
||||
// for part of the negation.
|
||||
def : ARMPat<(adde GPR:$src, so_imm_not:$imm),
|
||||
def : ARMPat<(adde_dead_carry GPR:$src, so_imm_not:$imm),
|
||||
(SBCri GPR:$src, so_imm_not:$imm)>;
|
||||
def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm),
|
||||
(SBCSri GPR:$src, so_imm_not:$imm)>;
|
||||
|
||||
// Note: These are implemented in C++ code, because they have to generate
|
||||
// ADD/SUBrs instructions, which use a complex pattern that a xform function
|
||||
|
|
|
@ -681,49 +681,27 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
|
|||
let Inst{24-21} = opcod;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Carry setting variants
|
||||
let isCodeGenOnly = 1, Defs = [CPSR] in {
|
||||
multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
|
||||
bit Commutable = 0> {
|
||||
// NOTE: CPSR def omitted because it will be handled by the custom inserter.
|
||||
let usesCustomInserter = 1 in {
|
||||
multiclass T2I_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
|
||||
// shifted imm
|
||||
def ri : T2sTwoRegImm<
|
||||
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
|
||||
opc, "\t$Rd, $Rn, $imm",
|
||||
[(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
|
||||
Requires<[IsThumb2]> {
|
||||
let Inst{31-27} = 0b11110;
|
||||
let Inst{25} = 0;
|
||||
let Inst{24-21} = opcod;
|
||||
let Inst{20} = 1; // The S bit.
|
||||
let Inst{15} = 0;
|
||||
}
|
||||
def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
|
||||
Size4Bytes, IIC_iALUi,
|
||||
[(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>;
|
||||
// register
|
||||
def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
|
||||
opc, ".w\t$Rd, $Rn, $Rm",
|
||||
[(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
|
||||
Requires<[IsThumb2]> {
|
||||
def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
|
||||
Size4Bytes, IIC_iALUr,
|
||||
[(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
|
||||
let isCommutable = Commutable;
|
||||
let Inst{31-27} = 0b11101;
|
||||
let Inst{26-25} = 0b01;
|
||||
let Inst{24-21} = opcod;
|
||||
let Inst{20} = 1; // The S bit.
|
||||
let Inst{14-12} = 0b000; // imm3
|
||||
let Inst{7-6} = 0b00; // imm2
|
||||
let Inst{5-4} = 0b00; // type
|
||||
}
|
||||
// shifted register
|
||||
def rs : T2sTwoRegShiftedReg<
|
||||
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
|
||||
IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
|
||||
[(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
|
||||
Requires<[IsThumb2]> {
|
||||
let Inst{31-27} = 0b11101;
|
||||
let Inst{26-25} = 0b01;
|
||||
let Inst{24-21} = opcod;
|
||||
let Inst{20} = 1; // The S bit.
|
||||
}
|
||||
}
|
||||
def rs : t2PseudoInst<
|
||||
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
|
||||
Size4Bytes, IIC_iALUsi,
|
||||
[(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1803,10 +1781,8 @@ defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
|
|||
BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
|
||||
defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc",
|
||||
BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
|
||||
defm t2ADCS : T2I_adde_sube_s_irs<0b1010, "adc",
|
||||
BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
|
||||
defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc",
|
||||
BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
|
||||
defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
|
||||
defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
|
||||
|
||||
// RSB
|
||||
defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
|
||||
|
@ -1837,9 +1813,14 @@ def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm),
|
|||
// Effectively, the inverse interpretation of the carry flag already accounts
|
||||
// for part of the negation.
|
||||
let AddedComplexity = 1 in
|
||||
def : T2Pat<(adde rGPR:$src, imm0_255_not:$imm),
|
||||
def : T2Pat<(adde_dead_carry rGPR:$src, imm0_255_not:$imm),
|
||||
(t2SBCri rGPR:$src, imm0_255_not:$imm)>;
|
||||
def : T2Pat<(adde_dead_carry rGPR:$src, t2_so_imm_not:$imm),
|
||||
(t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
|
||||
let AddedComplexity = 1 in
|
||||
def : T2Pat<(adde_live_carry rGPR:$src, imm0_255_not:$imm),
|
||||
(t2SBCSri rGPR:$src, imm0_255_not:$imm)>;
|
||||
def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm),
|
||||
def : T2Pat<(adde_live_carry rGPR:$src, t2_so_imm_not:$imm),
|
||||
(t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>;
|
||||
|
||||
// Select Bytes -- for disassembly only
|
||||
|
|
|
@ -19,3 +19,20 @@ entry:
|
|||
%tmp2 = sub i64 %tmp1, %b
|
||||
ret i64 %tmp2
|
||||
}
|
||||
|
||||
; add with live carry
|
||||
define i64 @f3(i32 %al, i32 %bl) {
|
||||
; CHECK: f3:
|
||||
; CHECK: adds r
|
||||
; CHECK: adcs r
|
||||
; CHECK: adc r
|
||||
entry:
|
||||
; unsigned wide add
|
||||
%aw = zext i32 %al to i64
|
||||
%bw = zext i32 %bl to i64
|
||||
%cw = add i64 %aw, %bw
|
||||
; ch == carry bit
|
||||
%ch = lshr i64 %cw, 32
|
||||
%dw = add i64 %ch, %bw
|
||||
ret i64 %dw
|
||||
}
|
||||
|
|
|
@ -52,3 +52,18 @@ define i64 @f6(i64 %a) {
|
|||
ret i64 %tmp
|
||||
}
|
||||
|
||||
; Example from numerics code that manually computes wider-than-64 values.
|
||||
;
|
||||
; CHECK: _livecarry:
|
||||
; CHECK: adds
|
||||
; CHECK: adcs
|
||||
; CHECK: adc
|
||||
define i64 @livecarry(i64 %carry, i32 %digit) nounwind {
|
||||
%ch = lshr i64 %carry, 32
|
||||
%cl = and i64 %carry, 4294967295
|
||||
%truncdigit = zext i32 %digit to i64
|
||||
%prod = add i64 %cl, %truncdigit
|
||||
%ph = lshr i64 %prod, 32
|
||||
%carryresult = add i64 %ch, %ph
|
||||
ret i64 %carryresult
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
define i64 @f1(i64 %a) {
|
||||
; CHECK: f1
|
||||
; CHECK: subs r0, #171
|
||||
; CHECK: adc r1, r1, #-1
|
||||
; CHECK: sbc r1, r1, #0
|
||||
%tmp = sub i64 %a, 171
|
||||
ret i64 %tmp
|
||||
}
|
||||
|
@ -13,7 +13,7 @@ define i64 @f1(i64 %a) {
|
|||
define i64 @f2(i64 %a) {
|
||||
; CHECK: f2
|
||||
; CHECK: subs.w r0, r0, #1179666
|
||||
; CHECK: adc r1, r1, #-1
|
||||
; CHECK: sbc r1, r1, #0
|
||||
%tmp = sub i64 %a, 1179666
|
||||
ret i64 %tmp
|
||||
}
|
||||
|
@ -22,7 +22,7 @@ define i64 @f2(i64 %a) {
|
|||
define i64 @f3(i64 %a) {
|
||||
; CHECK: f3
|
||||
; CHECK: subs.w r0, r0, #872428544
|
||||
; CHECK: adc r1, r1, #-1
|
||||
; CHECK: sbc r1, r1, #0
|
||||
%tmp = sub i64 %a, 872428544
|
||||
ret i64 %tmp
|
||||
}
|
||||
|
@ -31,7 +31,7 @@ define i64 @f3(i64 %a) {
|
|||
define i64 @f4(i64 %a) {
|
||||
; CHECK: f4
|
||||
; CHECK: subs.w r0, r0, #1448498774
|
||||
; CHECK: adc r1, r1, #-1
|
||||
; CHECK: sbc r1, r1, #0
|
||||
%tmp = sub i64 %a, 1448498774
|
||||
ret i64 %tmp
|
||||
}
|
||||
|
@ -40,7 +40,7 @@ define i64 @f4(i64 %a) {
|
|||
define i64 @f5(i64 %a) {
|
||||
; CHECK: f5
|
||||
; CHECK: subs.w r0, r0, #66846720
|
||||
; CHECK: adc r1, r1, #-1
|
||||
; CHECK: sbc r1, r1, #0
|
||||
%tmp = sub i64 %a, 66846720
|
||||
ret i64 %tmp
|
||||
}
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
|
||||
; RUN: llc < %s -march=thumb -mattr=+thumb2 -mattr=+32bit | FileCheck %s
|
||||
|
||||
define i64 @f1(i64 %a, i64 %b) {
|
||||
; CHECK: f1:
|
||||
; CHECK: subs r0, r0, r2
|
||||
; CHECK: sbcs r1, r3
|
||||
; CHECK: subs.w r0, r0, r2
|
||||
; To test dead_carry, +32bit prevents sbc conveting to 16-bit sbcs
|
||||
; CHECK: sbc.w r1, r1, r3
|
||||
%tmp = sub i64 %a, %b
|
||||
ret i64 %tmp
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue