forked from OSchip/llvm-project
ARM and Thumb2 support for atomic MIN/MAX/UMIN/UMAX loads.
rdar://9326019 llvm-svn: 130234
This commit is contained in:
parent
a59b0a6e3c
commit
d4b733e4d8
|
@ -609,6 +609,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
|||
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
|
||||
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
|
||||
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
|
||||
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand);
|
||||
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand);
|
||||
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
|
||||
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand);
|
||||
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand);
|
||||
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
|
||||
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand);
|
||||
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand);
|
||||
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
|
||||
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand);
|
||||
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand);
|
||||
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
|
||||
// Since the libcalls include locking, fold in the fences
|
||||
setShouldFoldAtomicFences(true);
|
||||
}
|
||||
|
@ -5031,6 +5043,109 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
|
|||
return BB;
|
||||
}
|
||||
|
||||
/// Expand an ATOMIC_LOAD_{MIN,MAX,UMIN,UMAX} pseudo-instruction into an
/// ldrex / cmp / conditional-move / strex retry loop.
///
/// \param MI          The pseudo. Operand 0 is the result register, operand 1
///                    the address register and operand 2 the value to compare
///                    the memory contents against.
/// \param BB          The block containing \p MI; everything after \p MI is
///                    moved into a newly created exit block.
/// \param Size        Access width in bytes (1, 2 or 4).
/// \param signExtend  Sign-extend the loaded sub-word value before comparing.
/// \param Cond        Condition, evaluated after "cmp oldval, incr", under
///                    which the old memory value is kept (written back
///                    unchanged); otherwise \p incr is stored.
/// \returns The exit block, i.e. the block now holding the instructions that
///          originally followed \p MI.
MachineBasicBlock *
ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
                                          MachineBasicBlock *BB,
                                          unsigned Size,
                                          bool signExtend,
                                          ARMCC::CondCodes Cond) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *MF = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptr = MI->getOperand(1).getReg();
  unsigned incr = MI->getOperand(2).getReg();
  // Value fed to the compare; replaced by a sign-extended copy below when
  // signExtend is requested for a sub-word access.
  unsigned oldval = dest;
  DebugLoc dl = MI->getDebugLoc();

  bool isThumb2 = Subtarget->isThumb2();
  unsigned ldrOpc, strOpc, extendOpc;
  switch (Size) {
  default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
  case 1:
    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
    extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr;
    break;
  case 2:
    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
    extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr;
    break;
  case 4:
    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
    extendOpc = 0; // Full-width load: nothing to extend.
    break;
  }

  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, loopMBB);
  MF->insert(It, exitMBB);

  // Transfer the remainder of BB and its successor edges to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = MF->getRegInfo();
  // scratch receives the strex status, scratch2 the value to store.
  unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
  unsigned scratch2 = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   ldrex dest, ptr
  //   (sign extend dest into oldval, if required)
  //   cmp oldval, incr
  //   scratch2 = cond ? oldval : incr
  //   strex scratch, scratch2, ptr
  //   cmp scratch, #0
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  BB = loopMBB;
  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));

  // Sign extend the value, if necessary.
  if (signExtend && extendOpc) {
    oldval = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
    AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest));
  }

  // Build compare and cmov instructions.
  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
                 .addReg(oldval).addReg(incr));
  // MOVCC takes (false-value, true-value, cc): the second source is written
  // only when the condition holds. With "cmp oldval, incr" and e.g. LT for
  // MIN, the old value must be the one kept when the condition is true, so
  // incr is the false-value and oldval the conditionally moved one.
  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
         .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR);

  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
                 .addReg(ptr));
  // A non-zero strex status means the exclusive store failed: retry.
  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
                 .addReg(scratch).addImm(0));
  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
         .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);

  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;

  MI->eraseFromParent();   // The instruction is gone now.

  return BB;
}
|
||||
|
||||
static
|
||||
MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
|
||||
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
|
||||
|
@ -5162,6 +5277,34 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
|||
case ARM::ATOMIC_LOAD_SUB_I32:
|
||||
return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
|
||||
|
||||
case ARM::ATOMIC_LOAD_MIN_I8:
|
||||
return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
|
||||
case ARM::ATOMIC_LOAD_MIN_I16:
|
||||
return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
|
||||
case ARM::ATOMIC_LOAD_MIN_I32:
|
||||
return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
|
||||
|
||||
case ARM::ATOMIC_LOAD_MAX_I8:
|
||||
return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
|
||||
case ARM::ATOMIC_LOAD_MAX_I16:
|
||||
return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
|
||||
case ARM::ATOMIC_LOAD_MAX_I32:
|
||||
return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
|
||||
|
||||
case ARM::ATOMIC_LOAD_UMIN_I8:
|
||||
return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
|
||||
case ARM::ATOMIC_LOAD_UMIN_I16:
|
||||
return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
|
||||
case ARM::ATOMIC_LOAD_UMIN_I32:
|
||||
return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
|
||||
|
||||
case ARM::ATOMIC_LOAD_UMAX_I8:
|
||||
return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
|
||||
case ARM::ATOMIC_LOAD_UMAX_I16:
|
||||
return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
|
||||
case ARM::ATOMIC_LOAD_UMAX_I32:
|
||||
return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
|
||||
|
||||
case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
|
||||
case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
|
||||
case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
|
||||
|
|
|
@ -484,6 +484,11 @@ namespace llvm {
|
|||
MachineBasicBlock *BB,
|
||||
unsigned Size,
|
||||
unsigned BinOpcode) const;
|
||||
/// Custom-inserter expansion for the ATOMIC_LOAD_{MIN,MAX,UMIN,UMAX} pseudos:
/// emits a load-exclusive / compare / conditional-move / store-exclusive
/// retry loop. Size is the access width in bytes (1, 2 or 4), signExtend
/// requests sign extension of a loaded sub-word value before the compare,
/// and Cond is the condition code attached to the conditional move.
/// Returns the block that holds the instructions following MI.
MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
|
||||
MachineBasicBlock *BB,
|
||||
unsigned Size,
|
||||
bool signExtend,
|
||||
ARMCC::CondCodes Cond) const;
|
||||
|
||||
bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
|
||||
};
|
||||
|
|
|
@ -3256,6 +3256,18 @@ let usesCustomInserter = 1 in {
|
|||
def ATOMIC_LOAD_NAND_I8 : PseudoInst<
|
||||
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
|
||||
[(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
|
||||
// 8-bit atomic min/max pseudos. The signed variants match the
// atomic_load_{min,max}_8 fragments; the unsigned variants must match
// atomic_load_{umin,umax}_8 so that each pseudo selects its own node.
def ATOMIC_LOAD_MIN_I8 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_MAX_I8 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>;
|
||||
def ATOMIC_LOAD_ADD_I16 : PseudoInst<
|
||||
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
|
||||
[(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
|
||||
|
@ -3274,6 +3286,18 @@ let usesCustomInserter = 1 in {
|
|||
def ATOMIC_LOAD_NAND_I16 : PseudoInst<
|
||||
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
|
||||
[(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
|
||||
// 16-bit atomic min/max pseudos. The signed variants match the
// atomic_load_{min,max}_16 fragments; the unsigned variants must match
// atomic_load_{umin,umax}_16 so that each pseudo selects its own node.
def ATOMIC_LOAD_MIN_I16 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_MAX_I16 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>;
|
||||
def ATOMIC_LOAD_ADD_I32 : PseudoInst<
|
||||
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
|
||||
[(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
|
||||
|
@ -3292,6 +3316,18 @@ let usesCustomInserter = 1 in {
|
|||
def ATOMIC_LOAD_NAND_I32 : PseudoInst<
|
||||
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
|
||||
[(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
|
||||
// 32-bit atomic min/max pseudos. The signed variants match the
// atomic_load_{min,max}_32 fragments; the unsigned variants must match
// atomic_load_{umin,umax}_32 so that each pseudo selects its own node.
def ATOMIC_LOAD_MIN_I32 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_MAX_I32 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>;
|
||||
|
||||
def ATOMIC_SWAP_I8 : PseudoInst<
|
||||
(outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
|
||||
|
|
Loading…
Reference in New Issue