ARM and Thumb2 support for atomic MIN/MAX/UMIN/UMAX loads.

rdar://9326019

llvm-svn: 130234
This commit is contained in:
Jim Grosbach 2011-04-26 19:44:18 +00:00
parent a59b0a6e3c
commit d4b733e4d8
3 changed files with 184 additions and 0 deletions

View File

@ -609,6 +609,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
// Since the libcalls include locking, fold in the fences // Since the libcalls include locking, fold in the fences
setShouldFoldAtomicFences(true); setShouldFoldAtomicFences(true);
} }
@ -5031,6 +5043,109 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
return BB; return BB;
} }
MachineBasicBlock *
ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size,
bool signExtend,
ARMCC::CondCodes Cond) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *MF = BB->getParent();
MachineFunction::iterator It = BB;
++It;
unsigned dest = MI->getOperand(0).getReg();
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
unsigned oldval = dest;
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
unsigned ldrOpc, strOpc, extendOpc;
switch (Size) {
default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
case 1:
ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr;
break;
case 2:
ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr;
break;
case 4:
ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
extendOpc = 0;
break;
}
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = MF->getRegInfo();
unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
unsigned scratch2 = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
// thisMBB:
// ...
// fallthrough --> loopMBB
BB->addSuccessor(loopMBB);
// loopMBB:
// ldrex dest, ptr
// (sign extend dest, if required)
// cmp dest, incr
// cmov.cond scratch2, dest, incr
// strex scratch, scratch2, ptr
// cmp scratch, #0
// bne- loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
// Sign extend the value, if necessary.
if (signExtend && extendOpc) {
oldval = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest));
}
// Build compare and cmov instructions.
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(oldval).addReg(incr));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
.addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
.addReg(ptr));
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(scratch).addImm(0));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
.addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
static static
MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
@ -5162,6 +5277,34 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::ATOMIC_LOAD_SUB_I32: case ARM::ATOMIC_LOAD_SUB_I32:
return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
case ARM::ATOMIC_LOAD_MIN_I8:
return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
case ARM::ATOMIC_LOAD_MIN_I16:
return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
case ARM::ATOMIC_LOAD_MIN_I32:
return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
case ARM::ATOMIC_LOAD_MAX_I8:
return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
case ARM::ATOMIC_LOAD_MAX_I16:
return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
case ARM::ATOMIC_LOAD_MAX_I32:
return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
case ARM::ATOMIC_LOAD_UMIN_I8:
return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
case ARM::ATOMIC_LOAD_UMIN_I16:
return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
case ARM::ATOMIC_LOAD_UMIN_I32:
return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
case ARM::ATOMIC_LOAD_UMAX_I8:
return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
case ARM::ATOMIC_LOAD_UMAX_I16:
return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
case ARM::ATOMIC_LOAD_UMAX_I32:
return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);

View File

@ -484,6 +484,11 @@ namespace llvm {
MachineBasicBlock *BB, MachineBasicBlock *BB,
unsigned Size, unsigned Size,
unsigned BinOpcode) const; unsigned BinOpcode) const;
MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size,
bool signExtend,
ARMCC::CondCodes Cond) const;
bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const; bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
}; };

View File

@ -3256,6 +3256,18 @@ let usesCustomInserter = 1 in {
def ATOMIC_LOAD_NAND_I8 : PseudoInst< def ATOMIC_LOAD_NAND_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>; [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_MIN_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
[(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_MAX_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
[(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
[(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
[(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_ADD_I16 : PseudoInst< def ATOMIC_LOAD_ADD_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>; [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
@ -3274,6 +3286,18 @@ let usesCustomInserter = 1 in {
def ATOMIC_LOAD_NAND_I16 : PseudoInst< def ATOMIC_LOAD_NAND_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>; [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_MIN_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
[(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_MAX_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
[(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
[(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
[(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_ADD_I32 : PseudoInst< def ATOMIC_LOAD_ADD_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>; [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
@ -3292,6 +3316,18 @@ let usesCustomInserter = 1 in {
def ATOMIC_LOAD_NAND_I32 : PseudoInst< def ATOMIC_LOAD_NAND_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>; [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_MIN_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
[(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_MAX_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
[(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
[(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
[(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_SWAP_I8 : PseudoInst< def ATOMIC_SWAP_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,