forked from OSchip/llvm-project
ARM and Thumb2 support for atomic MIN/MAX/UMIN/UMAX loads.
rdar://9326019 llvm-svn: 130234
This commit is contained in:
parent
a59b0a6e3c
commit
d4b733e4d8
|
@ -609,6 +609,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
||||||
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
|
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
|
||||||
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
|
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
|
||||||
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
|
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
|
||||||
|
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand);
|
||||||
|
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand);
|
||||||
|
setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
|
||||||
|
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand);
|
||||||
|
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand);
|
||||||
|
setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
|
||||||
|
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand);
|
||||||
|
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand);
|
||||||
|
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
|
||||||
|
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand);
|
||||||
|
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand);
|
||||||
|
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
|
||||||
// Since the libcalls include locking, fold in the fences
|
// Since the libcalls include locking, fold in the fences
|
||||||
setShouldFoldAtomicFences(true);
|
setShouldFoldAtomicFences(true);
|
||||||
}
|
}
|
||||||
|
@ -5031,6 +5043,109 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
|
||||||
return BB;
|
return BB;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Expand an ATOMIC_LOAD_{MIN,MAX,UMIN,UMAX}_I{8,16,32} pseudo-instruction
/// into a load-exclusive / compare / conditional-move / store-exclusive loop.
///
/// \param MI          The pseudo. Operand 0 is the result register, operand 1
///                    the address register and operand 2 the value that the
///                    memory contents are compared against.
/// \param BB          The block containing \p MI; everything after \p MI is
///                    moved into a new exit block.
/// \param Size        Access width in bytes; must be 1, 2 or 4.
/// \param signExtend  When true and \p Size is 1 or 2, the loaded value is
///                    sign-extended into a fresh register before the compare.
/// \param Cond        Condition, evaluated after "cmp loaded, incr", under
///                    which the loaded value (rather than incr) is written
///                    back, e.g. LT for signed min, HI for unsigned max.
/// \returns the exit block, which now holds the instructions that followed
///          \p MI. \p MI itself is erased.
MachineBasicBlock *
ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
                                          MachineBasicBlock *BB,
                                          unsigned Size,
                                          bool signExtend,
                                          ARMCC::CondCodes Cond) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *MF = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptr = MI->getOperand(1).getReg();
  unsigned incr = MI->getOperand(2).getReg();
  // Value fed to the compare; replaced by a sign-extended copy below when
  // signExtend applies.
  unsigned oldval = dest;
  DebugLoc dl = MI->getDebugLoc();

  bool isThumb2 = Subtarget->isThumb2();
  unsigned ldrOpc, strOpc, extendOpc;
  switch (Size) {
  default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
  case 1:
    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
    extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr;
    break;
  case 2:
    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
    extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr;
    break;
  case 4:
    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
    extendOpc = 0; // A full word needs no extension.
    break;
  }

  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, loopMBB);
  MF->insert(It, exitMBB);

  // Transfer the remainder of BB and its successor edges to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = MF->getRegInfo();
  // scratch receives the store-exclusive status, scratch2 the value to store.
  unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
  unsigned scratch2 = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   ldrex dest, ptr
  //   (sign extend dest into oldval, if required)
  //   cmp oldval, incr
  //   scratch2 = Cond ? oldval : incr
  //   strex scratch, scratch2, ptr
  //   cmp scratch, #0
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  BB = loopMBB;
  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));

  // Sign extend the value, if necessary.
  if (signExtend && extendOpc) {
    oldval = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
    AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest));
  }

  // Build compare and cmov instructions.
  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
                 .addReg(oldval).addReg(incr));
  // MOVCC's first source is the value kept when the condition fails and its
  // second source is the value moved in when the condition holds. The callers
  // pass the condition under which the *loaded* value is the desired result
  // (e.g. LT for min), so oldval must be the second source. With the operands
  // the other way round MIN would store the larger value and MAX the smaller.
  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
         .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR);

  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
                 .addReg(ptr));
  // A non-zero status means the exclusive store failed: retry.
  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
                 .addReg(scratch).addImm(0));
  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);

  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;

  MI->eraseFromParent();   // The instruction is gone now.

  return BB;
}
|
||||||
|
|
||||||
static
|
static
|
||||||
MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
|
MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
|
||||||
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
|
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
|
||||||
|
@ -5162,6 +5277,34 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
||||||
case ARM::ATOMIC_LOAD_SUB_I32:
|
case ARM::ATOMIC_LOAD_SUB_I32:
|
||||||
return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
|
return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
|
||||||
|
|
||||||
|
case ARM::ATOMIC_LOAD_MIN_I8:
|
||||||
|
return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
|
||||||
|
case ARM::ATOMIC_LOAD_MIN_I16:
|
||||||
|
return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
|
||||||
|
case ARM::ATOMIC_LOAD_MIN_I32:
|
||||||
|
return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
|
||||||
|
|
||||||
|
case ARM::ATOMIC_LOAD_MAX_I8:
|
||||||
|
return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
|
||||||
|
case ARM::ATOMIC_LOAD_MAX_I16:
|
||||||
|
return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
|
||||||
|
case ARM::ATOMIC_LOAD_MAX_I32:
|
||||||
|
return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
|
||||||
|
|
||||||
|
case ARM::ATOMIC_LOAD_UMIN_I8:
|
||||||
|
return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
|
||||||
|
case ARM::ATOMIC_LOAD_UMIN_I16:
|
||||||
|
return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
|
||||||
|
case ARM::ATOMIC_LOAD_UMIN_I32:
|
||||||
|
return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
|
||||||
|
|
||||||
|
case ARM::ATOMIC_LOAD_UMAX_I8:
|
||||||
|
return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
|
||||||
|
case ARM::ATOMIC_LOAD_UMAX_I16:
|
||||||
|
return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
|
||||||
|
case ARM::ATOMIC_LOAD_UMAX_I32:
|
||||||
|
return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
|
||||||
|
|
||||||
case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
|
case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
|
||||||
case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
|
case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
|
||||||
case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
|
case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
|
||||||
|
|
|
@ -484,6 +484,11 @@ namespace llvm {
|
||||||
MachineBasicBlock *BB,
|
MachineBasicBlock *BB,
|
||||||
unsigned Size,
|
unsigned Size,
|
||||||
unsigned BinOpcode) const;
|
unsigned BinOpcode) const;
|
||||||
|
/// Custom-inserter helper that expands an atomic min/max pseudo-instruction
/// into an exclusive-load / compare / conditional-move / exclusive-store loop.
/// \p Size is the access width in bytes (1, 2 or 4), \p signExtend requests
/// sign extension of a sub-word loaded value before the compare, and \p Cond
/// is the ARM condition code used by the conditional move that chooses
/// between the loaded value and the pseudo's value operand.
/// Returns the block that receives the instructions following \p MI.
MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
|
||||||
|
MachineBasicBlock *BB,
|
||||||
|
unsigned Size,
|
||||||
|
bool signExtend,
|
||||||
|
ARMCC::CondCodes Cond) const;
|
||||||
|
|
||||||
bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
|
bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
|
||||||
};
|
};
|
||||||
|
|
|
@ -3256,6 +3256,18 @@ let usesCustomInserter = 1 in {
|
||||||
def ATOMIC_LOAD_NAND_I8 : PseudoInst<
|
def ATOMIC_LOAD_NAND_I8 : PseudoInst<
|
||||||
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
|
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
|
||||||
[(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
|
[(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
|
||||||
|
// Signed 8-bit atomic minimum. Pseudo selected for atomic_load_min_8; it
// returns its result in $dst and takes the address in $ptr and the comparand
// in $val. The custom inserter expands it via EmitAtomicBinaryMinMax with
// Size 1, sign extension enabled and condition LT.
def ATOMIC_LOAD_MIN_I8 : PseudoInst<
|
||||||
|
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
|
||||||
|
[(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
|
||||||
|
// Signed 8-bit atomic maximum. Pseudo selected for atomic_load_max_8; it
// returns its result in $dst and takes the address in $ptr and the comparand
// in $val. The custom inserter expands it via EmitAtomicBinaryMinMax with
// Size 1, sign extension enabled and condition GT.
def ATOMIC_LOAD_MAX_I8 : PseudoInst<
|
||||||
|
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
|
||||||
|
[(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
|
||||||
|
// Unsigned 8-bit atomic minimum. Must match the *unsigned* node
// (atomic_load_umin_8): matching atomic_load_min_8 here would duplicate the
// signed pseudo's pattern and leave the unsigned node without any pattern.
// Expanded by the custom inserter via EmitAtomicBinaryMinMax (Size 1, no
// sign extension, condition LO).
def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>;
|
||||||
|
// Unsigned 8-bit atomic maximum. Must match the *unsigned* node
// (atomic_load_umax_8): matching atomic_load_max_8 here would duplicate the
// signed pseudo's pattern and leave the unsigned node without any pattern.
// Expanded by the custom inserter via EmitAtomicBinaryMinMax (Size 1, no
// sign extension, condition HI).
def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>;
|
||||||
def ATOMIC_LOAD_ADD_I16 : PseudoInst<
|
def ATOMIC_LOAD_ADD_I16 : PseudoInst<
|
||||||
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
|
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
|
||||||
[(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
|
[(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
|
||||||
|
@ -3274,6 +3286,18 @@ let usesCustomInserter = 1 in {
|
||||||
def ATOMIC_LOAD_NAND_I16 : PseudoInst<
|
def ATOMIC_LOAD_NAND_I16 : PseudoInst<
|
||||||
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
|
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
|
||||||
[(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
|
[(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
|
||||||
|
// Signed 16-bit atomic minimum. Pseudo selected for atomic_load_min_16; it
// returns its result in $dst and takes the address in $ptr and the comparand
// in $val. The custom inserter expands it via EmitAtomicBinaryMinMax with
// Size 2, sign extension enabled and condition LT.
def ATOMIC_LOAD_MIN_I16 : PseudoInst<
|
||||||
|
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
|
||||||
|
[(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
|
||||||
|
// Signed 16-bit atomic maximum. Pseudo selected for atomic_load_max_16; it
// returns its result in $dst and takes the address in $ptr and the comparand
// in $val. The custom inserter expands it via EmitAtomicBinaryMinMax with
// Size 2, sign extension enabled and condition GT.
def ATOMIC_LOAD_MAX_I16 : PseudoInst<
|
||||||
|
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
|
||||||
|
[(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
|
||||||
|
// Unsigned 16-bit atomic minimum. Must match the *unsigned* node
// (atomic_load_umin_16): matching atomic_load_min_16 here would duplicate the
// signed pseudo's pattern and leave the unsigned node without any pattern.
// Expanded by the custom inserter via EmitAtomicBinaryMinMax (Size 2, no
// sign extension, condition LO).
def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>;
|
||||||
|
// Unsigned 16-bit atomic maximum. Must match the *unsigned* node
// (atomic_load_umax_16): matching atomic_load_max_16 here would duplicate the
// signed pseudo's pattern and leave the unsigned node without any pattern.
// Expanded by the custom inserter via EmitAtomicBinaryMinMax (Size 2, no
// sign extension, condition HI).
def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>;
|
||||||
def ATOMIC_LOAD_ADD_I32 : PseudoInst<
|
def ATOMIC_LOAD_ADD_I32 : PseudoInst<
|
||||||
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
|
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
|
||||||
[(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
|
[(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
|
||||||
|
@ -3292,6 +3316,18 @@ let usesCustomInserter = 1 in {
|
||||||
def ATOMIC_LOAD_NAND_I32 : PseudoInst<
|
def ATOMIC_LOAD_NAND_I32 : PseudoInst<
|
||||||
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
|
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
|
||||||
[(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
|
[(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
|
||||||
|
// Signed 32-bit atomic minimum. Pseudo selected for atomic_load_min_32; it
// returns its result in $dst and takes the address in $ptr and the comparand
// in $val. The custom inserter expands it via EmitAtomicBinaryMinMax with
// Size 4 and condition LT.
def ATOMIC_LOAD_MIN_I32 : PseudoInst<
|
||||||
|
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
|
||||||
|
[(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
|
||||||
|
// Signed 32-bit atomic maximum. Pseudo selected for atomic_load_max_32; it
// returns its result in $dst and takes the address in $ptr and the comparand
// in $val. The custom inserter expands it via EmitAtomicBinaryMinMax with
// Size 4 and condition GT.
def ATOMIC_LOAD_MAX_I32 : PseudoInst<
|
||||||
|
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
|
||||||
|
[(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
|
||||||
|
// Unsigned 32-bit atomic minimum. Must match the *unsigned* node
// (atomic_load_umin_32): matching atomic_load_min_32 here would duplicate the
// signed pseudo's pattern and leave the unsigned node without any pattern.
// Expanded by the custom inserter via EmitAtomicBinaryMinMax (Size 4,
// condition LO).
def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>;
|
||||||
|
// Unsigned 32-bit atomic maximum. Must match the *unsigned* node
// (atomic_load_umax_32): matching atomic_load_max_32 here would duplicate the
// signed pseudo's pattern and leave the unsigned node without any pattern.
// Expanded by the custom inserter via EmitAtomicBinaryMinMax (Size 4,
// condition HI).
def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
  (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
  [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>;
|
||||||
|
|
||||||
def ATOMIC_SWAP_I8 : PseudoInst<
|
def ATOMIC_SWAP_I8 : PseudoInst<
|
||||||
(outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
|
(outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
|
||||||
|
|
Loading…
Reference in New Issue