Revert "[mips] Fix atomic compare and swap at O0."

This reverts r296132. I forgot to include the tests.

llvm-svn: 296133
This commit is contained in:
Simon Dardis 2017-02-24 16:30:27 +00:00
parent cf0e06d375
commit 3c58c18ff0
7 changed files with 154 additions and 70 deletions

View File

@ -26,7 +26,6 @@ add_llvm_target(MipsCodeGen
MipsCCState.cpp MipsCCState.cpp
MipsConstantIslandPass.cpp MipsConstantIslandPass.cpp
MipsDelaySlotFiller.cpp MipsDelaySlotFiller.cpp
MipsExpandPseudo.cpp
MipsFastISel.cpp MipsFastISel.cpp
MipsHazardSchedule.cpp MipsHazardSchedule.cpp
MipsInstrInfo.cpp MipsInstrInfo.cpp

View File

@ -32,7 +32,6 @@ namespace llvm {
FunctionPass *createMipsHazardSchedule(); FunctionPass *createMipsHazardSchedule();
FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM); FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
FunctionPass *createMipsConstantIslandPass(); FunctionPass *createMipsConstantIslandPass();
FunctionPass *createMipsExpandPseudoPass();
} // end namespace llvm; } // end namespace llvm;
#endif #endif

View File

@ -73,9 +73,6 @@ let usesCustomInserter = 1 in {
def ATOMIC_LOAD_XOR_I64 : Atomic2Ops<atomic_load_xor_64, GPR64>; def ATOMIC_LOAD_XOR_I64 : Atomic2Ops<atomic_load_xor_64, GPR64>;
def ATOMIC_LOAD_NAND_I64 : Atomic2Ops<atomic_load_nand_64, GPR64>; def ATOMIC_LOAD_NAND_I64 : Atomic2Ops<atomic_load_nand_64, GPR64>;
def ATOMIC_SWAP_I64 : Atomic2Ops<atomic_swap_64, GPR64>; def ATOMIC_SWAP_I64 : Atomic2Ops<atomic_swap_64, GPR64>;
}
let isPseudo = 1 in {
def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<atomic_cmp_swap_64, GPR64>; def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<atomic_cmp_swap_64, GPR64>;
} }

View File

@ -1053,11 +1053,14 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case Mips::ATOMIC_SWAP_I64: case Mips::ATOMIC_SWAP_I64:
return emitAtomicBinary(MI, BB, 8, 0); return emitAtomicBinary(MI, BB, 8, 0);
case Mips::ATOMIC_CMP_SWAP_I8_PSEUDO: case Mips::ATOMIC_CMP_SWAP_I8:
return emitAtomicCmpSwapPartword(MI, BB, 1); return emitAtomicCmpSwapPartword(MI, BB, 1);
case Mips::ATOMIC_CMP_SWAP_I16_PSEUDO: case Mips::ATOMIC_CMP_SWAP_I16:
return emitAtomicCmpSwapPartword(MI, BB, 2); return emitAtomicCmpSwapPartword(MI, BB, 2);
case Mips::ATOMIC_CMP_SWAP_I32:
return emitAtomicCmpSwap(MI, BB, 4);
case Mips::ATOMIC_CMP_SWAP_I64:
return emitAtomicCmpSwap(MI, BB, 8);
case Mips::PseudoSDIV: case Mips::PseudoSDIV:
case Mips::PseudoUDIV: case Mips::PseudoUDIV:
case Mips::DIV: case Mips::DIV:
@ -1404,6 +1407,96 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
return exitMBB; return exitMBB;
} }
/// Expands a 32- or 64-bit atomic compare-and-swap pseudo instruction into an
/// ll/sc retry loop.
///
/// Two loop blocks and an exit block are inserted after \p BB. Everything that
/// followed \p MI in \p BB, together with BB's successor edges, is moved into
/// the exit block, and \p MI itself is erased.
///
/// \param MI   The pseudo being expanded. Operand 0 is the destination
///             register, operand 1 the pointer, operand 2 the value compared
///             against, and operand 3 the value to store.
/// \param BB   The block that currently contains \p MI.
/// \param Size Width of the access in bytes; must be 4 or 8.
/// \returns The exit block (the block now holding the code that followed MI).
///
/// NOTE(review): the ll and the sc are emitted into two different blocks
/// (loop1MBB and loop2MBB) using virtual registers. Presumably this runs
/// before register allocation, in which case anything inserted between the
/// two (e.g. a spill store at -O0) could break the sequence -- confirm.
MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI,
MachineBasicBlock *BB,
unsigned Size) const {
assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap.");
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
// Register class of an integer that is Size bytes wide (i32 or i64).
const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const bool ArePtrs64bit = ABI.ArePtrs64bit();
DebugLoc DL = MI.getDebugLoc();
// Opcodes and zero register to use; chosen below from the access size, the
// microMIPS / R6 subtarget features and the pointer width.
unsigned LL, SC, ZERO, BNE, BEQ;
if (Size == 4) {
if (isMicroMips) {
LL = Mips::LL_MM;
SC = Mips::SC_MM;
} else {
// The *64 variants are selected when pointers are 64 bits wide.
LL = Subtarget.hasMips32r6()
? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
: (ArePtrs64bit ? Mips::LL64 : Mips::LL);
SC = Subtarget.hasMips32r6()
? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
: (ArePtrs64bit ? Mips::SC64 : Mips::SC);
}
ZERO = Mips::ZERO;
BNE = Mips::BNE;
BEQ = Mips::BEQ;
} else {
// 8-byte access: doubleword ll/sc with the 64-bit branches and zero register.
LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
SC = Subtarget.hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
ZERO = Mips::ZERO_64;
BNE = Mips::BNE64;
BEQ = Mips::BEQ64;
}
unsigned Dest = MI.getOperand(0).getReg();
unsigned Ptr = MI.getOperand(1).getReg();
unsigned OldVal = MI.getOperand(2).getReg();
unsigned NewVal = MI.getOperand(3).getReg();
// Fresh virtual register that receives the result written by the sc.
unsigned Success = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
// Resulting layout: BB, loop1MBB, loop2MBB, exitMBB.
MachineFunction::iterator It = ++BB->getIterator();
MF->insert(It, loop1MBB);
MF->insert(It, loop2MBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
// ...
// fallthrough --> loop1MBB
BB->addSuccessor(loop1MBB);
// loop1MBB either leaves through its bne or continues into loop2MBB;
// loop2MBB either branches back to loop1MBB or continues into exitMBB.
loop1MBB->addSuccessor(exitMBB);
loop1MBB->addSuccessor(loop2MBB);
loop2MBB->addSuccessor(loop1MBB);
loop2MBB->addSuccessor(exitMBB);
// loop1MBB:
// ll dest, 0(ptr)
// bne dest, oldval, exitMBB
BB = loop1MBB;
BuildMI(BB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0);
BuildMI(BB, DL, TII->get(BNE))
.addReg(Dest).addReg(OldVal).addMBB(exitMBB);
// loop2MBB:
// sc success, newval, 0(ptr)
// beq success, $0, loop1MBB
BB = loop2MBB;
BuildMI(BB, DL, TII->get(SC), Success)
.addReg(NewVal).addReg(Ptr).addImm(0);
// Go back to the ll when the sc result register equals the zero register.
BuildMI(BB, DL, TII->get(BEQ))
.addReg(Success).addReg(ZERO).addMBB(loop1MBB);
MI.eraseFromParent(); // The instruction is gone now.
return exitMBB;
}
MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const {
assert((Size == 1 || Size == 2) && assert((Size == 1 || Size == 2) &&
@ -1428,15 +1521,18 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
unsigned Mask = RegInfo.createVirtualRegister(RC); unsigned Mask = RegInfo.createVirtualRegister(RC);
unsigned Mask2 = RegInfo.createVirtualRegister(RC); unsigned Mask2 = RegInfo.createVirtualRegister(RC);
unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC); unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC);
unsigned OldVal = RegInfo.createVirtualRegister(RC);
unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC);
unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC); unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC);
unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp); unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp);
unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC); unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
unsigned MaskUpper = RegInfo.createVirtualRegister(RC); unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC);
unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC);
unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I8_PSEUDO unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
? Mips::ATOMIC_CMP_SWAP_I8_FRAG unsigned StoreVal = RegInfo.createVirtualRegister(RC);
: Mips::ATOMIC_CMP_SWAP_I16_FRAG; unsigned SrlRes = RegInfo.createVirtualRegister(RC);
unsigned Success = RegInfo.createVirtualRegister(RC);
unsigned LL, SC; unsigned LL, SC;
if (isMicroMips) { if (isMicroMips) {
@ -1451,8 +1547,14 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
// insert new blocks after the current block // insert new blocks after the current block
const BasicBlock *LLVM_BB = BB->getBasicBlock(); const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator It = ++BB->getIterator(); MachineFunction::iterator It = ++BB->getIterator();
MF->insert(It, loop1MBB);
MF->insert(It, loop2MBB);
MF->insert(It, sinkMBB);
MF->insert(It, exitMBB); MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB. // Transfer the remainder of BB and its successor edges to exitMBB.
@ -1460,7 +1562,12 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
std::next(MachineBasicBlock::iterator(MI)), BB->end()); std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB); exitMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(exitMBB); BB->addSuccessor(loop1MBB);
loop1MBB->addSuccessor(sinkMBB);
loop1MBB->addSuccessor(loop2MBB);
loop2MBB->addSuccessor(loop1MBB);
loop2MBB->addSuccessor(sinkMBB);
sinkMBB->addSuccessor(exitMBB);
// FIXME: computation of newval2 can be moved to loop2MBB. // FIXME: computation of newval2 can be moved to loop2MBB.
// thisMBB: // thisMBB:
@ -1505,31 +1612,40 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal) BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal)
.addReg(MaskedNewVal).addReg(ShiftAmt); .addReg(MaskedNewVal).addReg(ShiftAmt);
// For correctness purpose, a new pseudo is introduced here. We need this // loop1MBB:
// new pseudo, so that FastRegisterAllocator does not see an ll/sc sequence // ll oldval,0(alignedaddr)
// that is spread over >1 basic blocks. A register allocator which // and maskedoldval0,oldval,mask
// introduces (or any codegen in fact) a store, can violate the expectations // bne maskedoldval0,shiftedcmpval,sinkMBB
// of the hardware. BB = loop1MBB;
// BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
// An atomic read-modify-write sequence starts with a linked load BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
// instruction and ends with a store conditional instruction. The atomic .addReg(OldVal).addReg(Mask);
// read-modify-write sequence fails if any of the following conditions BuildMI(BB, DL, TII->get(Mips::BNE))
// occur between the execution of ll and sc: .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB);
// * A coherent store is completed by another process or coherent I/O
// module into the block of synchronizable physical memory containing // loop2MBB:
// the word. The size and alignment of the block is // and maskedoldval1,oldval,mask2
// implementation-dependent. // or storeval,maskedoldval1,shiftednewval
// * A coherent store is executed between an LL and SC sequence on the // sc success,storeval,0(alignedaddr)
// same processor to the block of synchronizable physical memory // beq success,$0,loop1MBB
// containing the word. BB = loop2MBB;
// BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
BuildMI(BB, DL, TII->get(AtomicOp), Dest) .addReg(OldVal).addReg(Mask2);
.addReg(AlignedAddr) BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
.addReg(Mask) .addReg(MaskedOldVal1).addReg(ShiftedNewVal);
.addReg(ShiftedCmpVal) BuildMI(BB, DL, TII->get(SC), Success)
.addReg(Mask2) .addReg(StoreVal).addReg(AlignedAddr).addImm(0);
.addReg(ShiftedNewVal) BuildMI(BB, DL, TII->get(Mips::BEQ))
.addReg(ShiftAmt); .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
// sinkMBB:
// srl srlres,maskedoldval0,shiftamt
// sign_extend dest,srlres
BB = sinkMBB;
BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
.addReg(MaskedOldVal0).addReg(ShiftAmt);
BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes);
MI.eraseFromParent(); // The instruction is gone now. MI.eraseFromParent(); // The instruction is gone now.

View File

@ -1666,10 +1666,6 @@ class AtomicCmpSwap<PatFrag Op, RegisterClass DRC> :
PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap), PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap),
[(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>; [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>;
class AtomicCmpSwapSubword<RegisterClass RC> :
PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$mask, RC:$ShiftCmpVal,
RC:$mask2, RC:$ShiftNewVal, RC:$ShiftAmt), []>;
class LLBase<string opstr, RegisterOperand RO, DAGOperand MO = mem> : class LLBase<string opstr, RegisterOperand RO, DAGOperand MO = mem> :
InstSE<(outs RO:$rt), (ins MO:$addr), !strconcat(opstr, "\t$rt, $addr"), InstSE<(outs RO:$rt), (ins MO:$addr), !strconcat(opstr, "\t$rt, $addr"),
[], II_LL, FrmI, opstr> { [], II_LL, FrmI, opstr> {
@ -1748,21 +1744,11 @@ let usesCustomInserter = 1 in {
def ATOMIC_SWAP_I16 : Atomic2Ops<atomic_swap_16, GPR32>; def ATOMIC_SWAP_I16 : Atomic2Ops<atomic_swap_16, GPR32>;
def ATOMIC_SWAP_I32 : Atomic2Ops<atomic_swap_32, GPR32>; def ATOMIC_SWAP_I32 : Atomic2Ops<atomic_swap_32, GPR32>;
def ATOMIC_CMP_SWAP_I8_PSEUDO : AtomicCmpSwap<atomic_cmp_swap_8, GPR32>; def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<atomic_cmp_swap_8, GPR32>;
def ATOMIC_CMP_SWAP_I16_PSEUDO : AtomicCmpSwap<atomic_cmp_swap_16, GPR32>; def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<atomic_cmp_swap_16, GPR32>;
}
let isPseudo = 1 in {
// The expansion of ATOMIC_CMP_SWAP_I(8|16) occurs in two parts. First,
// the *_PSEUDO is partially lowered during ISelLowering to compute the
// aligned addresses and necessary masks, along with another pseudo which
// represents the ll/sc loop. That pseudo is lowered after the basic
// postRA pseudos have been lowered.
def ATOMIC_CMP_SWAP_I8_FRAG : AtomicCmpSwapSubword<GPR32>;
def ATOMIC_CMP_SWAP_I16_FRAG : AtomicCmpSwapSubword<GPR32>;
def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, GPR32>; def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, GPR32>;
} }
/// Pseudo instructions for loading and storing accumulator registers. /// Pseudo instructions for loading and storing accumulator registers.
let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in { let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
def LOAD_ACC64 : Load<"", ACC64>; def LOAD_ACC64 : Load<"", ACC64>;

View File

@ -213,7 +213,6 @@ public:
bool addInstSelector() override; bool addInstSelector() override;
void addPreEmitPass() override; void addPreEmitPass() override;
void addPreRegAlloc() override; void addPreRegAlloc() override;
void addPreSched2() override;
}; };
} // end anonymous namespace } // end anonymous namespace
@ -271,7 +270,3 @@ void MipsPassConfig::addPreEmitPass() {
addPass(createMipsLongBranchPass(TM)); addPass(createMipsLongBranchPass(TM));
addPass(createMipsConstantIslandPass()); addPass(createMipsConstantIslandPass());
} }
void MipsPassConfig::addPreSched2() {
addPass(createMipsExpandPseudoPass());
}

View File

@ -5,21 +5,13 @@
; RUN: llc -O0 -march=mips64el -mcpu=mips64r2 -target-abi=n64 < %s -filetype=asm -o - \ ; RUN: llc -O0 -march=mips64el -mcpu=mips64r2 -target-abi=n64 < %s -filetype=asm -o - \
; RUN: | FileCheck -check-prefixes=PTR64,ALL %s ; RUN: | FileCheck -check-prefixes=PTR64,ALL %s
; ALL-LABEL: foo:
; PTR32: lw $[[R0:[0-9]+]] ; PTR32: lw $[[R0:[0-9]+]]
; PTR32: addiu $[[R1:[0-9]+]], $zero, -4
; PTR32: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
; PTR64: ld $[[R0:[0-9]+]] ; PTR64: ld $[[R0:[0-9]+]]
; PTR64: daddiu $[[R1:[0-9]+]], $zero, -4
; PTR64: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
; ALL: ll ${{[0-9]+}}, 0($[[R2]]) ; ALL: ll ${{[0-9]+}}, 0($[[R0]])
define {i16, i1} @foo(i16** %addr, i16 signext %r, i16 zeroext %new) { define {i16, i1} @foo(i16* %addr, i16 signext %r, i16 zeroext %new) {
%ptr = load i16*, i16** %addr %res = cmpxchg i16* %addr, i16 %r, i16 %new seq_cst seq_cst
%res = cmpxchg i16* %ptr, i16 %r, i16 %new seq_cst seq_cst
ret {i16, i1} %res ret {i16, i1} %res
} }