Revert "[mips] Fix atomic compare and swap at O0."

This reverts r296132. I forgot to include the tests. llvm-svn: 296133
2017-02-24 16:30:27 +00:00 · 2017-02-24 16:30:27 +00:00 · 3c58c18ff0
parent cf0e06d375
commit 3c58c18ff0
7 changed files with 154 additions and 70 deletions
--- a/llvm/lib/Target/Mips/CMakeLists.txt
+++ b/llvm/lib/Target/Mips/CMakeLists.txt
@ -26,7 +26,6 @@ add_llvm_target(MipsCodeGen
  MipsCCState.cpp
  MipsConstantIslandPass.cpp
  MipsDelaySlotFiller.cpp
  MipsExpandPseudo.cpp
  MipsFastISel.cpp
  MipsHazardSchedule.cpp
  MipsInstrInfo.cpp
--- a/llvm/lib/Target/Mips/Mips.h
+++ b/llvm/lib/Target/Mips/Mips.h
@ -32,7 +32,6 @@ namespace llvm {
  FunctionPass *createMipsHazardSchedule();
  FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
  FunctionPass *createMipsConstantIslandPass();
  FunctionPass *createMipsExpandPseudoPass();
 } // end namespace llvm;
 #endif
--- a/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips64InstrInfo.td
@ -73,9 +73,6 @@ let usesCustomInserter = 1 in {
  def ATOMIC_LOAD_XOR_I64  : Atomic2Ops<atomic_load_xor_64, GPR64>;
  def ATOMIC_LOAD_NAND_I64 : Atomic2Ops<atomic_load_nand_64, GPR64>;
  def ATOMIC_SWAP_I64      : Atomic2Ops<atomic_swap_64, GPR64>;
 }
 let isPseudo = 1 in {
  def ATOMIC_CMP_SWAP_I64  : AtomicCmpSwap<atomic_cmp_swap_64, GPR64>;
 }
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@ -1053,11 +1053,14 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
  case Mips::ATOMIC_SWAP_I64:
    return emitAtomicBinary(MI, BB, 8, 0);
-  case Mips::ATOMIC_CMP_SWAP_I8_PSEUDO:
+  case Mips::ATOMIC_CMP_SWAP_I8:
    return emitAtomicCmpSwapPartword(MI, BB, 1);
-  case Mips::ATOMIC_CMP_SWAP_I16_PSEUDO:
+  case Mips::ATOMIC_CMP_SWAP_I16:
    return emitAtomicCmpSwapPartword(MI, BB, 2);
-
+  case Mips::ATOMIC_CMP_SWAP_I32:
    return emitAtomicCmpSwap(MI, BB, 4);
  case Mips::ATOMIC_CMP_SWAP_I64:
    return emitAtomicCmpSwap(MI, BB, 8);
  case Mips::PseudoSDIV:
  case Mips::PseudoUDIV:
  case Mips::DIV:
@ -1404,6 +1407,96 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
  return exitMBB;
 }
 /// Expand a word- or doubleword-sized atomic compare-and-swap pseudo \p MI
 /// into a load-linked / store-conditional retry loop.
 ///
 /// Operands read from \p MI: 0 = destination register, 1 = pointer register,
 /// 2 = value to compare against, 3 = value to store.
 ///
 /// Three new blocks (loop1MBB, loop2MBB, exitMBB) are inserted right after
 /// \p BB. Everything that followed \p MI in \p BB, together with BB's
 /// successor edges, is moved to exitMBB, and \p MI itself is erased.
 ///
 /// \param MI   the compare-and-swap pseudo being expanded.
 /// \param BB   the block currently containing \p MI.
 /// \param Size access width in bytes; asserted to be 4 or 8.
 /// \returns exitMBB, the block that now holds the code after \p MI.
 MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI,
                                                         MachineBasicBlock *BB,
                                                         unsigned Size) const {
  assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap.");
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &RegInfo = MF->getRegInfo();
  // Register class matching an integer of Size * 8 bits.
  const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const bool ArePtrs64bit = ABI.ArePtrs64bit();
  DebugLoc DL = MI.getDebugLoc();
  // Opcodes for the linked load, conditional store, zero register and the
  // two branches; chosen below from Size, microMIPS mode, R6 support and
  // pointer width.
  unsigned LL, SC, ZERO, BNE, BEQ;
  if (Size == 4) {
    if (isMicroMips) {
      LL = Mips::LL_MM;
      SC = Mips::SC_MM;
    } else {
      LL = Subtarget.hasMips32r6()
               ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
               : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
      SC = Subtarget.hasMips32r6()
               ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
               : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
    }
    ZERO = Mips::ZERO;
    BNE = Mips::BNE;
    BEQ = Mips::BEQ;
  } else {
    // Size == 8 (guaranteed by the assert above): use the doubleword forms.
    LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
    SC = Subtarget.hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
    ZERO = Mips::ZERO_64;
    BNE = Mips::BNE64;
    BEQ = Mips::BEQ64;
  }
  unsigned Dest = MI.getOperand(0).getReg();
  unsigned Ptr = MI.getOperand(1).getReg();
  unsigned OldVal = MI.getOperand(2).getReg();
  unsigned NewVal = MI.getOperand(3).getReg();
  // Fresh virtual register for the SC result; it is only read by the BEQ
  // emitted into loop2MBB.
  unsigned Success = RegInfo.createVirtualRegister(RC);
  // insert new blocks after the current block
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineFunction::iterator It = ++BB->getIterator();
  // Inserting before the same iterator keeps the layout order
  // BB, loop1MBB, loop2MBB, exitMBB.
  MF->insert(It, loop1MBB);
  MF->insert(It, loop2MBB);
  MF->insert(It, exitMBB);
  // Transfer the remainder of BB and its successor edges to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
  //  thisMBB:
  //    ...
  //    fallthrough --> loop1MBB
  BB->addSuccessor(loop1MBB);
  // loop1MBB leaves to exitMBB via the BNE or continues into loop2MBB;
  // loop2MBB goes back to loop1MBB via the BEQ or continues into exitMBB.
  loop1MBB->addSuccessor(exitMBB);
  loop1MBB->addSuccessor(loop2MBB);
  loop2MBB->addSuccessor(loop1MBB);
  loop2MBB->addSuccessor(exitMBB);
  // loop1MBB:
  //   ll dest, 0(ptr)
  //   bne dest, oldval, exitMBB
  BB = loop1MBB;
  BuildMI(BB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0);
  BuildMI(BB, DL, TII->get(BNE))
    .addReg(Dest).addReg(OldVal).addMBB(exitMBB);
  // loop2MBB:
  //   sc success, newval, 0(ptr)
  //   beq success, $0, loop1MBB
  BB = loop2MBB;
  BuildMI(BB, DL, TII->get(SC), Success)
    .addReg(NewVal).addReg(Ptr).addImm(0);
  BuildMI(BB, DL, TII->get(BEQ))
    .addReg(Success).addReg(ZERO).addMBB(loop1MBB);
  MI.eraseFromParent(); // The instruction is gone now.
  return exitMBB;
 }
 MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const {
  assert((Size == 1 || Size == 2) &&
@ -1428,15 +1521,18 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
  unsigned Mask = RegInfo.createVirtualRegister(RC);
  unsigned Mask2 = RegInfo.createVirtualRegister(RC);
  unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC);
  unsigned OldVal = RegInfo.createVirtualRegister(RC);
  unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC);
  unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC);
  unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp);
  unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
  unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
  unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC);
  unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC);
-  unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I8_PSEUDO
+  unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
-                          ? Mips::ATOMIC_CMP_SWAP_I8_FRAG
+  unsigned StoreVal = RegInfo.createVirtualRegister(RC);
-                          : Mips::ATOMIC_CMP_SWAP_I16_FRAG;
+  unsigned SrlRes = RegInfo.createVirtualRegister(RC);
  unsigned Success = RegInfo.createVirtualRegister(RC);
  unsigned LL, SC;
  if (isMicroMips) {
@ -1451,8 +1547,14 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
  // insert new blocks after the current block
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineFunction::iterator It = ++BB->getIterator();
  MF->insert(It, loop1MBB);
  MF->insert(It, loop2MBB);
  MF->insert(It, sinkMBB);
  MF->insert(It, exitMBB);
  // Transfer the remainder of BB and its successor edges to exitMBB.
@ -1460,7 +1562,12 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-  BB->addSuccessor(exitMBB);
+  BB->addSuccessor(loop1MBB);
  loop1MBB->addSuccessor(sinkMBB);
  loop1MBB->addSuccessor(loop2MBB);
  loop2MBB->addSuccessor(loop1MBB);
  loop2MBB->addSuccessor(sinkMBB);
  sinkMBB->addSuccessor(exitMBB);
  // FIXME: computation of newval2 can be moved to loop2MBB.
  //  thisMBB:
@ -1505,31 +1612,40 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
  BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal)
    .addReg(MaskedNewVal).addReg(ShiftAmt);
-  // For correctness purpose, a new pseudo is introduced here. We need this
+  //  loop1MBB:
-  // new pseudo, so that FastRegisterAllocator does not see an ll/sc sequence
+  //    ll      oldval,0(alignedaddr)
-  // that is spread over >1 basic blocks. A register allocator which
+  //    and     maskedoldval0,oldval,mask
-  // introduces (or any codegen infact) a store, can violate the expactations
+  //    bne     maskedoldval0,shiftedcmpval,sinkMBB
-  // of the hardware.
+  BB = loop1MBB;
-  //
+  BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
-  // An atomic read-modify-write sequence starts with a linked load
+  BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
-  // instruction and ends with a store conditional instruction. The atomic
+    .addReg(OldVal).addReg(Mask);
-  // read-modify-write sequence failes if any of the following conditions
+  BuildMI(BB, DL, TII->get(Mips::BNE))
-  // occur between the execution of ll and sc:
+    .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB);
-  //   * A coherent store is completed by another process or coherent I/O
+
-  //     module into the block of synchronizable physical memory containing
+  //  loop2MBB:
-  //     the word. The size and alignment of the block is
+  //    and     maskedoldval1,oldval,mask2
-  //     implementation-dependent.
+  //    or      storeval,maskedoldval1,shiftednewval
-  //   * A coherent store is executed between an LL and SC sequence on the
+  //    sc      success,storeval,0(alignedaddr)
-  //     same processor to the block of synchornizable physical memory
+  //    beq     success,$0,loop1MBB
-  //     containing the word.
+  BB = loop2MBB;
-  //
+  BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
-  BuildMI(BB, DL, TII->get(AtomicOp), Dest)
+    .addReg(OldVal).addReg(Mask2);
-      .addReg(AlignedAddr)
+  BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
-      .addReg(Mask)
+    .addReg(MaskedOldVal1).addReg(ShiftedNewVal);
-      .addReg(ShiftedCmpVal)
+  BuildMI(BB, DL, TII->get(SC), Success)
-      .addReg(Mask2)
+      .addReg(StoreVal).addReg(AlignedAddr).addImm(0);
-      .addReg(ShiftedNewVal)
+  BuildMI(BB, DL, TII->get(Mips::BEQ))
-      .addReg(ShiftAmt);
+      .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
  //  sinkMBB:
  //    srl     srlres,maskedoldval0,shiftamt
  //    sign_extend dest,srlres
  BB = sinkMBB;
  BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
      .addReg(MaskedOldVal0).addReg(ShiftAmt);
  BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes);
  MI.eraseFromParent(); // The instruction is gone now.
--- a/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.td
@ -1666,10 +1666,6 @@ class AtomicCmpSwap<PatFrag Op, RegisterClass DRC> :
  PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap),
           [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>;
 class AtomicCmpSwapSubword<RegisterClass RC> :
  PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$mask, RC:$ShiftCmpVal,
                                RC:$mask2, RC:$ShiftNewVal, RC:$ShiftAmt), []>;
 class LLBase<string opstr, RegisterOperand RO, DAGOperand MO = mem> :
  InstSE<(outs RO:$rt), (ins MO:$addr), !strconcat(opstr, "\t$rt, $addr"),
         [], II_LL, FrmI, opstr> {
@ -1748,21 +1744,11 @@ let usesCustomInserter = 1 in {
  def ATOMIC_SWAP_I16      : Atomic2Ops<atomic_swap_16, GPR32>;
  def ATOMIC_SWAP_I32      : Atomic2Ops<atomic_swap_32, GPR32>;
-  def ATOMIC_CMP_SWAP_I8_PSEUDO : AtomicCmpSwap<atomic_cmp_swap_8, GPR32>;
+  def ATOMIC_CMP_SWAP_I8   : AtomicCmpSwap<atomic_cmp_swap_8, GPR32>;
-  def ATOMIC_CMP_SWAP_I16_PSEUDO : AtomicCmpSwap<atomic_cmp_swap_16, GPR32>;
+  def ATOMIC_CMP_SWAP_I16  : AtomicCmpSwap<atomic_cmp_swap_16, GPR32>;
 }
 let isPseudo = 1 in {
  // The expansion of ATOMIC_CMP_SWAP_I(8|16) occurs in two parts. First,
  // the *_PSEUDO is partially lowering during ISelLowering to compute the
  // aligned addresses and necessary masks, along with another pseudo which
  // represents the ll/sc loop. That pseudo is lowered after the basic
  // postRA pseudos have been lowered.
  def ATOMIC_CMP_SWAP_I8_FRAG : AtomicCmpSwapSubword<GPR32>;
  def ATOMIC_CMP_SWAP_I16_FRAG : AtomicCmpSwapSubword<GPR32>;
  def ATOMIC_CMP_SWAP_I32  : AtomicCmpSwap<atomic_cmp_swap_32, GPR32>;
 }
 /// Pseudo instructions for loading and storing accumulator registers.
 let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
  def LOAD_ACC64  : Load<"", ACC64>;
--- a/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@ -213,7 +213,6 @@ public:
  bool addInstSelector() override;
  void addPreEmitPass() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
 };
 } // end anonymous namespace
@ -271,7 +270,3 @@ void MipsPassConfig::addPreEmitPass() {
  addPass(createMipsLongBranchPass(TM));
  addPass(createMipsConstantIslandPass());
 }
 void MipsPassConfig::addPreSched2() {
  addPass(createMipsExpandPseudoPass());
 }
--- a/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll
+++ b/llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll
@ -5,21 +5,13 @@
 ; RUN: llc -O0 -march=mips64el -mcpu=mips64r2 -target-abi=n64 < %s -filetype=asm -o - \
 ; RUN:   | FileCheck -check-prefixes=PTR64,ALL %s
 ; ALL-LABEL: foo:
 ; PTR32: lw $[[R0:[0-9]+]]
 ; PTR32: addiu $[[R1:[0-9]+]], $zero, -4
 ; PTR32: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
 ; PTR64: ld $[[R0:[0-9]+]]
 ; PTR64: daddiu $[[R1:[0-9]+]], $zero, -4
 ; PTR64: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
-; ALL: ll ${{[0-9]+}}, 0($[[R2]])
+; ALL: ll ${{[0-9]+}}, 0($[[R0]])
-define {i16, i1} @foo(i16** %addr, i16 signext %r, i16 zeroext %new) {
+define {i16, i1} @foo(i16* %addr, i16 signext %r, i16 zeroext %new) {
-  %ptr = load i16*, i16** %addr
+  %res = cmpxchg i16* %addr, i16 %r, i16 %new seq_cst seq_cst
  %res = cmpxchg i16* %ptr, i16 %r, i16 %new seq_cst seq_cst
  ret {i16, i1} %res
 }