Set the mayLoad flag on LL and the mayStore flag on SC in order to prevent LICM
from moving these instructions out of the loop. Previously, stores and loads to
a stack frame object were inserted to accomplish this. Remove the code that was
needed for that workaround (the stack-slot stores/loads and the
AtomicFrameIndex bookkeeping in MipsFunctionInfo). Patch by Sasa Stankovic.

llvm-svn: 135415
This commit is contained in:
Akira Hatanaka 2011-07-18 18:52:12 +00:00
parent 64d53620aa
commit 27292638bd
4 changed files with 41 additions and 109 deletions

View File

@ -733,11 +733,10 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc(); DebugLoc dl = MI->getDebugLoc();
unsigned Dest = MI->getOperand(0).getReg(); unsigned Oldval = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg(); unsigned Ptr = MI->getOperand(1).getReg();
unsigned Incr = MI->getOperand(2).getReg(); unsigned Incr = MI->getOperand(2).getReg();
unsigned Oldval = RegInfo.createVirtualRegister(RC);
unsigned Tmp1 = RegInfo.createVirtualRegister(RC); unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
unsigned Tmp2 = RegInfo.createVirtualRegister(RC); unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
unsigned Tmp3 = RegInfo.createVirtualRegister(RC); unsigned Tmp3 = RegInfo.createVirtualRegister(RC);
@ -759,38 +758,16 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// thisMBB: // thisMBB:
// ... // ...
// sw incr, fi(sp) // store incr to stack (when BinOpcode == 0)
// fallthrough --> loopMBB // fallthrough --> loopMBB
// Note: for atomic.swap (when BinOpcode == 0), storing incr to stack before
// the loop and then loading it from stack in block loopMBB is necessary to
// prevent MachineLICM pass to hoist "or" instruction out of the block
// loopMBB.
int fi = 0;
if (BinOpcode == 0 && !Nand) {
// Get or create a temporary stack location.
MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
fi = MipsFI->getAtomicFrameIndex();
if (fi == -1) {
fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
MipsFI->setAtomicFrameIndex(fi);
}
BuildMI(BB, dl, TII->get(Mips::SW))
.addReg(Incr).addFrameIndex(fi).addImm(0);
}
BB->addSuccessor(loopMBB); BB->addSuccessor(loopMBB);
// loopMBB: // loopMBB:
// ll oldval, 0(ptr) // ll oldval, 0(ptr)
// or dest, $0, oldval
// <binop> tmp1, oldval, incr // <binop> tmp1, oldval, incr
// sc tmp1, 0(ptr) // sc tmp1, 0(ptr)
// beq tmp1, $0, loopMBB // beq tmp1, $0, loopMBB
BB = loopMBB; BB = loopMBB;
BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Ptr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::OR), Dest).addReg(Mips::ZERO).addReg(Oldval);
if (Nand) { if (Nand) {
// and tmp2, oldval, incr // and tmp2, oldval, incr
// nor tmp1, $0, tmp2 // nor tmp1, $0, tmp2
@ -800,10 +777,7 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// <binop> tmp1, oldval, incr // <binop> tmp1, oldval, incr
BuildMI(BB, dl, TII->get(BinOpcode), Tmp1).addReg(Oldval).addReg(Incr); BuildMI(BB, dl, TII->get(BinOpcode), Tmp1).addReg(Oldval).addReg(Incr);
} else { } else {
// lw tmp2, fi(sp) // load incr from stack Tmp1 = Incr;
// or tmp1, $zero, tmp2
BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0);
BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
} }
BuildMI(BB, dl, TII->get(Mips::SC), Tmp3).addReg(Tmp1).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::SC), Tmp3).addReg(Tmp1).addReg(Ptr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ)) BuildMI(BB, dl, TII->get(Mips::BEQ))
@ -880,12 +854,6 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
// nor mask2,$0,mask // nor mask2,$0,mask
// andi tmp4,incr,255 // andi tmp4,incr,255
// sll incr2,tmp4,shift // sll incr2,tmp4,shift
// sw incr2, fi(sp) // store incr2 to stack (when BinOpcode == 0)
// Note: for atomic.swap (when BinOpcode == 0), storing incr2 to stack before
// the loop and then loading it from stack in block loopMBB is necessary to
// prevent MachineLICM pass to hoist "or" instruction out of the block
// loopMBB.
int64_t MaskImm = (Size == 1) ? 255 : 65535; int64_t MaskImm = (Size == 1) ? 255 : 65535;
BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4); BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4);
@ -904,21 +872,9 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp5).addReg(Shift); BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp5).addReg(Shift);
} }
int fi = 0;
if (BinOpcode == 0 && !Nand) {
// Get or create a temporary stack location.
MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
fi = MipsFI->getAtomicFrameIndex();
if (fi == -1) {
fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
MipsFI->setAtomicFrameIndex(fi);
}
BuildMI(BB, dl, TII->get(Mips::SW))
.addReg(Incr2).addFrameIndex(fi).addImm(0);
}
BB->addSuccessor(loopMBB); BB->addSuccessor(loopMBB);
// atomic.load.binop
// loopMBB: // loopMBB:
// ll oldval,0(addr) // ll oldval,0(addr)
// binop tmp7,oldval,incr2 // binop tmp7,oldval,incr2
@ -927,6 +883,15 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
// or tmp9,tmp8,newval // or tmp9,tmp8,newval
// sc tmp9,0(addr) // sc tmp9,0(addr)
// beq tmp9,$0,loopMBB // beq tmp9,$0,loopMBB
// atomic.swap
// loopMBB:
// ll oldval,0(addr)
// and tmp8,oldval,mask2
// or tmp9,tmp8,incr2
// sc tmp9,0(addr)
// beq tmp9,$0,loopMBB
BB = loopMBB; BB = loopMBB;
BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Addr).addImm(0); BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Addr).addImm(0);
if (Nand) { if (Nand) {
@ -940,15 +905,14 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
} else if (BinOpcode) { } else if (BinOpcode) {
// <binop> tmp7, oldval, incr2 // <binop> tmp7, oldval, incr2
BuildMI(BB, dl, TII->get(BinOpcode), Tmp7).addReg(Oldval).addReg(Incr2); BuildMI(BB, dl, TII->get(BinOpcode), Tmp7).addReg(Oldval).addReg(Incr2);
} else {
// lw tmp6, fi(sp) // load incr2 from stack
// or tmp7, $zero, tmp6
BuildMI(BB, dl, TII->get(Mips::LW), Tmp6).addFrameIndex(fi).addImm(0);
BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6);
} }
BuildMI(BB, dl, TII->get(Mips::AND), Newval).addReg(Tmp7).addReg(Mask); if (BinOpcode != 0 || Nand)
BuildMI(BB, dl, TII->get(Mips::AND), Newval).addReg(Tmp7).addReg(Mask);
BuildMI(BB, dl, TII->get(Mips::AND), Tmp8).addReg(Oldval).addReg(Mask2); BuildMI(BB, dl, TII->get(Mips::AND), Tmp8).addReg(Oldval).addReg(Mask2);
BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Newval); if (BinOpcode != 0 || Nand)
BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Newval);
else
BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Incr2);
BuildMI(BB, dl, TII->get(Mips::SC), Tmp13) BuildMI(BB, dl, TII->get(Mips::SC), Tmp13)
.addReg(Tmp9).addReg(Addr).addImm(0); .addReg(Tmp9).addReg(Addr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ)) BuildMI(BB, dl, TII->get(Mips::BEQ))
@ -996,7 +960,6 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
unsigned Newval = MI->getOperand(3).getReg(); unsigned Newval = MI->getOperand(3).getReg();
unsigned Tmp1 = RegInfo.createVirtualRegister(RC); unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
unsigned Tmp3 = RegInfo.createVirtualRegister(RC); unsigned Tmp3 = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block // insert new blocks after the current block
@ -1016,25 +979,9 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
BB->end()); BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB); exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// Get or create a temporary stack location.
MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
int fi = MipsFI->getAtomicFrameIndex();
if (fi == -1) {
fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
MipsFI->setAtomicFrameIndex(fi);
}
// thisMBB: // thisMBB:
// ... // ...
// sw newval, fi(sp) // store newval to stack
// fallthrough --> loop1MBB // fallthrough --> loop1MBB
// Note: storing newval to stack before the loop and then loading it from
// stack in block loop2MBB is necessary to prevent MachineLICM pass to
// hoist "or" instruction out of the block loop2MBB.
BuildMI(BB, dl, TII->get(Mips::SW))
.addReg(Newval).addFrameIndex(fi).addImm(0);
BB->addSuccessor(loop1MBB); BB->addSuccessor(loop1MBB);
// loop1MBB: // loop1MBB:
@ -1048,13 +995,11 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
BB->addSuccessor(loop2MBB); BB->addSuccessor(loop2MBB);
// loop2MBB: // loop2MBB:
// lw tmp2, fi(sp) // load newval from stack // or tmp1, $0, newval
// or tmp1, $0, tmp2
// sc tmp1, 0(ptr) // sc tmp1, 0(ptr)
// beq tmp1, $0, loop1MBB // beq tmp1, $0, loop1MBB
BB = loop2MBB; BB = loop2MBB;
BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0); BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Newval);
BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
BuildMI(BB, dl, TII->get(Mips::SC), Tmp3).addReg(Tmp1).addReg(Ptr).addImm(0); BuildMI(BB, dl, TII->get(Mips::SC), Tmp3).addReg(Tmp1).addReg(Ptr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ)) BuildMI(BB, dl, TII->get(Mips::BEQ))
.addReg(Tmp3).addReg(Mips::ZERO).addMBB(loop1MBB); .addReg(Tmp3).addReg(Mips::ZERO).addMBB(loop1MBB);

View File

@ -590,10 +590,10 @@ def SH : StoreM<0x29, "sh", truncstorei16>;
def SW : StoreM<0x2b, "sw", store>; def SW : StoreM<0x2b, "sw", store>;
/// Load-linked, Store-conditional /// Load-linked, Store-conditional
let hasDelaySlot = 1 in let mayLoad = 1, hasDelaySlot = 1 in
def LL : FI<0x30, (outs CPURegs:$dst), (ins mem:$addr), def LL : FI<0x30, (outs CPURegs:$dst), (ins mem:$addr),
"ll\t$dst, $addr", [], IILoad>; "ll\t$dst, $addr", [], IILoad>;
let Constraints = "$src = $dst" in let mayStore = 1, Constraints = "$src = $dst" in
def SC : FI<0x38, (outs CPURegs:$dst), (ins CPURegs:$src, mem:$addr), def SC : FI<0x38, (outs CPURegs:$dst), (ins CPURegs:$src, mem:$addr),
"sc\t$src, $addr", [], IIStore>; "sc\t$src, $addr", [], IIStore>;

View File

@ -51,16 +51,12 @@ private:
mutable int DynAllocFI; // Frame index of dynamically allocated stack area. mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
unsigned MaxCallFrameSize; unsigned MaxCallFrameSize;
/// AtomicFrameIndex - To implement atomic.swap and atomic.cmp.swap
/// intrinsics, it is necessary to use a temporary stack location.
/// This field holds the frame index of this location.
int AtomicFrameIndex;
public: public:
MipsFunctionInfo(MachineFunction& MF) MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0), : MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0), OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0),
MaxCallFrameSize(0), AtomicFrameIndex(-1) MaxCallFrameSize(0)
{} {}
bool isInArgFI(int FI) const { bool isInArgFI(int FI) const {
@ -104,9 +100,6 @@ public:
unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; } unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; }
void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; } void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
int getAtomicFrameIndex() const { return AtomicFrameIndex; }
void setAtomicFrameIndex(int Index) { AtomicFrameIndex = Index; }
}; };
} // end of namespace llvm } // end of namespace llvm

View File

@ -24,7 +24,6 @@ entry:
; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp) ; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp)
; CHECK: $[[BB0:[A-Z_0-9]+]]: ; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]]) ; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]])
; CHECK: or $2, $zero, $[[R1]]
; CHECK: addu $[[R2:[0-9]+]], $[[R1]], $4 ; CHECK: addu $[[R2:[0-9]+]], $[[R1]], $4
; CHECK: sc $[[R2]], 0($[[R0]]) ; CHECK: sc $[[R2]], 0($[[R0]])
; CHECK: beq $[[R2]], $zero, $[[BB0]] ; CHECK: beq $[[R2]], $zero, $[[BB0]]
@ -39,43 +38,42 @@ entry:
; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp) ; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp)
; CHECK: $[[BB0:[A-Z_0-9]+]]: ; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]]) ; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]])
; CHECK: or $2, $zero, $[[R1]] ; CHECK: and $[[R3:[0-9]+]], $[[R1]], $4
; CHECK: and $[[R1]], $[[R1]], $4 ; CHECK: nor $[[R2:[0-9]+]], $zero, $[[R3]]
; CHECK: nor $[[R2:[0-9]+]], $zero, $[[R1]]
; CHECK: sc $[[R2]], 0($[[R0]]) ; CHECK: sc $[[R2]], 0($[[R0]])
; CHECK: beq $[[R2]], $zero, $[[BB0]] ; CHECK: beq $[[R2]], $zero, $[[BB0]]
} }
define i32 @AtomicSwap32(i32 %oldval) nounwind { define i32 @AtomicSwap32(i32 %newval) nounwind {
entry: entry:
%0 = call i32 @llvm.atomic.swap.i32.p0i32(i32* @x, i32 %oldval) %newval.addr = alloca i32, align 4
store i32 %newval, i32* %newval.addr, align 4
%tmp = load i32* %newval.addr, align 4
%0 = call i32 @llvm.atomic.swap.i32.p0i32(i32* @x, i32 %tmp)
ret i32 %0 ret i32 %0
; CHECK: AtomicSwap32: ; CHECK: AtomicSwap32:
; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp) ; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp)
; CHECK: sw $4, [[OFFSET:[0-9]+]]($sp)
; CHECK: $[[BB0:[A-Z_0-9]+]]: ; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]]) ; CHECK: ll ${{[0-9]+}}, 0($[[R0]])
; CHECK: or $2, $zero, $[[R1]] ; CHECK: sc $[[R2:[0-9]+]], 0($[[R0]])
; CHECK: lw $[[R2:[0-9]+]], [[OFFSET]]($sp) ; CHECK: beq $[[R2]], $zero, $[[BB0]]
; CHECK: or $[[R3:[0-9]+]], $zero, $[[R2]]
; CHECK: sc $[[R3]], 0($[[R0]])
; CHECK: beq $[[R3]], $zero, $[[BB0]]
} }
define i32 @AtomicCmpSwap32(i32 %oldval, i32 %newval) nounwind { define i32 @AtomicCmpSwap32(i32 %oldval, i32 %newval) nounwind {
entry: entry:
%0 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* @x, i32 %oldval, i32 %newval) %newval.addr = alloca i32, align 4
store i32 %newval, i32* %newval.addr, align 4
%tmp = load i32* %newval.addr, align 4
%0 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* @x, i32 %oldval, i32 %tmp)
ret i32 %0 ret i32 %0
; CHECK: AtomicCmpSwap32: ; CHECK: AtomicCmpSwap32:
; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp) ; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp)
; CHECK: sw $5, [[OFFSET:[0-9]+]]($sp)
; CHECK: $[[BB0:[A-Z_0-9]+]]: ; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $2, 0($[[R0]]) ; CHECK: ll $2, 0($[[R0]])
; CHECK: bne $2, $4, $[[BB1:[A-Z_0-9]+]] ; CHECK: bne $2, $4, $[[BB1:[A-Z_0-9]+]]
; CHECK: lw $[[R1:[0-9]+]], [[OFFSET]]($sp) ; CHECK: or $[[R2:[0-9]+]], $zero, $5
; CHECK: or $[[R2:[0-9]+]], $zero, $[[R1]]
; CHECK: sc $[[R2]], 0($[[R0]]) ; CHECK: sc $[[R2]], 0($[[R0]])
; CHECK: beq $[[R2]], $zero, $[[BB0]] ; CHECK: beq $[[R2]], $zero, $[[BB0]]
; CHECK: $[[BB1]]: ; CHECK: $[[BB1]]:
@ -183,9 +181,9 @@ entry:
; CHECK: sra $2, $[[R17]], 24 ; CHECK: sra $2, $[[R17]], 24
} }
define signext i8 @AtomicSwap8(i8 signext %oldval) nounwind { define signext i8 @AtomicSwap8(i8 signext %newval) nounwind {
entry: entry:
%0 = call i8 @llvm.atomic.swap.i8.p0i8(i8* @y, i8 %oldval) %0 = call i8 @llvm.atomic.swap.i8.p0i8(i8* @y, i8 %newval)
ret i8 %0 ret i8 %0
; CHECK: AtomicSwap8: ; CHECK: AtomicSwap8:
@ -199,15 +197,11 @@ entry:
; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]] ; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]]
; CHECK: andi $[[R8:[0-9]+]], $4, 255 ; CHECK: andi $[[R8:[0-9]+]], $4, 255
; CHECK: sll $[[R9:[0-9]+]], $[[R8]], $[[R4]] ; CHECK: sll $[[R9:[0-9]+]], $[[R8]], $[[R4]]
; CHECK: sw $[[R9]], [[OFFSET:[0-9]+]]($sp)
; CHECK: $[[BB0:[A-Z_0-9]+]]: ; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $[[R10:[0-9]+]], 0($[[R2]]) ; CHECK: ll $[[R10:[0-9]+]], 0($[[R2]])
; CHECK: lw $[[R18:[0-9]+]], [[OFFSET]]($sp)
; CHECK: or $[[R11:[0-9]+]], $zero, $[[R18]]
; CHECK: and $[[R12:[0-9]+]], $[[R11]], $[[R6]]
; CHECK: and $[[R13:[0-9]+]], $[[R10]], $[[R7]] ; CHECK: and $[[R13:[0-9]+]], $[[R10]], $[[R7]]
; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R12]] ; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R9]]
; CHECK: sc $[[R14]], 0($[[R2]]) ; CHECK: sc $[[R14]], 0($[[R2]])
; CHECK: beq $[[R14]], $zero, $[[BB0]] ; CHECK: beq $[[R14]], $zero, $[[BB0]]