forked from OSchip/llvm-project
Do better with physical reg operands (typically, from inline asm)
in local register allocator. If a reg-reg copy has a phys reg input and a virt reg output, and this is the last use of the phys reg, assign the phys reg to the virt reg. If a reg-reg copy has a phys reg output and we need to reload its spilled input, reload it directly into the phys reg rather than passing it through another reg. Following 76208, there is sometimes no dependency between the def of a phys reg and its use; this creates a window where that phys reg can be used for spilling (this is true in linear scan also). This is bad and needs to be fixed a better way, although 76208 works too well in practice to be reverted. However, there should normally be no spilling within inline asm blocks. The patch here goes a long way towards making this actually be true. llvm-svn: 91485
This commit is contained in:
parent
826ca5630e
commit
56f041406d
|
@ -233,14 +233,17 @@ namespace {
|
|||
/// in one of several ways: if the register is available in a physical
|
||||
/// register already, it uses that physical register. If the value is not
|
||||
/// in a physical register, and if there are physical registers available,
|
||||
/// it loads it into a register. If register pressure is high, and it is
|
||||
/// possible, it tries to fold the load of the virtual register into the
|
||||
/// instruction itself. It avoids doing this if register pressure is low to
|
||||
/// improve the chance that subsequent instructions can use the reloaded
|
||||
/// value. This method returns the modified instruction.
|
||||
/// it loads it into a register: PhysReg if that is an available physical
|
||||
/// register, otherwise any physical register of the right class.
|
||||
/// If register pressure is high, and it is possible, it tries to fold the
|
||||
/// load of the virtual register into the instruction itself. It avoids
|
||||
/// doing this if register pressure is low to improve the chance that
|
||||
/// subsequent instructions can use the reloaded value. This method
|
||||
/// returns the modified instruction.
|
||||
///
|
||||
MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
|
||||
unsigned OpNum, SmallSet<unsigned, 4> &RRegs);
|
||||
unsigned OpNum, SmallSet<unsigned, 4> &RRegs,
|
||||
unsigned PhysReg);
|
||||
|
||||
/// ComputeLocalLiveness - Computes liveness of registers within a basic
|
||||
/// block, setting the killed/dead flags as appropriate.
|
||||
|
@ -471,15 +474,17 @@ unsigned RALocal::getReg(MachineBasicBlock &MBB, MachineInstr *I,
|
|||
/// one of several ways: if the register is available in a physical register
|
||||
/// already, it uses that physical register. If the value is not in a physical
|
||||
/// register, and if there are physical registers available, it loads it into a
|
||||
/// register: PhysReg if that is an available physical register, otherwise any
|
||||
/// register. If register pressure is high, and it is possible, it tries to
|
||||
/// fold the load of the virtual register into the instruction itself. It
|
||||
/// avoids doing this if register pressure is low to improve the chance that
|
||||
/// subsequent instructions can use the reloaded value. This method returns the
|
||||
/// modified instruction.
|
||||
/// subsequent instructions can use the reloaded value. This method returns
|
||||
/// the modified instruction.
|
||||
///
|
||||
MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
|
||||
unsigned OpNum,
|
||||
SmallSet<unsigned, 4> &ReloadedRegs) {
|
||||
SmallSet<unsigned, 4> &ReloadedRegs,
|
||||
unsigned PhysReg) {
|
||||
unsigned VirtReg = MI->getOperand(OpNum).getReg();
|
||||
|
||||
// If the virtual register is already available, just update the instruction
|
||||
|
@ -494,7 +499,11 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
|
|||
// Otherwise, we need to fold it into the current instruction, or reload it.
|
||||
// If we have registers available to hold the value, use them.
|
||||
const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
|
||||
unsigned PhysReg = getFreeReg(RC);
|
||||
// If we already have a PhysReg (this happens when the instruction is a
|
||||
// reg-to-reg copy with a PhysReg destination) use that.
|
||||
if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) ||
|
||||
!isPhysRegAvailable(PhysReg))
|
||||
PhysReg = getFreeReg(RC);
|
||||
int FrameIndex = getStackSpaceFor(VirtReg, RC);
|
||||
|
||||
if (PhysReg) { // Register is available, allocate it!
|
||||
|
@ -752,6 +761,12 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
|
|||
errs() << '\n';
|
||||
});
|
||||
|
||||
// Determine whether this is a copy instruction. The cases where the
|
||||
// source or destination are phys regs are handled specially.
|
||||
unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg;
|
||||
bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
|
||||
SrcCopySubReg, DstCopySubReg);
|
||||
|
||||
// Loop over the implicit uses, making sure that they are at the head of the
|
||||
// use order list, so they don't get reallocated.
|
||||
if (TID.ImplicitUses) {
|
||||
|
@ -835,7 +850,8 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
|
|||
// here we are looking for only used operands (never def&use)
|
||||
if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
|
||||
TargetRegisterInfo::isVirtualRegister(MO.getReg()))
|
||||
MI = reloadVirtReg(MBB, MI, i, ReloadedRegs);
|
||||
MI = reloadVirtReg(MBB, MI, i, ReloadedRegs,
|
||||
isCopy ? DstCopyReg : 0);
|
||||
}
|
||||
|
||||
// If this instruction is the last user of this register, kill the
|
||||
|
@ -948,8 +964,17 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
|
|||
unsigned DestPhysReg;
|
||||
|
||||
// If DestVirtReg already has a value, use it.
|
||||
if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
|
||||
DestPhysReg = getReg(MBB, MI, DestVirtReg);
|
||||
if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) {
|
||||
// If this is a copy, the source reg is a phys reg, and
|
||||
// that reg is available, use that phys reg for DestPhysReg.
|
||||
if (isCopy &&
|
||||
TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) &&
|
||||
isPhysRegAvailable(SrcCopyReg)) {
|
||||
DestPhysReg = SrcCopyReg;
|
||||
assignVirtToPhysReg(DestVirtReg, DestPhysReg);
|
||||
} else
|
||||
DestPhysReg = getReg(MBB, MI, DestVirtReg);
|
||||
}
|
||||
MF->getRegInfo().setPhysRegUsed(DestPhysReg);
|
||||
markVirtRegModified(DestVirtReg);
|
||||
getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
|
||||
|
@ -995,9 +1020,9 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
|
|||
// Finally, if this is a noop copy instruction, zap it. (Except that if
|
||||
// the copy is dead, it must be kept to avoid messing up liveness info for
|
||||
// the register scavenger. See pr4100.)
|
||||
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
|
||||
if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
|
||||
SrcReg == DstReg && DeadDefs.empty())
|
||||
if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
|
||||
SrcCopySubReg, DstCopySubReg) &&
|
||||
SrcCopyReg == DstCopyReg && DeadDefs.empty())
|
||||
MBB.erase(MI);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; RUN: llc < %s | grep {subfc r3,r5,r4}
|
||||
; RUN: llc < %s | grep {subfze r4,r2}
|
||||
; RUN: llc < %s -regalloc=local | grep {subfc r5,r2,r4}
|
||||
; RUN: llc < %s -regalloc=local | grep {subfze r2,r3}
|
||||
; RUN: llc < %s -regalloc=local | grep {subfc r5,r4,r3}
|
||||
; RUN: llc < %s -regalloc=local | grep {subfze r2,r2}
|
||||
; The first argument of subfc must not be the same as any other register.
|
||||
|
||||
; PR1357
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=local | FileCheck %s
|
||||
|
||||
@.str = private constant [12 x i8] c"x + y = %i\0A\00", align 1 ; <[12 x i8]*> [#uses=1]
|
||||
|
||||
define i32 @main() nounwind {
|
||||
entry:
|
||||
; CHECK: movl 24(%esp), %eax
|
||||
; CHECK-NOT: movl
|
||||
; CHECK: movl %eax, 36(%esp)
|
||||
; CHECK-NOT: movl
|
||||
; CHECK: movl 28(%esp), %ebx
|
||||
; CHECK-NOT: movl
|
||||
; CHECK: movl %ebx, 40(%esp)
|
||||
; CHECK-NOT: movl
|
||||
; CHECK: addl %ebx, %eax
|
||||
%retval = alloca i32 ; <i32*> [#uses=2]
|
||||
%"%ebx" = alloca i32 ; <i32*> [#uses=1]
|
||||
%"%eax" = alloca i32 ; <i32*> [#uses=2]
|
||||
%result = alloca i32 ; <i32*> [#uses=2]
|
||||
%y = alloca i32 ; <i32*> [#uses=2]
|
||||
%x = alloca i32 ; <i32*> [#uses=2]
|
||||
%0 = alloca i32 ; <i32*> [#uses=2]
|
||||
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
|
||||
store i32 1, i32* %x, align 4
|
||||
store i32 2, i32* %y, align 4
|
||||
call void asm sideeffect alignstack "# top of block", "~{dirflag},~{fpsr},~{flags},~{edi},~{esi},~{edx},~{ecx},~{eax}"() nounwind
|
||||
%asmtmp = call i32 asm sideeffect alignstack "movl $1, $0", "=={eax},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i32* %x) nounwind ; <i32> [#uses=1]
|
||||
store i32 %asmtmp, i32* %"%eax"
|
||||
%asmtmp1 = call i32 asm sideeffect alignstack "movl $1, $0", "=={ebx},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i32* %y) nounwind ; <i32> [#uses=1]
|
||||
store i32 %asmtmp1, i32* %"%ebx"
|
||||
%1 = call i32 asm "", "={bx}"() nounwind ; <i32> [#uses=1]
|
||||
%2 = call i32 asm "", "={ax}"() nounwind ; <i32> [#uses=1]
|
||||
%asmtmp2 = call i32 asm sideeffect alignstack "addl $1, $0", "=={eax},{ebx},{eax},~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %1, i32 %2) nounwind ; <i32> [#uses=1]
|
||||
store i32 %asmtmp2, i32* %"%eax"
|
||||
%3 = call i32 asm "", "={ax}"() nounwind ; <i32> [#uses=1]
|
||||
call void asm sideeffect alignstack "movl $0, $1", "{eax},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %3, i32* %result) nounwind
|
||||
%4 = load i32* %result, align 4 ; <i32> [#uses=1]
|
||||
%5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 %4) nounwind ; <i32> [#uses=0]
|
||||
store i32 0, i32* %0, align 4
|
||||
%6 = load i32* %0, align 4 ; <i32> [#uses=1]
|
||||
store i32 %6, i32* %retval, align 4
|
||||
br label %return
|
||||
|
||||
return: ; preds = %entry
|
||||
%retval3 = load i32* %retval ; <i32> [#uses=1]
|
||||
ret i32 %retval3
|
||||
}
|
||||
|
||||
declare i32 @printf(i8*, ...) nounwind
|
Loading…
Reference in New Issue