[x86] Fixing PR28755 by precomputing the address used in CMPXCHG8B

The bug arises during register allocation on i686 for
CMPXCHG8B instruction when base pointer is needed. CMPXCHG8B
needs 4 implicit registers (EAX, EBX, ECX, EDX) and a memory address,
plus ESI is reserved as the base pointer. With such constraints the only
way register allocator would do its job successfully is when the addressing
mode of the instruction requires only one register. If that is not the case
- we are emitting additional LEA instruction to compute the address.

It fixes PR28755.

Patch by Alexander Ivchenko <alexander.ivchenko@intel.com>

Differential Revision: https://reviews.llvm.org/D25088

llvm-svn: 287875
This commit is contained in:
Nikolai Bozhenov 2016-11-24 13:23:35 +00:00
parent bb64aa14a3
commit 3a8d108b2b
4 changed files with 98 additions and 1 deletions

View File

@ -25153,6 +25153,57 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
case X86::LCMPXCHG8B: {
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
// In addition to 4 E[ABCD] registers implied by encoding, CMPXCHG8B
// requires a memory operand. If it happens that current architecture is
// i686 and for current function we need a base pointer
// - which is ESI for i686 - register allocator would not be able to
// allocate registers for an address in form of X(%reg, %reg, Y)
// - there never would be enough unreserved registers during regalloc
// (without the need for base ptr the only option would be X(%edi, %esi, Y).
// We are giving a hand to register allocator by precomputing the address in
// a new vreg using LEA.
// If it is not i686 or there is no base pointer - nothing to do here.
if (!Subtarget.is32Bit() || !TRI->hasBasePointer(*MF))
return BB;
// Even though this code does not necessarily needs the base pointer to
// be ESI, we check for that. The reason: if this assert fails, there are
// some changes happened in the compiler base pointer handling, which most
// probably have to be addressed somehow here.
assert(TRI->getBaseRegister() == X86::ESI &&
"LCMPXCHG8B custom insertion for i686 is written with X86::ESI as a "
"base pointer in mind");
MachineRegisterInfo &MRI = MF->getRegInfo();
MVT SPTy = getPointerTy(MF->getDataLayout());
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
unsigned computedAddrVReg = MRI.createVirtualRegister(AddrRegClass);
X86AddressMode AM = getAddressFromInstr(&MI, 0);
// Regalloc does not need any help when the memory operand of CMPXCHG8B
// does not use index register.
if (AM.IndexReg == X86::NoRegister)
return BB;
// After X86TargetLowering::ReplaceNodeResults CMPXCHG8B is glued to its
// four operand definitions that are E[ABCD] registers. We skip them and
// then insert the LEA.
MachineBasicBlock::iterator MBBI(MI);
while (MBBI->definesRegister(X86::EAX) || MBBI->definesRegister(X86::EBX) ||
MBBI->definesRegister(X86::ECX) || MBBI->definesRegister(X86::EDX))
--MBBI;
addFullAddress(
BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM);
setDirectAddressInInstr(&MI, 0, computedAddrVReg);
return BB;
}
case X86::LCMPXCHG16B:
return BB;
case X86::LCMPXCHG8B_SAVE_EBX:
case X86::LCMPXCHG16B_SAVE_RBX: {
unsigned BasePtr =

View File

@ -128,6 +128,17 @@ addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0).addReg(0);
}
/// Replace the address used in the instruction with the direct memory
/// reference.
static inline void setDirectAddressInInstr(MachineInstr *MI, unsigned Operand,
unsigned Reg) {
// Direct memory address is in a form of: Reg, 1 (Scale), NoReg, 0, NoReg.
MI->getOperand(Operand).setReg(Reg);
MI->getOperand(Operand + 1).setImm(1);
MI->getOperand(Operand + 2).setReg(0);
MI->getOperand(Operand + 3).setImm(0);
MI->getOperand(Operand + 4).setReg(0);
}
static inline const MachineInstrBuilder &
addOffset(const MachineInstrBuilder &MIB, int Offset) {

View File

@ -723,7 +723,7 @@ defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, -1, "dec">;
multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
SDPatternOperator frag, X86MemOperand x86memop,
InstrItinClass itin> {
let isCodeGenOnly = 1 in {
let isCodeGenOnly = 1, usesCustomInserter = 1 in {
def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
!strconcat(mnemonic, "\t$ptr"),
[(frag addr:$ptr)], itin>, TB, LOCK;

View File

@ -0,0 +1,35 @@
; RUN: llc < %s -march=x86 -stackrealign -O2 | FileCheck %s
; PR28755
; Check that register allocator is able to handle that
; a-lot-of-fixed-and-reserved-registers case. We do that by
; emmiting lea before 4 cmpxchg8b operands generators.
define void @foo_alloca(i64* %a, i32 %off, i32 %n) {
%dummy = alloca i32, i32 %n
%addr = getelementptr inbounds i64, i64* %a, i32 %off
%res = cmpxchg i64* %addr, i64 0, i64 1 monotonic monotonic
ret void
}
; CHECK-LABEL: foo_alloca
; CHECK: leal {{\(%e..,%e..,.*\)}}, [[REGISTER:%e.i]]
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: movl $1, %ebx
; CHECK-NEXT: lock cmpxchg8b ([[REGISTER]])
; If we don't use index register in the address mode -
; check that we did not generate the lea.
define void @foo_alloca_direct_address(i64* %addr, i32 %n) {
%dummy = alloca i32, i32 %n
%res = cmpxchg i64* %addr, i64 0, i64 1 monotonic monotonic
ret void
}
; CHECK-LABEL: foo_alloca_direct_address
; CHECK-NOT: leal {{\(%e.*\)}}, [[REGISTER:%e.i]]
; CHECK: lock cmpxchg8b ([[REGISTER]])