Committing X86-64 support.

llvm-svn: 30177
Evan Cheng 2006-09-08 06:48:29 +00:00
parent 02a7d09b40
commit 11b0a5dbd4
25 changed files with 3607 additions and 466 deletions


@ -0,0 +1,269 @@
//===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===//
Implement different PIC models? Right now we only support Mac OS X with the
small PIC code model.
//===---------------------------------------------------------------------===//
Make use of the "Red Zone" (the 128 bytes below %rsp that the x86-64 ABI
guarantees leaf functions may use without adjusting the stack pointer).
//===---------------------------------------------------------------------===//
Implement __int128 and long double support.
//===---------------------------------------------------------------------===//
For this:
extern void xx(void);
void bar(void) {
xx();
}
gcc compiles to:
.globl _bar
_bar:
jmp _xx
We need to do the tailcall optimization as well.
//===---------------------------------------------------------------------===//
For this:
int test(int a)
{
return a * 3;
}
We generate
leal (%edi,%edi,2), %eax
We should be generating
leal (%rdi,%rdi,2), %eax
instead. The latter form does not require the 67H address-size prefix.
It's probably ok to simply emit the corresponding 64-bit super class registers
in this case?
//===---------------------------------------------------------------------===//
AMD64 Optimization Manual 8.2 has some nice information about optimizing integer
multiplication by a constant. How much of it applies to Intel's X86-64
implementation? There are definite trade-offs to consider: latency vs. register
pressure vs. code size.
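To make these trade-offs concrete, here is the usual shift-and-add
decomposition for one constant. This is only a sketch; the function name is
made up, and the assembly in the comment is typical compiler output, not taken
from either vendor's manual.
/* x*10 == (x + 4*x) * 2: one lea plus one add instead of an imul.
   Typical x86-64 output (approximate):
     leal (%rdi,%rdi,4), %eax   # eax = x*5
     addl %eax, %eax            # eax = x*10 */
unsigned mul10(unsigned x) {
  return (x + (x << 2)) << 1;
}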
//===---------------------------------------------------------------------===//
Are we better off using branches instead of cmove to implement FP to
unsigned i64?
_conv:
ucomiss LC0(%rip), %xmm0
cvttss2siq %xmm0, %rdx
jb L3
subss LC0(%rip), %xmm0
movabsq $-9223372036854775808, %rax
cvttss2siq %xmm0, %rdx
xorq %rax, %rdx
L3:
movq %rdx, %rax
ret
instead of
_conv:
movss LCPI1_0(%rip), %xmm1
cvttss2siq %xmm0, %rcx
movaps %xmm0, %xmm2
subss %xmm1, %xmm2
cvttss2siq %xmm2, %rax
movabsq $-9223372036854775808, %rdx
xorq %rdx, %rax
ucomiss %xmm1, %xmm0
cmovb %rcx, %rax
ret
Seems like the jb branch has a high likelihood of being taken. In that case it
would save a few instructions.
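For reference, the source behind _conv is presumably a plain float to unsigned
i64 conversion; this is an assumption, since the note only shows the assembly.
/* Presumed source of _conv (assumption). Values >= 2^63 need the
   "subtract 2^63, convert, then flip the sign bit" sequence seen in both
   versions above; cvttss2siq alone only covers the signed range. */
unsigned long long conv(float x) {
  return (unsigned long long)x;
}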
//===---------------------------------------------------------------------===//
Poor codegen:
int X[2];
int b;
void test(void) {
memset(X, b, 2*sizeof(X[0]));
}
llc:
movq _b@GOTPCREL(%rip), %rax
movzbq (%rax), %rax
movq %rax, %rcx
shlq $8, %rcx
orq %rax, %rcx
movq %rcx, %rax
shlq $16, %rax
orq %rcx, %rax
movq %rax, %rcx
shlq $32, %rcx
movq _X@GOTPCREL(%rip), %rdx
orq %rax, %rcx
movq %rcx, (%rdx)
ret
gcc:
movq _b@GOTPCREL(%rip), %rax
movabsq $72340172838076673, %rdx
movzbq (%rax), %rax
imulq %rdx, %rax
movq _X@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
ret
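The movabsq constant in the gcc version is 0x0101010101010101 (decimal
72340172838076673): multiplying the zero-extended fill byte by it replicates
the byte across all eight byte lanes, so a single store finishes the memset. A
sketch of the idea (the function name is illustrative):
#include <stdint.h>

/* Byte-splat by multiplication: b * 0x0101010101010101 places a copy of b
   in every byte of the 64-bit result. */
uint64_t splat8(uint8_t b) {
  return (uint64_t)b * 0x0101010101010101ULL;
}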
//===---------------------------------------------------------------------===//
Vararg function prologues can be further optimized. Currently all XMM registers
are stored into the register save area. Most of these stores can be eliminated,
since the number of XMM registers used (an upper bound) is passed in %al. gcc
produces something like the following:
movzbl %al, %edx
leaq 0(,%rdx,4), %rax
leaq 4+L2(%rip), %rdx
leaq 239(%rsp), %rax
jmp *%rdx
movaps %xmm7, -15(%rax)
movaps %xmm6, -31(%rax)
movaps %xmm5, -47(%rax)
movaps %xmm4, -63(%rax)
movaps %xmm3, -79(%rax)
movaps %xmm2, -95(%rax)
movaps %xmm1, -111(%rax)
movaps %xmm0, -127(%rax)
L2:
It jumps over the movaps stores that are not needed. It is hard to see this
being significant, as it adds 5 instructions (including an indirect branch) to
avoid executing 0 to 8 stores in the function prologue.
Perhaps we can optimize for the common case where no XMM registers are used for
parameter passing, i.e. if %al == 0, jump over all the stores. Or, in the case
of a leaf function where we can determine that no XMM input parameter is
needed, avoid emitting the stores at all.
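For instance, a purely integer varargs function still pays for all eight XMM
stores in its prologue today; a minimal example (illustrative, not from a test
case):
#include <stdarg.h>

/* No FP argument is ever passed, yet the register save area for
   %xmm0-%xmm7 is still populated on entry. */
int sum(int n, ...) {
  va_list ap;
  int i, s = 0;
  va_start(ap, n);
  for (i = 0; i < n; ++i)
    s += va_arg(ap, int);
  va_end(ap);
  return s;
}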
//===---------------------------------------------------------------------===//
AMD64 has a complex calling convention for aggregate passing by value:
1. If the size of an object is larger than two eightbytes, or in C++, is a non-
POD structure or union type, or contains unaligned fields, it has class
MEMORY.
2. Both eightbytes get initialized to class NO_CLASS.
3. Each field of an object is classified recursively so that always two fields
are considered. The resulting class is calculated according to the classes
of the fields in the eightbyte:
(a) If both classes are equal, this is the resulting class.
(b) If one of the classes is NO_CLASS, the resulting class is the other
class.
(c) If one of the classes is MEMORY, the result is the MEMORY class.
(d) If one of the classes is INTEGER, the result is INTEGER.
(e) If one of the classes is X87, X87UP, or COMPLEX_X87, MEMORY is used as
the class.
(f) Otherwise class SSE is used.
4. Then a post-merger cleanup is done:
(a) If one of the classes is MEMORY, the whole argument is passed in memory.
(b) If SSEUP is not preceded by SSE, it is converted to SSE.
(The merge in step 3 is sketched in code below.)
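The pairwise merge in step 3 translates almost directly into code; a minimal
sketch (the enum and function names are illustrative, not from any header):
enum Class { NO_CLASS, INTEGER, SSE, SSEUP, X87, X87UP, COMPLEX_X87, MEMORY };

/* Merge the classes of two fields sharing an eightbyte, per rules (a)-(f). */
static enum Class merge(enum Class a, enum Class b) {
  if (a == b) return a;                                /* (a) */
  if (a == NO_CLASS) return b;                         /* (b) */
  if (b == NO_CLASS) return a;
  if (a == MEMORY || b == MEMORY) return MEMORY;       /* (c) */
  if (a == INTEGER || b == INTEGER) return INTEGER;    /* (d) */
  if (a == X87 || a == X87UP || a == COMPLEX_X87 ||
      b == X87 || b == X87UP || b == COMPLEX_X87)
    return MEMORY;                                     /* (e) */
  return SSE;                                          /* (f) */
}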
Currently the llvm frontend does not handle this correctly.
Problem 1:
typedef struct { int i; double d; } QuadWordS;
It is currently passed in two i64 integer registers. However, a gcc-compiled
callee expects the second element 'd' to be passed in XMM0.
Problem 2:
typedef struct { int32_t i; float j; double d; } QuadWordS;
The first two fields together are the size of an i64, so they will be combined
and passed in an integer register (RDI). The third field is still passed in
XMM0.
Problem 3:
typedef struct { int64_t i; int8_t j; int64_t d; } S;
void test(S s)
The size of this aggregate is greater than two i64s, so it should be passed in
memory. Currently llvm breaks it down and passes it in three integer
registers.
Problem 4:
Taking problem 3 one step further: a function expects an aggregate value in
memory followed by more parameters passed in registers.
void test(S s, int b)
LLVM IR does not allow passing aggregates by value, so the frontend must break
the aggregate values (in problems 3 and 4) into a number of scalar values:
void %test(long %s.i, byte %s.j, long %s.d);
However, if the backend were to lower this code literally, it would pass the 3
values in integer registers. To force them to be passed in memory, the frontend
should change the function signature to:
void %test(long %undef1, long %undef2, long %undef3, long %undef4,
long %undef5, long %undef6,
long %s.i, byte %s.j, long %s.d);
And the call site would look something like this:
call void %test( undef, undef, undef, undef, undef, undef,
%tmp.s.i, %tmp.s.j, %tmp.s.d );
The first 6 undef parameters would exhaust the 6 integer registers used for
parameter passing. The following three integer values would then be forced into
memory.
For problem 4, the parameter 'b' would be moved to the front of the parameter
list so it will be passed in a register:
void %test(int %b,
long %undef1, long %undef2, long %undef3, long %undef4,
long %undef5, long %undef6,
long %s.i, byte %s.j, long %s.d);
//===---------------------------------------------------------------------===//
For this:
extern int dst[];
extern int* ptr;
void test(void) {
ptr = dst;
}
We generate this code for the static relocation model:
_test:
leaq _dst(%rip), %rax
movq %rax, _ptr(%rip)
ret
If we are in the small code model, then we can treat _dst as a 32-bit constant:
movq $_dst, _ptr(%rip)
Note, however, that we should continue to use RIP-relative addressing as much
as possible. The form above is actually one byte shorter than
movq $_dst, _ptr
//===---------------------------------------------------------------------===//
Right now the asm printer assumes that a GlobalAddress is accessed via RIP
relative addressing. Therefore, it is not possible to generate this:
movabsq $__ZTV10polynomialIdE+16, %rax
That is ok for now since we currently only support the small code model, so the
above is selected as
leaq __ZTV10polynomialIdE+16(%rip), %rax
This is probably slightly slower, but it is much shorter than movabsq. However,
if we were to support medium or larger code models, we would need to use the
movabs instruction. We should probably introduce something like AbsoluteAddress
to
distinguish it from GlobalAddress so the asm printer and JIT code emitter can
do the right thing.


@ -20,8 +20,8 @@ include "../Target.td"
// X86 Subtarget features.
//
def Feature64Bit : SubtargetFeature<"64bit", "Is64Bit", "true",
"Enable 64-bit instructions">;
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions">;
def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
"Enable MMX instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
@ -61,6 +61,8 @@ def : Proc<"prescott", [FeatureMMX, FeatureSSE1, FeatureSSE2,
FeatureSSE3]>;
def : Proc<"nocona", [FeatureMMX, FeatureSSE1, FeatureSSE2,
FeatureSSE3, Feature64Bit]>;
def : Proc<"core2", [FeatureMMX, FeatureSSE1, FeatureSSE2,
FeatureSSE3, Feature64Bit]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
@ -105,16 +107,20 @@ def X86InstrInfo : InstrInfo {
// should be kept up-to-date with the fields in the X86InstrInfo.h file.
let TSFlagsFields = ["FormBits",
"hasOpSizePrefix",
"hasAdSizePrefix",
"Prefix",
"hasREX_WPrefix",
"ImmTypeBits",
"FPFormBits",
"Opcode"];
let TSFlagsShifts = [0,
6,
7,
11,
8,
12,
13,
16];
16,
24];
}
// The X86 target supports two different syntaxes for emitting machine code.


@ -126,8 +126,9 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
O << '%';
unsigned Reg = MO.getReg();
if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
MVT::ValueType VT = (strcmp(Modifier,"subreg16") == 0)
? MVT::i16 : MVT::i8;
MVT::ValueType VT = (strcmp(Modifier+6,"64") == 0) ?
MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
Reg = getX86SubSuperRegister(Reg, VT);
}
for (const char *Name = RI.get(Reg).Name; *Name; ++Name)
@ -148,9 +149,11 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
if (!isMemOp) O << '$';
O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << "_"
<< MO.getJumpTableIndex();
if (Subtarget->isTargetDarwin() &&
if (X86PICStyle == PICStyle::Stub &&
TM.getRelocationModel() == Reloc::PIC_)
O << "-\"L" << getFunctionNumber() << "$pb\"";
if (Subtarget->is64Bit())
O << "(%rip)";
return;
}
case MachineOperand::MO_ConstantPoolIndex: {
@ -158,7 +161,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
if (!isMemOp) O << '$';
O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
<< MO.getConstantPoolIndex();
if (Subtarget->isTargetDarwin() &&
if (X86PICStyle == PICStyle::Stub &&
TM.getRelocationModel() == Reloc::PIC_)
O << "-\"L" << getFunctionNumber() << "$pb\"";
int Offset = MO.getOffset();
@ -166,47 +169,59 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
O << "+" << Offset;
else if (Offset < 0)
O << Offset;
if (Subtarget->is64Bit())
O << "(%rip)";
return;
}
case MachineOperand::MO_GlobalAddress: {
bool isCallOp = Modifier && !strcmp(Modifier, "call");
bool isMemOp = Modifier && !strcmp(Modifier, "mem");
if (!isMemOp && !isCallOp) O << '$';
// Darwin block shameless ripped from PPCAsmPrinter.cpp
if (Subtarget->isTargetDarwin() &&
GlobalValue *GV = MO.getGlobal();
std::string Name = Mang->getValueName(GV);
bool isExt = (GV->isExternal() || GV->hasWeakLinkage() ||
GV->hasLinkOnceLinkage());
if (X86PICStyle == PICStyle::Stub &&
TM.getRelocationModel() != Reloc::Static) {
GlobalValue *GV = MO.getGlobal();
std::string Name = Mang->getValueName(GV);
// Link-once, External, or Weakly-linked global variables need
// non-lazily-resolved stubs
if (GV->isExternal() || GV->hasWeakLinkage() ||
GV->hasLinkOnceLinkage()) {
if (isExt) {
// Dynamically-resolved functions need a stub for the function.
if (isCallOp && isa<Function>(GV) && cast<Function>(GV)->isExternal()) {
if (isCallOp && isa<Function>(GV)) {
FnStubs.insert(Name);
O << "L" << Name << "$stub";
} else {
GVStubs.insert(Name);
O << "L" << Name << "$non_lazy_ptr";
}
} else {
O << Mang->getValueName(GV);
}
} else
O << Name;
if (!isCallOp && TM.getRelocationModel() == Reloc::PIC_)
O << "-\"L" << getFunctionNumber() << "$pb\"";
} else
O << Mang->getValueName(MO.getGlobal());
} else
O << Name;
int Offset = MO.getOffset();
if (Offset > 0)
O << "+" << Offset;
else if (Offset < 0)
O << Offset;
if (!isCallOp &&
Subtarget->is64Bit()) {
if (isExt && TM.getRelocationModel() != Reloc::Static)
O << "@GOTPCREL";
O << "(%rip)";
}
return;
}
case MachineOperand::MO_ExternalSymbol: {
bool isCallOp = Modifier && !strcmp(Modifier, "call");
if (isCallOp &&
Subtarget->isTargetDarwin() &&
X86PICStyle == PICStyle::Stub &&
TM.getRelocationModel() != Reloc::Static) {
std::string Name(TAI->getGlobalPrefix());
Name += MO.getSymbolName();
@ -216,6 +231,11 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
}
if (!isCallOp) O << '$';
O << TAI->getGlobalPrefix() << MO.getSymbolName();
if (!isCallOp &&
Subtarget->is64Bit())
O << "(%rip)";
return;
}
default:
@ -238,7 +258,8 @@ void X86ATTAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) {
}
}
void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){
void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
const char *Modifier){
assert(isMem(MI, Op) && "Invalid memory reference!");
const MachineOperand &BaseReg = MI->getOperand(Op);
@ -266,12 +287,13 @@ void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){
if (IndexReg.getReg() || BaseReg.getReg()) {
O << "(";
if (BaseReg.getReg())
printOperand(MI, Op);
if (BaseReg.getReg()) {
printOperand(MI, Op, Modifier);
}
if (IndexReg.getReg()) {
O << ",";
printOperand(MI, Op+2);
printOperand(MI, Op+2, Modifier);
if (ScaleVal != 1)
O << "," << ScaleVal;
}
@ -350,43 +372,25 @@ bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
///
void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
++EmittedInsts;
// This works around some Darwin assembler bugs.
if (Subtarget->isTargetDarwin()) {
switch (MI->getOpcode()) {
case X86::REP_MOVSB:
O << "rep/movsb (%esi),(%edi)\n";
return;
case X86::REP_MOVSD:
O << "rep/movsl (%esi),(%edi)\n";
return;
case X86::REP_MOVSW:
O << "rep/movsw (%esi),(%edi)\n";
return;
case X86::REP_STOSB:
O << "rep/stosb\n";
return;
case X86::REP_STOSD:
O << "rep/stosl\n";
return;
case X86::REP_STOSW:
O << "rep/stosw\n";
return;
default:
break;
}
}
// See if a truncate instruction can be turned into a nop.
switch (MI->getOpcode()) {
default: break;
case X86::TRUNC_GR32_GR16:
case X86::TRUNC_GR32_GR8:
case X86::TRUNC_GR16_GR8: {
case X86::TRUNC_64to32:
case X86::TRUNC_64to16:
case X86::TRUNC_32to16:
case X86::TRUNC_32to8:
case X86::TRUNC_16to8:
case X86::TRUNC_32_to8:
case X86::TRUNC_16_to8: {
const MachineOperand &MO0 = MI->getOperand(0);
const MachineOperand &MO1 = MI->getOperand(1);
unsigned Reg0 = MO0.getReg();
unsigned Reg1 = MO1.getReg();
if (MI->getOpcode() == X86::TRUNC_GR32_GR16)
unsigned Opc = MI->getOpcode();
if (Opc == X86::TRUNC_64to32)
Reg1 = getX86SubSuperRegister(Reg1, MVT::i32);
else if (Opc == X86::TRUNC_32to16 || Opc == X86::TRUNC_64to16)
Reg1 = getX86SubSuperRegister(Reg1, MVT::i16);
else
Reg1 = getX86SubSuperRegister(Reg1, MVT::i8);
@ -395,6 +399,9 @@ void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
O << "\n\t";
break;
}
case X86::PsMOVZX64rr32:
O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t";
break;
}
// Call the autogenerated instruction printer routines.


@ -60,6 +60,9 @@ struct X86ATTAsmPrinter : public X86SharedAsmPrinter {
void printf128mem(const MachineInstr *MI, unsigned OpNo) {
printMemReference(MI, OpNo);
}
void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
printMemReference(MI, OpNo, "subreg64");
}
bool printAsmMRegister(const MachineOperand &MO, const char Mode);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@ -69,7 +72,8 @@ struct X86ATTAsmPrinter : public X86SharedAsmPrinter {
void printMachineInstruction(const MachineInstr *MI);
void printSSECC(const MachineInstr *MI, unsigned Op);
void printMemReference(const MachineInstr *MI, unsigned Op);
void printMemReference(const MachineInstr *MI, unsigned Op,
const char *Modifier=NULL);
void printPICLabel(const MachineInstr *MI, unsigned Op);
bool runOnMachineFunction(MachineFunction &F);
};


@ -30,8 +30,12 @@ Statistic<> llvm::EmittedInsts("asm-printer",
"Number of machine instrs printed");
/// doInitialization
bool X86SharedAsmPrinter::doInitialization(Module &M) {
if (Subtarget->isTargetDarwin()) {
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
if (!Subtarget->is64Bit())
X86PICStyle = PICStyle::Stub;
// Emit initial debug information.
DW.BeginModule(&M);
}


@ -29,12 +29,19 @@ namespace llvm {
extern Statistic<> EmittedInsts;
// FIXME: Move this to CodeGen/AsmPrinter.h
namespace PICStyle {
enum X86AsmPICStyle {
Stub, GOT
};
}
struct VISIBILITY_HIDDEN X86SharedAsmPrinter : public AsmPrinter {
DwarfWriter DW;
X86SharedAsmPrinter(std::ostream &O, X86TargetMachine &TM,
const TargetAsmInfo *T)
: AsmPrinter(O, TM, T), DW(O, this, T) {
: AsmPrinter(O, TM, T), DW(O, this, T), X86PICStyle(PICStyle::GOT) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
}
@ -49,6 +56,8 @@ struct VISIBILITY_HIDDEN X86SharedAsmPrinter : public AsmPrinter {
MachineFunctionPass::getAnalysisUsage(AU);
}
PICStyle::X86AsmPICStyle X86PICStyle;
const X86Subtarget *Subtarget;
// Necessary for Darwin to print out the appropriate types of linker stubs


@ -12,6 +12,8 @@
//
//===----------------------------------------------------------------------===//
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "X86Relocations.h"
#include "X86.h"
@ -35,14 +37,16 @@ namespace {
namespace {
class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass {
const X86InstrInfo *II;
TargetMachine &TM;
const TargetData *TD;
TargetMachine &TM;
MachineCodeEmitter &MCE;
bool Is64BitMode;
public:
explicit Emitter(TargetMachine &tm, MachineCodeEmitter &mce)
: II(0), TM(tm), MCE(mce) {}
: II(0), TD(0), TM(tm), MCE(mce), Is64BitMode(false) {}
Emitter(TargetMachine &tm, MachineCodeEmitter &mce,
const X86InstrInfo& ii)
: II(&ii), TM(tm), MCE(mce) {}
const X86InstrInfo &ii, const TargetData &td, bool is64)
: II(&ii), TD(&td), TM(tm), MCE(mce), Is64BitMode(is64) {}
bool runOnMachineFunction(MachineFunction &MF);
@ -54,20 +58,29 @@ namespace {
private:
void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
void emitPCRelativeValue(unsigned Address);
void emitGlobalAddressForCall(GlobalValue *GV, bool isTailCall);
void emitGlobalAddressForPtr(GlobalValue *GV, int Disp = 0);
void emitPCRelativeValue(intptr_t Address);
void emitGlobalAddressForCall(GlobalValue *GV, bool DoesntNeedStub);
void emitGlobalAddressForPtr(GlobalValue *GV, bool isPCRelative,
int Disp = 0, unsigned PCAdj = 0);
void emitExternalSymbolAddress(const char *ES, bool isPCRelative);
void emitPCRelativeConstPoolAddress(unsigned CPI, int Disp = 0,
unsigned PCAdj = 0);
void emitPCRelativeJumpTableAddress(unsigned JTI, unsigned PCAdj = 0);
void emitDisplacementField(const MachineOperand *RelocOp, int DispVal);
void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
unsigned PCAdj = 0);
void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
void emitSIBByte(unsigned SS, unsigned Index, unsigned Base);
void emitConstant(unsigned Val, unsigned Size);
void emitConstant(uint64_t Val, unsigned Size);
void emitMemModRMByte(const MachineInstr &MI,
unsigned Op, unsigned RegOpcodeField);
unsigned Op, unsigned RegOpcodeField,
unsigned PCAdj = 0);
unsigned getX86RegNum(unsigned RegNo);
bool isX86_64ExtendedReg(const MachineOperand &MO);
unsigned determineREX(const MachineInstr &MI);
};
}
@ -83,6 +96,9 @@ bool Emitter::runOnMachineFunction(MachineFunction &MF) {
MF.getTarget().getRelocationModel() != Reloc::Static) &&
"JIT relocation model must be set to static or default!");
II = ((X86TargetMachine&)MF.getTarget()).getInstrInfo();
TD = ((X86TargetMachine&)MF.getTarget()).getTargetData();
Is64BitMode =
((X86TargetMachine&)MF.getTarget()).getSubtarget<X86Subtarget>().is64Bit();
do {
MCE.startFunction(MF);
@ -98,9 +114,9 @@ bool Emitter::runOnMachineFunction(MachineFunction &MF) {
return false;
}
/// emitPCRelativeValue - Emit a 32-bit PC relative address.
/// emitPCRelativeValue - Emit a PC relative address.
///
void Emitter::emitPCRelativeValue(unsigned Address) {
void Emitter::emitPCRelativeValue(intptr_t Address) {
MCE.emitWordLE(Address-MCE.getCurrentPCValue()-4);
}
@ -119,20 +135,22 @@ void Emitter::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) {
/// emitGlobalAddressForCall - Emit the specified address to the code stream
/// assuming this is part of a function call, which is PC relative.
///
void Emitter::emitGlobalAddressForCall(GlobalValue *GV, bool isTailCall) {
void Emitter::emitGlobalAddressForCall(GlobalValue *GV, bool DoesntNeedStub) {
MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
X86::reloc_pcrel_word, GV, 0,
!isTailCall /*Doesn'tNeedStub*/));
DoesntNeedStub));
MCE.emitWordLE(0);
}
/// emitGlobalAddress - Emit the specified address to the code stream assuming
/// this is part of a "take the address of a global" instruction, which is not
/// PC relative.
/// this is part of a "take the address of a global" instruction.
///
void Emitter::emitGlobalAddressForPtr(GlobalValue *GV, int Disp /* = 0 */) {
MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
X86::reloc_absolute_word, GV));
void Emitter::emitGlobalAddressForPtr(GlobalValue *GV, bool isPCRelative,
int Disp /* = 0 */,
unsigned PCAdj /* = 0 */) {
unsigned rt = isPCRelative ? X86::reloc_pcrel_word : X86::reloc_absolute_word;
MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), rt,
GV, PCAdj));
MCE.emitWordLE(Disp); // The relocated value will be added to the displacement
}
@ -145,6 +163,26 @@ void Emitter::emitExternalSymbolAddress(const char *ES, bool isPCRelative) {
MCE.emitWordLE(0);
}
/// emitPCRelativeConstPoolAddress - Arrange for the address of a constant pool
/// to be emitted to the current location in the function, and allow it to be PC
/// relative.
void Emitter::emitPCRelativeConstPoolAddress(unsigned CPI, int Disp /* = 0 */,
unsigned PCAdj /* = 0 */) {
MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
X86::reloc_pcrel_word, CPI, PCAdj));
MCE.emitWordLE(Disp); // The relocated value will be added to the displacement
}
/// emitPCRelativeJumpTableAddress - Arrange for the address of a jump table to
/// be emitted to the current location in the function, and allow it to be PC
/// relative.
void Emitter::emitPCRelativeJumpTableAddress(unsigned JTI,
unsigned PCAdj /* = 0 */) {
MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
X86::reloc_pcrel_word, JTI, PCAdj));
MCE.emitWordLE(0); // The relocated value will be added to the displacement
}
/// N86 namespace - Native X86 Register numbers... used by X86 backend.
///
namespace N86 {
@ -153,28 +191,53 @@ namespace N86 {
};
}
// getX86RegNum - This function maps LLVM register identifiers to their X86
// specific numbering, which is used in various places encoding instructions.
//
static unsigned getX86RegNum(unsigned RegNo) {
unsigned Emitter::getX86RegNum(unsigned RegNo) {
switch(RegNo) {
case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
case X86::ESP: case X86::SP: case X86::AH: return N86::ESP;
case X86::EBP: case X86::BP: case X86::CH: return N86::EBP;
case X86::ESI: case X86::SI: case X86::DH: return N86::ESI;
case X86::EDI: case X86::DI: case X86::BH: return N86::EDI;
case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
return N86::ESP;
case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
return N86::EBP;
case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
return N86::ESI;
case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
return N86::EDI;
case X86::R8: case X86::R8D: case X86::R8W: case X86::R8B:
return N86::EAX;
case X86::R9: case X86::R9D: case X86::R9W: case X86::R9B:
return N86::ECX;
case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
return N86::EDX;
case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
return N86::EBX;
case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
return N86::ESP;
case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
return N86::EBP;
case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
return N86::ESI;
case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
return N86::EDI;
case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
return RegNo-X86::ST0;
case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7:
return RegNo-X86::XMM0;
case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7:
return II->getRegisterInfo().getDwarfRegNum(RegNo) -
II->getRegisterInfo().getDwarfRegNum(X86::XMM0);
case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
return II->getRegisterInfo().getDwarfRegNum(RegNo) -
II->getRegisterInfo().getDwarfRegNum(X86::XMM8);
default:
assert(MRegisterInfo::isVirtualRegister(RegNo) &&
@ -199,7 +262,7 @@ void Emitter::emitSIBByte(unsigned SS, unsigned Index, unsigned Base) {
MCE.emitByte(ModRMByte(SS, Index, Base));
}
void Emitter::emitConstant(unsigned Val, unsigned Size) {
void Emitter::emitConstant(uint64_t Val, unsigned Size) {
// Output the constant in little endian byte order...
for (unsigned i = 0; i != Size; ++i) {
MCE.emitByte(Val & 255);
@ -214,7 +277,7 @@ static bool isDisp8(int Value) {
}
void Emitter::emitDisplacementField(const MachineOperand *RelocOp,
int DispVal) {
int DispVal, unsigned PCAdj) {
// If this is a simple integer displacement that doesn't require a relocation,
// emit it now.
if (!RelocOp) {
@ -225,14 +288,27 @@ void Emitter::emitDisplacementField(const MachineOperand *RelocOp,
// Otherwise, this is something that requires a relocation. Emit it as such
// now.
if (RelocOp->isGlobalAddress()) {
emitGlobalAddressForPtr(RelocOp->getGlobal(), RelocOp->getOffset());
// In 64-bit static small code model, we could potentially emit absolute.
// But it's probably not beneficial.
// 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
// 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
emitGlobalAddressForPtr(RelocOp->getGlobal(), Is64BitMode,
RelocOp->getOffset(), PCAdj);
} else if (RelocOp->isConstantPoolIndex()) {
// Must be in 64-bit mode.
emitPCRelativeConstPoolAddress(RelocOp->getConstantPoolIndex(),
RelocOp->getOffset(), PCAdj);
} else if (RelocOp->isJumpTableIndex()) {
// Must be in 64-bit mode.
emitPCRelativeJumpTableAddress(RelocOp->getJumpTableIndex(), PCAdj);
} else {
assert(0 && "Unknown value to relocate!");
}
}
void Emitter::emitMemModRMByte(const MachineInstr &MI,
unsigned Op, unsigned RegOpcodeField) {
unsigned Op, unsigned RegOpcodeField,
unsigned PCAdj) {
const MachineOperand &Op3 = MI.getOperand(Op+3);
int DispVal = 0;
const MachineOperand *DispForReloc = 0;
@ -241,10 +317,18 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
if (Op3.isGlobalAddress()) {
DispForReloc = &Op3;
} else if (Op3.isConstantPoolIndex()) {
DispVal += MCE.getConstantPoolEntryAddress(Op3.getConstantPoolIndex());
DispVal += Op3.getOffset();
if (Is64BitMode) {
DispForReloc = &Op3;
} else {
DispVal += MCE.getConstantPoolEntryAddress(Op3.getConstantPoolIndex());
DispVal += Op3.getOffset();
}
} else if (Op3.isJumpTableIndex()) {
DispVal += MCE.getJumpTableEntryAddress(Op3.getJumpTableIndex());
if (Is64BitMode) {
DispForReloc = &Op3;
} else {
DispVal += MCE.getJumpTableEntryAddress(Op3.getJumpTableIndex());
}
} else {
DispVal = Op3.getImm();
}
@ -256,12 +340,13 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
unsigned BaseReg = Base.getReg();
// Is a SIB byte needed?
if (IndexReg.getReg() == 0 && BaseReg != X86::ESP) {
if (IndexReg.getReg() == 0 &&
(BaseReg == 0 || getX86RegNum(BaseReg) != N86::ESP)) {
if (BaseReg == 0) { // Just a displacement?
// Emit special case [disp32] encoding
MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
emitDisplacementField(DispForReloc, DispVal);
emitDisplacementField(DispForReloc, DispVal, PCAdj);
} else {
unsigned BaseRegNo = getX86RegNum(BaseReg);
if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
@ -274,12 +359,13 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
} else {
// Emit the most general non-SIB encoding: [REG+disp32]
MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
emitDisplacementField(DispForReloc, DispVal);
emitDisplacementField(DispForReloc, DispVal, PCAdj);
}
}
} else { // We need a SIB byte, so start by outputting the ModR/M byte first
assert(IndexReg.getReg() != X86::ESP && "Cannot use ESP as index reg!");
assert(IndexReg.getReg() != X86::ESP &&
IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
bool ForceDisp32 = false;
bool ForceDisp8 = false;
@ -292,7 +378,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
// Emit the normal disp32 encoding.
MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
ForceDisp32 = true;
} else if (DispVal == 0 && BaseReg != X86::EBP) {
} else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) {
// Emit no displacement ModR/M byte
MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
} else if (isDisp8(DispVal)) {
@ -327,7 +413,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
if (ForceDisp8) {
emitConstant(DispVal, 1);
} else if (DispVal != 0 || ForceDisp32) {
emitDisplacementField(DispForReloc, DispVal);
emitDisplacementField(DispForReloc, DispVal, PCAdj);
}
}
}
@ -337,11 +423,131 @@ static unsigned sizeOfImm(const TargetInstrDescriptor &Desc) {
case X86II::Imm8: return 1;
case X86II::Imm16: return 2;
case X86II::Imm32: return 4;
case X86II::Imm64: return 8;
default: assert(0 && "Immediate size not set!");
return 0;
}
}
/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended register?
/// e.g. r8, xmm8, etc.
bool Emitter::isX86_64ExtendedReg(const MachineOperand &MO) {
if (!MO.isRegister()) return false;
unsigned RegNo = MO.getReg();
int DWNum = II->getRegisterInfo().getDwarfRegNum(RegNo);
if (DWNum >= II->getRegisterInfo().getDwarfRegNum(X86::R8) &&
DWNum <= II->getRegisterInfo().getDwarfRegNum(X86::R15))
return true;
if (DWNum >= II->getRegisterInfo().getDwarfRegNum(X86::XMM8) &&
DWNum <= II->getRegisterInfo().getDwarfRegNum(X86::XMM15))
return true;
return false;
}
inline static bool isX86_64TruncToByte(unsigned oc) {
return (oc == X86::TRUNC_64to8 || oc == X86::TRUNC_32to8 ||
oc == X86::TRUNC_16to8);
}
inline static bool isX86_64NonExtLowByteReg(unsigned reg) {
return (reg == X86::SPL || reg == X86::BPL ||
reg == X86::SIL || reg == X86::DIL);
}
/// determineREX - Determine if the MachineInstr has to be encoded with an X86-64
/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
/// size, and 3) use of X86-64 extended registers.
unsigned Emitter::determineREX(const MachineInstr &MI) {
unsigned REX = 0;
unsigned Opcode = MI.getOpcode();
const TargetInstrDescriptor &Desc = II->get(Opcode);
// Pseudo instructions do not need REX prefix byte.
if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
return 0;
if (Desc.TSFlags & X86II::REX_W)
REX |= 1 << 3;
if (MI.getNumOperands()) {
// If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
bool isTrunc8 = isX86_64TruncToByte(Opcode);
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand& MO = MI.getOperand(i);
if (MO.isRegister()) {
unsigned Reg = MO.getReg();
// Truncations to byte are actually movb instructions. The real source
// operand is the low byte of the register.
if (isTrunc8 && i == 1)
Reg = getX86SubSuperRegister(Reg, MVT::i8);
if (isX86_64NonExtLowByteReg(Reg))
REX |= 0x40;
}
}
switch (Desc.TSFlags & X86II::FormMask) {
case X86II::MRMInitReg:
if (isX86_64ExtendedReg(MI.getOperand(0)))
REX |= (1 << 0) | (1 << 2);
break;
case X86II::MRMSrcReg: {
if (isX86_64ExtendedReg(MI.getOperand(0)))
REX |= 1 << 2;
for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand& MO = MI.getOperand(i);
if (isX86_64ExtendedReg(MO))
REX |= 1 << 0;
}
break;
}
case X86II::MRMSrcMem: {
if (isX86_64ExtendedReg(MI.getOperand(0)))
REX |= 1 << 2;
unsigned Bit = 0;
for (unsigned i = 1; i != 5; ++i) {
const MachineOperand& MO = MI.getOperand(i);
if (MO.isRegister()) {
if (isX86_64ExtendedReg(MO))
REX |= 1 << Bit;
Bit++;
}
}
break;
}
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m:
case X86II::MRMDestMem: {
if (MI.getNumOperands() >= 5 &&
isX86_64ExtendedReg(MI.getOperand(4)))
REX |= 1 << 2;
unsigned Bit = 0;
for (unsigned i = 0; i != 4; ++i) {
const MachineOperand& MO = MI.getOperand(i);
if (MO.isRegister()) {
if (isX86_64ExtendedReg(MO))
REX |= 1 << Bit;
Bit++;
}
}
break;
}
default: {
if (isX86_64ExtendedReg(MI.getOperand(0)))
REX |= 1 << 0;
for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand& MO = MI.getOperand(i);
if (isX86_64ExtendedReg(MO))
REX |= 1 << 2;
}
break;
}
}
}
return REX;
}
void Emitter::emitInstruction(const MachineInstr &MI) {
NumEmitted++; // Keep track of the # of mi's emitted
@ -354,18 +560,22 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
// Emit the operand size opcode prefix as needed.
if (Desc.TSFlags & X86II::OpSize) MCE.emitByte(0x66);
// Emit the address size opcode prefix as needed.
if (Desc.TSFlags & X86II::AdSize) MCE.emitByte(0x67);
bool Need0FPrefix = false;
switch (Desc.TSFlags & X86II::Op0Mask) {
case X86II::TB:
MCE.emitByte(0x0F); // Two-byte opcode prefix
Need0FPrefix = true; // Two-byte opcode prefix
break;
case X86II::REP: break; // already handled.
case X86II::XS: // F3 0F
MCE.emitByte(0xF3);
MCE.emitByte(0x0F);
Need0FPrefix = true;
break;
case X86II::XD: // F2 0F
MCE.emitByte(0xF2);
MCE.emitByte(0x0F);
Need0FPrefix = true;
break;
case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
@ -377,6 +587,17 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
case 0: break; // No prefix!
}
if (Is64BitMode) {
// REX prefix
unsigned REX = determineREX(MI);
if (REX)
MCE.emitByte(0x40 | REX);
}
// 0x0F escape code must be emitted just before the opcode.
if (Need0FPrefix)
MCE.emitByte(0x0F);
// If this is a two-address instruction, skip one of the register operands.
unsigned CurOp = 0;
CurOp += (Desc.Flags & M_2_ADDR_FLAG) != 0;
@ -397,6 +618,7 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
case X86::IMPLICIT_DEF_GR8:
case X86::IMPLICIT_DEF_GR16:
case X86::IMPLICIT_DEF_GR32:
case X86::IMPLICIT_DEF_GR64:
case X86::IMPLICIT_DEF_FR32:
case X86::IMPLICIT_DEF_FR64:
case X86::IMPLICIT_DEF_VR64:
@ -417,7 +639,7 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
} else if (MO.isGlobalAddress()) {
bool isTailCall = Opcode == X86::TAILJMPd ||
Opcode == X86::TAILJMPr || Opcode == X86::TAILJMPm;
emitGlobalAddressForCall(MO.getGlobal(), isTailCall);
emitGlobalAddressForCall(MO.getGlobal(), !isTailCall);
} else if (MO.isExternalSymbol()) {
emitExternalSymbolAddress(MO.getSymbolName(), true);
} else if (MO.isImmediate()) {
@ -434,15 +656,15 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
if (CurOp != MI.getNumOperands()) {
const MachineOperand &MO1 = MI.getOperand(CurOp++);
if (MO1.isGlobalAddress()) {
assert(sizeOfImm(Desc) == 4 &&
assert(sizeOfImm(Desc) == TD->getPointerSize() &&
"Don't know how to emit non-pointer values!");
emitGlobalAddressForPtr(MO1.getGlobal(), MO1.getOffset());
emitGlobalAddressForPtr(MO1.getGlobal(), Is64BitMode, MO1.getOffset());
} else if (MO1.isExternalSymbol()) {
assert(sizeOfImm(Desc) == 4 &&
assert(sizeOfImm(Desc) == TD->getPointerSize() &&
"Don't know how to emit non-pointer values!");
emitExternalSymbolAddress(MO1.getSymbolName(), false);
} else if (MO1.isJumpTableIndex()) {
assert(sizeOfImm(Desc) == 4 &&
assert(sizeOfImm(Desc) == TD->getPointerSize() &&
"Don't know how to emit non-pointer values!");
emitConstant(MCE.getJumpTableEntryAddress(MO1.getJumpTableIndex()), 4);
} else {
@ -460,13 +682,14 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
break;
}
case X86II::MRMDestMem:
case X86II::MRMDestMem: {
MCE.emitByte(BaseOpcode);
emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(CurOp+4).getReg()));
CurOp += 5;
if (CurOp != MI.getNumOperands())
emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
break;
}
case X86II::MRMSrcReg:
MCE.emitByte(BaseOpcode);
@ -477,13 +700,17 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
break;
case X86II::MRMSrcMem:
case X86II::MRMSrcMem: {
unsigned PCAdj = (CurOp+5 != MI.getNumOperands()) ? sizeOfImm(Desc) : 0;
MCE.emitByte(BaseOpcode);
emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()));
emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()),
PCAdj);
CurOp += 5;
if (CurOp != MI.getNumOperands())
emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
break;
}
case X86II::MRM0r: case X86II::MRM1r:
case X86II::MRM2r: case X86II::MRM3r:
@ -500,9 +727,13 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m:
case X86II::MRM6m: case X86II::MRM7m: {
unsigned PCAdj = (CurOp+4 != MI.getNumOperands()) ?
(MI.getOperand(CurOp+4).isImmediate() ? sizeOfImm(Desc) : 4) : 0;
MCE.emitByte(BaseOpcode);
emitMemModRMByte(MI, CurOp, (Desc.TSFlags & X86II::FormMask)-X86II::MRM0m);
emitMemModRMByte(MI, CurOp, (Desc.TSFlags & X86II::FormMask)-X86II::MRM0m,
PCAdj);
CurOp += 4;
if (CurOp != MI.getNumOperands()) {
@ -510,13 +741,14 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
if (MO.isImmediate())
emitConstant(MO.getImm(), sizeOfImm(Desc));
else if (MO.isGlobalAddress())
emitGlobalAddressForPtr(MO.getGlobal(), MO.getOffset());
emitGlobalAddressForPtr(MO.getGlobal(), Is64BitMode, MO.getOffset());
else if (MO.isJumpTableIndex())
emitConstant(MCE.getJumpTableEntryAddress(MO.getJumpTableIndex()), 4);
else
assert(0 && "Unknown operand!");
}
break;
}
case X86II::MRMInitReg:
MCE.emitByte(BaseOpcode);


@ -30,8 +30,9 @@
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/ADT/Statistic.h"
#include <deque>
#include <iostream>
@ -58,16 +59,19 @@ namespace {
int FrameIndex;
} Base;
bool isRIPRel; // RIP relative?
unsigned Scale;
SDOperand IndexReg;
unsigned Disp;
GlobalValue *GV;
Constant *CP;
const char *ES;
int JT;
unsigned Align; // CP alignment.
X86ISelAddressMode()
: BaseType(RegBase), Scale(1), IndexReg(), Disp(0), GV(0),
CP(0), Align(0) {
: BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
GV(0), CP(0), ES(0), JT(-1), Align(0) {
}
};
}
@ -92,6 +96,10 @@ namespace {
///
bool FastISel;
/// TM - Keep a reference to X86TargetMachine.
///
X86TargetMachine &TM;
/// X86Lowering - This object fully describes how to lower LLVM code to an
/// X86-specific SelectionDAG.
X86TargetLowering X86Lowering;
@ -100,12 +108,14 @@ namespace {
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
/// GlobalBaseReg - keeps track of the virtual register mapped onto global
/// base register.
unsigned GlobalBaseReg;
public:
X86DAGToDAGISel(X86TargetMachine &TM, bool fast)
X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
: SelectionDAGISel(X86Lowering),
ContainsFPCode(false), FastISel(fast),
ContainsFPCode(false), FastISel(fast), TM(tm),
X86Lowering(*TM.getTargetLowering()),
Subtarget(&TM.getSubtarget<X86Subtarget>()) {}
@ -156,13 +166,22 @@ namespace {
SDOperand &Scale, SDOperand &Index,
SDOperand &Disp) {
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, MVT::i32) : AM.Base.Reg;
CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
AM.Base.Reg;
Scale = getI8Imm(AM.Scale);
Index = AM.IndexReg;
Disp = AM.GV ? CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp)
: (AM.CP ?
CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp)
: getI32Imm(AM.Disp));
// These are 32-bit even in 64-bit mode since RIP relative offset
// is 32-bit.
if (AM.GV)
Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
else if (AM.CP)
Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
else if (AM.ES)
Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
else if (AM.JT != -1)
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
else
Disp = getI32Imm(AM.Disp);
}
/// getI8Imm - Return a target constant with the specified value, of type
@ -476,26 +495,56 @@ void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
/// addressing mode
bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
bool isRoot) {
// RIP relative addressing: %rip + 32-bit displacement!
if (AM.isRIPRel) {
if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
uint64_t Val = cast<ConstantSDNode>(N)->getValue();
if (isInt32(AM.Disp + Val)) {
AM.Disp += Val;
return false;
}
}
return true;
}
int id = N.Val->getNodeId();
bool Available = isSelected(id);
switch (N.getOpcode()) {
default: break;
case ISD::Constant:
AM.Disp += cast<ConstantSDNode>(N)->getValue();
return false;
case ISD::Constant: {
uint64_t Val = cast<ConstantSDNode>(N)->getValue();
if (isInt32(AM.Disp + Val)) {
AM.Disp += Val;
return false;
}
break;
}
case X86ISD::Wrapper:
// If both base and index components have been picked, we can't fit
// the result available in the register in the addressing mode. Duplicate
// GlobalAddress or ConstantPool as displacement.
if (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
// If the value is available in a register and both base and index components
// have been picked, we can't fit the register into the addressing mode.
// Duplicate the GlobalAddress or ConstantPool as the displacement instead.
// Can't fit GV or CP in addressing mode for X86-64 medium or large code
// model since the displacement field is 32-bit. Ok for small code model.
// For X86-64 PIC code, only allow GV / CP + displacement so we can use RIP
// relative addressing mode.
if ((!Subtarget->is64Bit() || TM.getCodeModel() == CodeModel::Small) &&
(!Available || (AM.Base.Reg.Val && AM.IndexReg.Val))) {
bool isRIP = Subtarget->is64Bit();
if (isRIP && (AM.Base.Reg.Val || AM.Scale > 1 || AM.IndexReg.Val ||
AM.BaseType == X86ISelAddressMode::FrameIndexBase))
break;
if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(N.getOperand(0))) {
if (AM.CP == 0) {
AM.CP = CP->get();
AM.Align = CP->getAlignment();
AM.Disp += CP->getOffset();
if (isRIP)
AM.isRIPRel = true;
return false;
}
} else if (GlobalAddressSDNode *G =
@ -503,6 +552,20 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
if (AM.GV == 0) {
AM.GV = G->getGlobal();
AM.Disp += G->getOffset();
if (isRIP)
AM.isRIPRel = true;
return false;
}
} else if (isRoot && isRIP) {
if (ExternalSymbolSDNode *S =
dyn_cast<ExternalSymbolSDNode>(N.getOperand(0))) {
AM.ES = S->getSymbol();
AM.isRIPRel = true;
return false;
} else if (JumpTableSDNode *J =
dyn_cast<JumpTableSDNode>(N.getOperand(0))) {
AM.JT = J->getIndex();
AM.isRIPRel = true;
return false;
}
}
@ -533,7 +596,11 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
AM.IndexReg = ShVal.Val->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.Val->getOperand(1));
AM.Disp += AddVal->getValue() << Val;
uint64_t Disp = AM.Disp + (AddVal->getValue() << Val);
if (isInt32(Disp))
AM.Disp = Disp;
else
AM.IndexReg = ShVal;
} else {
AM.IndexReg = ShVal;
}
@ -563,7 +630,11 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
Reg = MulVal.Val->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(MulVal.Val->getOperand(1));
AM.Disp += AddVal->getValue() * CN->getValue();
uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
if (isInt32(Disp))
AM.Disp = Disp;
else
Reg = N.Val->getOperand(0);
} else {
Reg = N.Val->getOperand(0);
}
@ -641,13 +712,14 @@ bool X86DAGToDAGISel::SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
if (MatchAddress(N, AM))
return false;
MVT::ValueType VT = N.getValueType();
if (AM.BaseType == X86ISelAddressMode::RegBase) {
if (!AM.Base.Reg.Val)
AM.Base.Reg = CurDAG->getRegister(0, MVT::i32);
AM.Base.Reg = CurDAG->getRegister(0, VT);
}
if (!AM.IndexReg.Val)
AM.IndexReg = CurDAG->getRegister(0, MVT::i32);
AM.IndexReg = CurDAG->getRegister(0, VT);
getAddressOperands(AM, Base, Scale, Index, Disp);
return true;
@ -662,19 +734,20 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base,
if (MatchAddress(N, AM))
return false;
MVT::ValueType VT = N.getValueType();
unsigned Complexity = 0;
if (AM.BaseType == X86ISelAddressMode::RegBase)
if (AM.Base.Reg.Val)
Complexity = 1;
else
AM.Base.Reg = CurDAG->getRegister(0, MVT::i32);
AM.Base.Reg = CurDAG->getRegister(0, VT);
else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
Complexity = 4;
if (AM.IndexReg.Val)
Complexity++;
else
AM.IndexReg = CurDAG->getRegister(0, MVT::i32);
AM.IndexReg = CurDAG->getRegister(0, VT);
if (AM.Scale > 2)
Complexity += 2;
@ -687,8 +760,14 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base,
// optimal (especially for code size consideration). LEA is nice because of
// its three-address nature. Tweak the cost function again when we can run
// convertToThreeAddress() at register allocation time.
if (AM.GV || AM.CP)
Complexity += 2;
if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
// For X86-64, we should always use lea to materialize RIP relative
// addresses.
if (Subtarget->is64Bit())
Complexity = 4;
else
Complexity += 2;
}
if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
Complexity++;
@ -721,6 +800,7 @@ static bool isRegister0(SDOperand Op) {
/// base address to use for accessing globals into a register.
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
if (!GlobalBaseReg) {
// Insert the set of GlobalBaseReg into the first MBB of the function
MachineBasicBlock &FirstMBB = BB->getParent()->front();
@ -732,7 +812,7 @@ SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
BuildMI(FirstMBB, MBBI, X86::MovePCtoStack, 0);
BuildMI(FirstMBB, MBBI, X86::POP32r, 1, GlobalBaseReg);
}
return CurDAG->getRegister(GlobalBaseReg, MVT::i32).Val;
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
}
static SDNode *FindCallStartFromCall(SDNode *Node) {
@ -776,9 +856,11 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
// Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
// code and is matched first so to prevent it from being turned into
// LEA32r X+c.
// In 64-bit mode, use LEA to take advantage of RIP-relative addressing.
MVT::ValueType PtrVT = TLI.getPointerTy();
SDOperand N0 = N.getOperand(0);
SDOperand N1 = N.getOperand(1);
if (N.Val->getValueType(0) == MVT::i32 &&
if (N.Val->getValueType(0) == PtrVT &&
N0.getOpcode() == X86ISD::Wrapper &&
N1.getOpcode() == ISD::Constant) {
unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
@ -786,17 +868,23 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
// TODO: handle ExternalSymbolSDNode.
if (GlobalAddressSDNode *G =
dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
C = CurDAG->getTargetGlobalAddress(G->getGlobal(), MVT::i32,
C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
G->getOffset() + Offset);
} else if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
C = CurDAG->getTargetConstantPool(CP->get(), MVT::i32,
C = CurDAG->getTargetConstantPool(CP->get(), PtrVT,
CP->getAlignment(),
CP->getOffset()+Offset);
}
if (C.Val)
return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, MVT::i32, C);
if (C.Val) {
if (Subtarget->is64Bit()) {
SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
CurDAG->getRegister(0, PtrVT), C };
return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
} else
return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);
}
}
// Other cases are handled by auto-generated code.
@ -811,6 +899,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
}
else
switch (NVT) {
@ -818,6 +907,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
}
unsigned LoReg, HiReg;
@ -826,6 +916,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break;
case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break;
case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
}
SDOperand N0 = Node->getOperand(0);
@ -899,6 +990,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
}
else
switch (NVT) {
@ -906,6 +998,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
}
unsigned LoReg, HiReg;
@ -927,6 +1020,11 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
ClrOpcode = X86::MOV32r0;
SExtOpcode = X86::CDQ;
break;
case MVT::i64:
LoReg = X86::RAX; HiReg = X86::RDX;
ClrOpcode = X86::MOV64r0;
SExtOpcode = X86::CQO;
break;
}
SDOperand N0 = Node->getOperand(0);
@ -994,7 +1092,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
}
case ISD::TRUNCATE: {
if (NVT == MVT::i8) {
if (!Subtarget->is64Bit() && NVT == MVT::i8) {
unsigned Opc2;
MVT::ValueType VT;
switch (Node->getOperand(0).getValueType()) {
@ -1002,12 +1100,12 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
case MVT::i16:
Opc = X86::MOV16to16_;
VT = MVT::i16;
Opc2 = X86::TRUNC_GR16_GR8;
Opc2 = X86::TRUNC_16_to8;
break;
case MVT::i32:
Opc = X86::MOV32to32_;
VT = MVT::i32;
Opc2 = X86::TRUNC_GR32_GR8;
Opc2 = X86::TRUNC_32_to8;
break;
}

File diff suppressed because it is too large.


@ -267,6 +267,9 @@ namespace llvm {
// X86TargetLowering - X86 Implementation of the TargetLowering interface
class X86TargetLowering : public TargetLowering {
int VarArgsFrameIndex; // FrameIndex for start of varargs area.
int RegSaveFrameIndex; // X86-64 vararg func register save area.
unsigned VarArgsGPOffset; // X86-64 vararg func int reg offset.
unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset.
int ReturnAddrIndex; // FrameIndex for return slot.
int BytesToPopOnReturn; // Number of arg bytes ret should pop.
int BytesCallerReserves; // Number of arg bytes caller makes.
@ -347,6 +350,9 @@ namespace llvm {
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
/// X86StackPtr - X86 physical register used as stack ptr.
unsigned X86StackPtr;
/// X86ScalarSSE - Select between SSE2 or x87 floating point ops.
bool X86ScalarSSE;
@ -354,6 +360,10 @@ namespace llvm {
SDOperand LowerCCCArguments(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG);
// X86-64 C Calling Convention implementation.
SDOperand LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG);
// Fast Calling Convention implementation.
SDOperand LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG);


@ -22,7 +22,7 @@ using namespace llvm;
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: TargetInstrInfo(X86Insts, sizeof(X86Insts)/sizeof(X86Insts[0])),
TM(tm), RI(*this) {
TM(tm), RI(tm, *this) {
}
@ -30,7 +30,8 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
unsigned& sourceReg,
unsigned& destReg) const {
MachineOpCode oc = MI.getOpcode();
if (oc == X86::MOV8rr || oc == X86::MOV16rr || oc == X86::MOV32rr ||
if (oc == X86::MOV8rr || oc == X86::MOV16rr ||
oc == X86::MOV32rr || oc == X86::MOV64rr ||
oc == X86::MOV16to16_ || oc == X86::MOV32to32_ ||
oc == X86::FpMOV || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr ||
@ -59,6 +60,7 @@ unsigned X86InstrInfo::isLoadFromStackSlot(MachineInstr *MI,
case X86::MOV16_rm:
case X86::MOV32rm:
case X86::MOV32_rm:
case X86::MOV64rm:
case X86::FpLD64m:
case X86::MOVSSrm:
case X86::MOVSDrm:
@ -86,6 +88,7 @@ unsigned X86InstrInfo::isStoreToStackSlot(MachineInstr *MI,
case X86::MOV16_mr:
case X86::MOV32mr:
case X86::MOV32_mr:
case X86::MOV64mr:
case X86::FpSTP64m:
case X86::MOVSSmr:
case X86::MOVSDmr:
@ -145,16 +148,20 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr *MI) const {
switch (MI->getOpcode()) {
case X86::INC32r:
case X86::INC64_32r:
assert(MI->getNumOperands() == 2 && "Unknown inc instruction!");
return addRegOffset(BuildMI(X86::LEA32r, 5, Dest), Src, 1);
case X86::INC16r:
case X86::INC64_16r:
if (DisableLEA16) return 0;
assert(MI->getNumOperands() == 2 && "Unknown inc instruction!");
return addRegOffset(BuildMI(X86::LEA16r, 5, Dest), Src, 1);
case X86::DEC32r:
case X86::DEC64_32r:
assert(MI->getNumOperands() == 2 && "Unknown dec instruction!");
return addRegOffset(BuildMI(X86::LEA32r, 5, Dest), Src, -1);
case X86::DEC16r:
case X86::DEC64_16r:
if (DisableLEA16) return 0;
assert(MI->getNumOperands() == 2 && "Unknown dec instruction!");
return addRegOffset(BuildMI(X86::LEA16r, 5, Dest), Src, -1);
@ -264,3 +271,10 @@ X86InstrInfo::reverseBranchCondition(MachineBasicBlock::iterator MI) const {
return BuildMI(*MBB, MBB->erase(MI), ROpcode, 1).addMBB(TMBB);
}
const TargetRegisterClass *X86InstrInfo::getPointerRegClass() const {
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
if (Subtarget->is64Bit())
return &X86::GR64RegClass;
else
return &X86::GR32RegClass;
}


@ -18,6 +18,7 @@
#include "X86RegisterInfo.h"
namespace llvm {
class X86RegisterInfo;
class X86TargetMachine;
/// X86II - This namespace holds all of the target specific flags that
@ -90,12 +91,18 @@ namespace X86II {
// instead of 32 bit data.
OpSize = 1 << 6,
// AdSize - Set if this instruction requires an address-size prefix (0x67),
// which most often indicates that the instruction uses 16-bit addresses
// instead of 32-bit ones (or 32-bit addresses in 64-bit mode).
AdSize = 1 << 7,
//===------------------------------------------------------------------===//
// Op0Mask - There are several prefix bytes that are used to form two byte
// opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
// used to obtain the setting of this field. If no bits in this field are
// set, there is no prefix byte for obtaining a multibyte opcode.
//
Op0Shift = 7,
Op0Shift = 8,
Op0Mask = 0xF << Op0Shift,
// TB - TwoByte - Set if this instruction has a two byte opcode, which
@ -118,19 +125,29 @@ namespace X86II {
XD = 11 << Op0Shift, XS = 12 << Op0Shift,
//===------------------------------------------------------------------===//
// This two-bit field describes the size of an immediate operand. Zero is
// REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
// They are used to specify GPRs and SSE registers, 64-bit operand size,
// etc. We only care about the REX.W and REX.R bits, and only the former is
// statically determined.
//
REXShift = 12,
REX_W = 1 << REXShift,
//===------------------------------------------------------------------===//
// This three-bit field describes the size of an immediate operand. Zero is
// unused so that we can tell if we forgot to set a value.
ImmShift = 11,
ImmMask = 3 << ImmShift,
ImmShift = 13,
ImmMask = 7 << ImmShift,
Imm8 = 1 << ImmShift,
Imm16 = 2 << ImmShift,
Imm32 = 3 << ImmShift,
Imm64 = 4 << ImmShift,
//===------------------------------------------------------------------===//
// FP Instruction Classification... Zero is non-fp instruction.
// FPTypeMask - Mask for all of the FP types...
FPTypeShift = 13,
FPTypeShift = 16,
FPTypeMask = 7 << FPTypeShift,
// NotFP - The default, set for instructions that do not use FP registers.
@ -162,9 +179,9 @@ namespace X86II {
// SpecialFP - Special instruction forms. Dispatch by opcode explicitly.
SpecialFP = 7 << FPTypeShift,
OpcodeShift = 16,
// Bits 19 -> 23 are unused
OpcodeShift = 24,
OpcodeMask = 0xFF << OpcodeShift
// Bits 25 -> 31 are unused
};
}
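// A minimal sketch (not part of this change) of how the repacked TSFlags
// fields above can be decoded; the masks and shifts are the ones defined
// in this enum, everything else is illustrative.
static bool hasREXWPrefix(unsigned TSFlags) {
  return (TSFlags & X86II::REX_W) != 0;       // REX.W now lives at bit 12
}
static unsigned immediateBytes(unsigned TSFlags) {
  switch (TSFlags & X86II::ImmMask) {         // 3-bit field at shift 13
  case X86II::Imm8:  return 1;
  case X86II::Imm16: return 2;
  case X86II::Imm32: return 4;
  case X86II::Imm64: return 8;
  default:           return 0;                // NoImm
  }
}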
@ -216,6 +233,8 @@ public:
virtual MachineBasicBlock::iterator
reverseBranchCondition(MachineBasicBlock::iterator MI) const;
const TargetRegisterClass *getPointerRegClass() const;
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
// specified opcode number.
//


@ -39,7 +39,7 @@ def SDT_X86CallSeqStart : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
def SDT_X86CallSeqEnd : SDTypeProfile<0, 2, [ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
def SDT_X86Call : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def SDT_X86Call : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
@ -95,7 +95,7 @@ def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
class X86MemOperand<string printMethod> : Operand<iPTR> {
let PrintMethod = printMethod;
let NumMIOperands = 4;
let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
}
def i8mem : X86MemOperand<"printi8mem">;
@ -107,6 +107,12 @@ def f32mem : X86MemOperand<"printf32mem">;
def f64mem : X86MemOperand<"printf64mem">;
def f128mem : X86MemOperand<"printf128mem">;
def lea32mem : Operand<i32> {
let PrintMethod = "printi32mem";
let NumMIOperands = 4;
let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
}
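// Illustration (hypothetical registers): every X86 memory operand above is
// the same four machine operands -- base register, scale immediate, index
// register, displacement -- so [SrcReg + 8] becomes base=SrcReg, scale=1,
// index=0, disp=8, e.g. via the addRegOffset helper this commit already
// uses elsewhere:
//   addRegOffset(BuildMI(X86::LEA32r, 5, DestReg), SrcReg, 8);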
def SSECC : Operand<i8> {
let PrintMethod = "printSSECC";
}
@ -129,9 +135,9 @@ def brtarget : Operand<OtherVT>;
//
// Define X86 specific addressing mode.
def addr : ComplexPattern<iPTR, 4, "SelectAddr", []>;
def leaaddr : ComplexPattern<iPTR, 4, "SelectLEAAddr",
[add, mul, shl, or, frameindex]>;
def addr : ComplexPattern<iPTR, 4, "SelectAddr", []>;
def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr",
[add, mul, shl, or, frameindex]>;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
@ -158,11 +164,13 @@ def MRMInitReg : Format<32>;
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
def HasMMX : Predicate<"Subtarget->hasMMX()">;
def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
def FPStack : Predicate<"!Subtarget->hasSSE2()">;
def HasMMX : Predicate<"Subtarget->hasMMX()">;
def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
def FPStack : Predicate<"!Subtarget->hasSSE2()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">;
//===----------------------------------------------------------------------===//
// X86 specific pattern fragments.
@ -171,13 +179,14 @@ def FPStack : Predicate<"!Subtarget->hasSSE2()">;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
// machine code emitter.
class ImmType<bits<2> val> {
bits<2> Value = val;
class ImmType<bits<3> val> {
bits<3> Value = val;
}
def NoImm : ImmType<0>;
def Imm8 : ImmType<1>;
def Imm16 : ImmType<2>;
def Imm32 : ImmType<3>;
def Imm64 : ImmType<4>;
// FPFormat - This specifies what form this FP instruction has. This is used by
// the Floating-Point stackifier pass.
@ -202,7 +211,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag ops, string AsmStr>
Format Form = f;
bits<6> FormBits = Form.Value;
ImmType ImmT = i;
bits<2> ImmTypeBits = ImmT.Value;
bits<3> ImmTypeBits = ImmT.Value;
dag OperandList = ops;
string AsmString = AsmStr;
@ -210,9 +219,11 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag ops, string AsmStr>
//
// Attributes specific to X86 instructions...
//
bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix?
bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix?
bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix?
bits<4> Prefix = 0; // Which prefix byte does this inst have?
bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
FPFormat FPForm; // What flavor of FP instruction is this?
bits<3> FPFormBits = 0;
}
@ -226,6 +237,8 @@ class Imp<list<Register> uses, list<Register> defs> {
// Prefix byte classes which are used to indicate to the ad-hoc machine code
// emitter that various prefix bytes are required.
class OpSize { bit hasOpSizePrefix = 1; }
class AdSize { bit hasAdSizePrefix = 1; }
class REX_W { bit hasREX_WPrefix = 1; }
class TB { bits<4> Prefix = 1; }
class REP { bits<4> Prefix = 2; }
class D8 { bits<4> Prefix = 3; }
@ -276,8 +289,6 @@ def i32immSExt8 : PatLeaf<(i32 imm), [{
}]>;
// Helper fragments for loads.
def loadiPTR : PatFrag<(ops node:$ptr), (iPTR (load node:$ptr))>;
def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
def loadi16 : PatFrag<(ops node:$ptr), (i16 (load node:$ptr))>;
def loadi32 : PatFrag<(ops node:$ptr), (i32 (load node:$ptr))>;
@ -308,6 +319,7 @@ def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extload node:$ptr, i16))>;
//===----------------------------------------------------------------------===//
// Instruction templates...
//
class I<bits<8> o, Format f, dag ops, string asm, list<dag> pattern>
: X86Inst<o, f, NoImm, ops, asm> {
@ -355,13 +367,13 @@ def IMPLICIT_DEF_GR32 : I<0, Pseudo, (ops GR32:$dst),
def NOOP : I<0x90, RawFrm, (ops), "nop", []>;
// Truncate
def TRUNC_GR32_GR8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR32_:$src),
"mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}", []>;
def TRUNC_GR16_GR8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR16_:$src),
"mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}", []>;
def TRUNC_GR32_GR16 : I<0x89, MRMDestReg, (ops GR16:$dst, GR32:$src),
"mov{w} {${src:subreg16}, $dst|$dst, ${src:subreg16}}",
[(set GR16:$dst, (trunc GR32:$src))]>;
def TRUNC_32_to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR32_:$src),
"mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}", []>;
def TRUNC_16_to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR16_:$src),
"mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}", []>;
def TRUNC_32to16 : I<0x89, MRMDestReg, (ops GR16:$dst, GR32:$src),
"mov{w} {${src:subreg16}, $dst|$dst, ${src:subreg16}}",
[(set GR16:$dst, (trunc GR32:$src))]>;
//===----------------------------------------------------------------------===//
// Control Flow Instructions...
@ -388,7 +400,7 @@ let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1 in {
def JMP32r : I<0xFF, MRM4r, (ops GR32:$dst), "jmp{l} {*}$dst",
[(brind GR32:$dst)]>;
def JMP32m : I<0xFF, MRM4m, (ops i32mem:$dst), "jmp{l} {*}$dst",
[(brind (loadiPTR addr:$dst))]>;
[(brind (loadi32 addr:$dst))]>;
}
// Conditional branches
@ -510,9 +522,9 @@ def LEA16r : I<0x8D, MRMSrcMem,
(ops GR16:$dst, i32mem:$src),
"lea{w} {$src|$dst}, {$dst|$src}", []>, OpSize;
def LEA32r : I<0x8D, MRMSrcMem,
(ops GR32:$dst, i32mem:$src),
(ops GR32:$dst, lea32mem:$src),
"lea{l} {$src|$dst}, {$dst|$src}",
[(set GR32:$dst, leaaddr:$src)]>;
[(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
def REP_MOVSB : I<0xA4, RawFrm, (ops), "{rep;movsb|rep movsb}",
[(X86rep_movs i8)]>,
@ -1101,9 +1113,10 @@ def INC8r : I<0xFE, MRM0r, (ops GR8 :$dst, GR8 :$src), "inc{b} $dst",
[(set GR8:$dst, (add GR8:$src, 1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
def INC16r : I<0x40, AddRegFrm, (ops GR16:$dst, GR16:$src), "inc{w} $dst",
[(set GR16:$dst, (add GR16:$src, 1))]>, OpSize;
[(set GR16:$dst, (add GR16:$src, 1))]>,
OpSize, Requires<[In32BitMode]>;
def INC32r : I<0x40, AddRegFrm, (ops GR32:$dst, GR32:$src), "inc{l} $dst",
[(set GR32:$dst, (add GR32:$src, 1))]>;
[(set GR32:$dst, (add GR32:$src, 1))]>, Requires<[In32BitMode]>;
}
let isTwoAddress = 0, CodeSize = 2 in {
def INC8m : I<0xFE, MRM0m, (ops i8mem :$dst), "inc{b} $dst",
@ -1119,9 +1132,10 @@ def DEC8r : I<0xFE, MRM1r, (ops GR8 :$dst, GR8 :$src), "dec{b} $dst",
[(set GR8:$dst, (add GR8:$src, -1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
def DEC16r : I<0x48, AddRegFrm, (ops GR16:$dst, GR16:$src), "dec{w} $dst",
[(set GR16:$dst, (add GR16:$src, -1))]>, OpSize;
[(set GR16:$dst, (add GR16:$src, -1))]>,
OpSize, Requires<[In32BitMode]>;
def DEC32r : I<0x48, AddRegFrm, (ops GR32:$dst, GR32:$src), "dec{l} $dst",
[(set GR32:$dst, (add GR32:$src, -1))]>;
[(set GR32:$dst, (add GR32:$src, -1))]>, Requires<[In32BitMode]>;
}
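// Presumably the In32BitMode requirement above exists because the one-byte
// 0x40-0x4F INC/DEC encodings are reused as REX prefixes in 64-bit mode,
// leaving only the two-byte 0xFF forms available there.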
let isTwoAddress = 0, CodeSize = 2 in {
@ -2455,7 +2469,7 @@ def DWARF_LABEL : I<0, Pseudo, (ops i32imm:$id),
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
// ConstantPool GlobalAddress, ExternalSymbol
// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
@ -2477,18 +2491,16 @@ def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
// Calls
def : Pat<(X86tailcall GR32:$dst),
(CALL32r GR32:$dst)>;
(CALL32r GR32:$dst)>;
def : Pat<(X86tailcall tglobaladdr:$dst),
def : Pat<(X86tailcall (i32 tglobaladdr:$dst)),
(CALLpcrel32 tglobaladdr:$dst)>;
def : Pat<(X86tailcall texternalsym:$dst),
def : Pat<(X86tailcall (i32 texternalsym:$dst)),
(CALLpcrel32 texternalsym:$dst)>;
def : Pat<(X86call tglobaladdr:$dst),
def : Pat<(X86call (i32 tglobaladdr:$dst)),
(CALLpcrel32 tglobaladdr:$dst)>;
def : Pat<(X86call texternalsym:$dst),
def : Pat<(X86call (i32 texternalsym:$dst)),
(CALLpcrel32 texternalsym:$dst)>;
// X86 specific add which produces a flag.
@ -2611,3 +2623,9 @@ include "X86InstrMMX.td"
//===----------------------------------------------------------------------===//
include "X86InstrSSE.td"
//===----------------------------------------------------------------------===//
// X86-64 Support
//===----------------------------------------------------------------------===//
include "X86InstrX86-64.td"

File diff suppressed because it is too large.


@ -86,8 +86,9 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
if (MRegisterInfo::isPhysicalRegister(MO.getReg())) {
unsigned Reg = MO.getReg();
if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
MVT::ValueType VT = (strcmp(Modifier,"subreg16") == 0)
? MVT::i16 : MVT::i8;
MVT::ValueType VT = (strcmp(Modifier, "subreg64") == 0) ?
  MVT::i64 : ((strcmp(Modifier, "subreg32") == 0) ? MVT::i32 :
  ((strcmp(Modifier, "subreg16") == 0) ? MVT::i16 : MVT::i8));
Reg = getX86SubSuperRegister(Reg, VT);
}
O << RI.get(Reg).Name;
@ -137,7 +138,8 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
}
}
void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){
void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
const char *Modifier) {
assert(isMem(MI, Op) && "Invalid memory reference!");
const MachineOperand &BaseReg = MI->getOperand(Op);
@ -156,7 +158,7 @@ void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){
O << "[";
bool NeedPlus = false;
if (BaseReg.getReg()) {
printOp(BaseReg, "mem");
printOp(BaseReg, Modifier);
NeedPlus = true;
}
@ -164,7 +166,7 @@ void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){
if (NeedPlus) O << " + ";
if (ScaleVal != 1)
O << ScaleVal << "*";
printOp(IndexReg);
printOp(IndexReg, Modifier);
NeedPlus = true;
}
@ -259,14 +261,21 @@ void X86IntelAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
// See if a truncate instruction can be turned into a nop.
switch (MI->getOpcode()) {
default: break;
case X86::TRUNC_GR32_GR16:
case X86::TRUNC_GR32_GR8:
case X86::TRUNC_GR16_GR8: {
case X86::TRUNC_64to32:
case X86::TRUNC_64to16:
case X86::TRUNC_32to16:
case X86::TRUNC_32to8:
case X86::TRUNC_16to8:
case X86::TRUNC_32_to8:
case X86::TRUNC_16_to8: {
const MachineOperand &MO0 = MI->getOperand(0);
const MachineOperand &MO1 = MI->getOperand(1);
unsigned Reg0 = MO0.getReg();
unsigned Reg1 = MO1.getReg();
if (MI->getOpcode() == X86::TRUNC_GR32_GR16)
unsigned Opc = MI->getOpcode();
if (Opc == X86::TRUNC_64to32)
Reg1 = getX86SubSuperRegister(Reg1, MVT::i32);
else if (Opc == X86::TRUNC_32to16 || Opc == X86::TRUNC_64to16)
Reg1 = getX86SubSuperRegister(Reg1, MVT::i16);
else
Reg1 = getX86SubSuperRegister(Reg1, MVT::i8);
@ -275,6 +284,9 @@ void X86IntelAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
O << "\n\t";
break;
}
case X86::PsMOVZX64rr32:
O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t";
break;
}
// Call the autogenerated instruction printer routines.


@ -80,6 +80,10 @@ struct X86IntelAsmPrinter : public X86SharedAsmPrinter {
O << "XMMWORD PTR ";
printMemReference(MI, OpNo);
}
void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
O << "QWORD PTR ";
printMemReference(MI, OpNo, "subreg64");
}
bool printAsmMRegister(const MachineOperand &MO, const char Mode);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@ -89,7 +93,8 @@ struct X86IntelAsmPrinter : public X86SharedAsmPrinter {
void printMachineInstruction(const MachineInstr *MI);
void printOp(const MachineOperand &MO, const char *Modifier = 0);
void printSSECC(const MachineInstr *MI, unsigned Op);
void printMemReference(const MachineInstr *MI, unsigned Op);
void printMemReference(const MachineInstr *MI, unsigned Op,
const char *Modifier=NULL);
void printPICLabel(const MachineInstr *MI, unsigned Op);
bool runOnMachineFunction(MachineFunction &F);
bool doInitialization(Module &M);


@ -42,7 +42,65 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction;
// Provide a wrapper for X86CompilationCallback2 that saves non-traditional
// callee saved registers, for the fastcc calling convention.
extern "C" {
#if defined(__i386__) || defined(i386) || defined(_M_IX86)
#if defined(__x86_64__)
// No need to save EAX/EDX for X86-64.
void X86CompilationCallback(void);
asm(
".text\n"
".align 8\n"
".globl _X86CompilationCallback\n"
"_X86CompilationCallback:\n"
// Save RBP
"pushq %rbp\n"
// Save RSP
"movq %rsp, %rbp\n"
// Save all int arg registers
"pushq %rdi\n"
"pushq %rsi\n"
"pushq %rdx\n"
"pushq %rcx\n"
"pushq %r8\n"
"pushq %r9\n"
// Align stack on 16-byte boundary. RSP might not be properly aligned
// (8 byte) if this is called from an indirect stub.
"andq $-16, %rsp\n"
// Save all XMM arg registers
"subq $128, %rsp\n"
"movaps %xmm0, (%rsp)\n"
"movaps %xmm1, 16(%rsp)\n"
"movaps %xmm2, 32(%rsp)\n"
"movaps %xmm3, 48(%rsp)\n"
"movaps %xmm4, 64(%rsp)\n"
"movaps %xmm5, 80(%rsp)\n"
"movaps %xmm6, 96(%rsp)\n"
"movaps %xmm7, 112(%rsp)\n"
// JIT callee
"movq %rbp, %rdi\n" // Pass prev frame and return address
"movq 8(%rbp), %rsi\n"
"call _X86CompilationCallback2\n"
// Restore all XMM arg registers
"movaps 112(%rsp), %xmm7\n"
"movaps 96(%rsp), %xmm6\n"
"movaps 80(%rsp), %xmm5\n"
"movaps 64(%rsp), %xmm4\n"
"movaps 48(%rsp), %xmm3\n"
"movaps 32(%rsp), %xmm2\n"
"movaps 16(%rsp), %xmm1\n"
"movaps (%rsp), %xmm0\n"
// Restore RSP
"movq %rbp, %rsp\n"
// Restore all int arg registers
"subq $48, %rsp\n"
"popq %r9\n"
"popq %r8\n"
"popq %rcx\n"
"popq %rdx\n"
"popq %rsi\n"
"popq %rdi\n"
// Restore RBP
"popq %rbp\n"
"ret\n");
#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
#ifndef _MSC_VER
void X86CompilationCallback(void);
asm(
@ -122,7 +180,7 @@ extern "C" void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
"Could not find return address on the stack!");
// It's a stub if there is an interrupt marker after the call.
bool isStub = ((unsigned char*)(intptr_t)RetAddr)[0] == 0xCD;
bool isStub = ((unsigned char*)RetAddr)[0] == 0xCD;
// The call instruction should have pushed the return value onto the stack...
RetAddr -= 4; // Backtrack to the reference itself...
@ -135,20 +193,20 @@ extern "C" void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
#endif
// Sanity check to make sure this really is a call instruction.
assert(((unsigned char*)(intptr_t)RetAddr)[-1] == 0xE8 &&"Not a call instr!");
assert(((unsigned char*)RetAddr)[-1] == 0xE8 &&"Not a call instr!");
unsigned NewVal = (intptr_t)JITCompilerFunction((void*)(intptr_t)RetAddr);
intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);
// Rewrite the call target... so that we don't end up here every time we
// execute the call.
*(unsigned*)(intptr_t)RetAddr = NewVal-RetAddr-4;
*(unsigned *)RetAddr = (unsigned)(NewVal-RetAddr-4);
if (isStub) {
// If this is a stub, rewrite the call into an unconditional branch
// instruction so that two return addresses are not pushed onto the stack
// when the requested function finally gets called. This also makes the
// 0xCD byte (interrupt) dead, so the marker doesn't affect anything.
((unsigned char*)(intptr_t)RetAddr)[-1] = 0xE9;
((unsigned char*)RetAddr)[-1] = 0xE9;
}
// Change the return address to reexecute the call instruction...
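// Condensed sketch of the patching arithmetic above (helper name
// hypothetical, not the commit's own code): CALL rel32 stores
// Target - EndOfCall, and the rel32 field is the last four bytes of the
// five-byte call instruction.
static void patchCallTarget(intptr_t RetAddr, intptr_t NewTarget) {
  // RetAddr points just past the call instruction.
  *(unsigned *)(RetAddr - 4) = (unsigned)(NewTarget - RetAddr);
}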
@ -189,16 +247,17 @@ void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
switch ((X86::RelocationType)MR->getRelocationType()) {
case X86::reloc_pcrel_word:
case X86::reloc_pcrel_word: {
// PC relative relocation, add the relocated value to the value already in
// memory, after we adjust it for where the PC is.
ResultPtr = ResultPtr-(intptr_t)RelocPos-4;
*((intptr_t*)RelocPos) += ResultPtr;
ResultPtr = ResultPtr-(intptr_t)RelocPos-4-MR->getConstantVal();
*((unsigned*)RelocPos) += (unsigned)ResultPtr;
break;
}
case X86::reloc_absolute_word:
// Absolute relocation, just add the relocated value to the value already
// in memory.
*((intptr_t*)RelocPos) += ResultPtr;
*((unsigned*)RelocPos) += (unsigned)ResultPtr;
break;
}
}
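// Both cases above add into the 32-bit word already stored at RelocPos; the
// pc-relative case first rebases the target on the end of the field. A
// restatement of that arithmetic (helper name hypothetical):
static unsigned pcrel32Addend(intptr_t Target, intptr_t RelocPos,
                              intptr_t ConstantVal) {
  return (unsigned)(Target - RelocPos - 4 - ConstantVal);
}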


@ -14,13 +14,13 @@
#include "X86.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/Constants.h"
#include "llvm/Type.h"
#include "llvm/Function.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
@ -46,15 +46,32 @@ namespace {
cl::Hidden);
}
X86RegisterInfo::X86RegisterInfo(const TargetInstrInfo &tii)
: X86GenRegisterInfo(X86::ADJCALLSTACKDOWN, X86::ADJCALLSTACKUP), TII(tii) {}
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
const TargetInstrInfo &tii)
: X86GenRegisterInfo(X86::ADJCALLSTACKDOWN, X86::ADJCALLSTACKUP),
TM(tm), TII(tii) {
// Cache some information.
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
Is64Bit = Subtarget->is64Bit();
if (Is64Bit) {
SlotSize = 8;
StackPtr = X86::RSP;
FramePtr = X86::RBP;
} else {
SlotSize = 4;
StackPtr = X86::ESP;
FramePtr = X86::EBP;
}
}
void X86RegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, int FrameIdx,
const TargetRegisterClass *RC) const {
unsigned Opc;
if (RC == &X86::GR32RegClass) {
if (RC == &X86::GR64RegClass) {
Opc = X86::MOV64mr;
} else if (RC == &X86::GR32RegClass) {
Opc = X86::MOV32mr;
} else if (RC == &X86::GR16RegClass) {
Opc = X86::MOV16mr;
@ -84,7 +101,9 @@ void X86RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC) const{
unsigned Opc;
if (RC == &X86::GR32RegClass) {
if (RC == &X86::GR64RegClass) {
Opc = X86::MOV64rm;
} else if (RC == &X86::GR32RegClass) {
Opc = X86::MOV32rm;
} else if (RC == &X86::GR16RegClass) {
Opc = X86::MOV16rm;
@ -114,7 +133,9 @@ void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *RC) const {
unsigned Opc;
if (RC == &X86::GR32RegClass) {
if (RC == &X86::GR64RegClass) {
Opc = X86::MOV64rr;
} else if (RC == &X86::GR32RegClass) {
Opc = X86::MOV32rr;
} else if (RC == &X86::GR16RegClass) {
Opc = X86::MOV16rr;
@ -270,12 +291,18 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::ADC32ri, X86::ADC32mi },
{ X86::ADC32ri8, X86::ADC32mi8 },
{ X86::ADC32rr, X86::ADC32mr },
{ X86::ADC64ri32, X86::ADC64mi32 },
{ X86::ADC64ri8, X86::ADC64mi8 },
{ X86::ADC64rr, X86::ADC64mr },
{ X86::ADD16ri, X86::ADD16mi },
{ X86::ADD16ri8, X86::ADD16mi8 },
{ X86::ADD16rr, X86::ADD16mr },
{ X86::ADD32ri, X86::ADD32mi },
{ X86::ADD32ri8, X86::ADD32mi8 },
{ X86::ADD32rr, X86::ADD32mr },
{ X86::ADD64ri32, X86::ADD64mi32 },
{ X86::ADD64ri8, X86::ADD64mi8 },
{ X86::ADD64rr, X86::ADD64mr },
{ X86::ADD8ri, X86::ADD8mi },
{ X86::ADD8rr, X86::ADD8mr },
{ X86::AND16ri, X86::AND16mi },
@ -284,19 +311,30 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::AND32ri, X86::AND32mi },
{ X86::AND32ri8, X86::AND32mi8 },
{ X86::AND32rr, X86::AND32mr },
{ X86::AND64ri32, X86::AND64mi32 },
{ X86::AND64ri8, X86::AND64mi8 },
{ X86::AND64rr, X86::AND64mr },
{ X86::AND8ri, X86::AND8mi },
{ X86::AND8rr, X86::AND8mr },
{ X86::DEC16r, X86::DEC16m },
{ X86::DEC32r, X86::DEC32m },
{ X86::DEC64_16r, X86::DEC16m },
{ X86::DEC64_32r, X86::DEC32m },
{ X86::DEC64r, X86::DEC64m },
{ X86::DEC8r, X86::DEC8m },
{ X86::INC16r, X86::INC16m },
{ X86::INC32r, X86::INC32m },
{ X86::INC64_16r, X86::INC16m },
{ X86::INC64_32r, X86::INC32m },
{ X86::INC64r, X86::INC64m },
{ X86::INC8r, X86::INC8m },
{ X86::NEG16r, X86::NEG16m },
{ X86::NEG32r, X86::NEG32m },
{ X86::NEG64r, X86::NEG64m },
{ X86::NEG8r, X86::NEG8m },
{ X86::NOT16r, X86::NOT16m },
{ X86::NOT32r, X86::NOT32m },
{ X86::NOT64r, X86::NOT64m },
{ X86::NOT8r, X86::NOT8m },
{ X86::OR16ri, X86::OR16mi },
{ X86::OR16ri8, X86::OR16mi8 },
@ -304,6 +342,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::OR32ri, X86::OR32mi },
{ X86::OR32ri8, X86::OR32mi8 },
{ X86::OR32rr, X86::OR32mr },
{ X86::OR64ri32, X86::OR64mi32 },
{ X86::OR64ri8, X86::OR64mi8 },
{ X86::OR64rr, X86::OR64mr },
{ X86::OR8ri, X86::OR8mi },
{ X86::OR8rr, X86::OR8mr },
{ X86::ROL16r1, X86::ROL16m1 },
@ -312,6 +353,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::ROL32r1, X86::ROL32m1 },
{ X86::ROL32rCL, X86::ROL32mCL },
{ X86::ROL32ri, X86::ROL32mi },
{ X86::ROL64r1, X86::ROL64m1 },
{ X86::ROL64rCL, X86::ROL64mCL },
{ X86::ROL64ri, X86::ROL64mi },
{ X86::ROL8r1, X86::ROL8m1 },
{ X86::ROL8rCL, X86::ROL8mCL },
{ X86::ROL8ri, X86::ROL8mi },
@ -321,6 +365,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::ROR32r1, X86::ROR32m1 },
{ X86::ROR32rCL, X86::ROR32mCL },
{ X86::ROR32ri, X86::ROR32mi },
{ X86::ROR64r1, X86::ROR64m1 },
{ X86::ROR64rCL, X86::ROR64mCL },
{ X86::ROR64ri, X86::ROR64mi },
{ X86::ROR8r1, X86::ROR8m1 },
{ X86::ROR8rCL, X86::ROR8mCL },
{ X86::ROR8ri, X86::ROR8mi },
@ -330,18 +377,27 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::SAR32r1, X86::SAR32m1 },
{ X86::SAR32rCL, X86::SAR32mCL },
{ X86::SAR32ri, X86::SAR32mi },
{ X86::SAR64r1, X86::SAR64m1 },
{ X86::SAR64rCL, X86::SAR64mCL },
{ X86::SAR64ri, X86::SAR64mi },
{ X86::SAR8r1, X86::SAR8m1 },
{ X86::SAR8rCL, X86::SAR8mCL },
{ X86::SAR8ri, X86::SAR8mi },
{ X86::SBB32ri, X86::SBB32mi },
{ X86::SBB32ri8, X86::SBB32mi8 },
{ X86::SBB32rr, X86::SBB32mr },
{ X86::SBB64ri32, X86::SBB64mi32 },
{ X86::SBB64ri8, X86::SBB64mi8 },
{ X86::SBB64rr, X86::SBB64mr },
{ X86::SHL16r1, X86::SHL16m1 },
{ X86::SHL16rCL, X86::SHL16mCL },
{ X86::SHL16ri, X86::SHL16mi },
{ X86::SHL32r1, X86::SHL32m1 },
{ X86::SHL32rCL, X86::SHL32mCL },
{ X86::SHL32ri, X86::SHL32mi },
{ X86::SHL64r1, X86::SHL64m1 },
{ X86::SHL64rCL, X86::SHL64mCL },
{ X86::SHL64ri, X86::SHL64mi },
{ X86::SHL8r1, X86::SHL8m1 },
{ X86::SHL8rCL, X86::SHL8mCL },
{ X86::SHL8ri, X86::SHL8mi },
@ -349,12 +405,17 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::SHLD16rri8, X86::SHLD16mri8 },
{ X86::SHLD32rrCL, X86::SHLD32mrCL },
{ X86::SHLD32rri8, X86::SHLD32mri8 },
{ X86::SHLD64rrCL, X86::SHLD64mrCL },
{ X86::SHLD64rri8, X86::SHLD64mri8 },
{ X86::SHR16r1, X86::SHR16m1 },
{ X86::SHR16rCL, X86::SHR16mCL },
{ X86::SHR16ri, X86::SHR16mi },
{ X86::SHR32r1, X86::SHR32m1 },
{ X86::SHR32rCL, X86::SHR32mCL },
{ X86::SHR32ri, X86::SHR32mi },
{ X86::SHR64r1, X86::SHR64m1 },
{ X86::SHR64rCL, X86::SHR64mCL },
{ X86::SHR64ri, X86::SHR64mi },
{ X86::SHR8r1, X86::SHR8m1 },
{ X86::SHR8rCL, X86::SHR8mCL },
{ X86::SHR8ri, X86::SHR8mi },
@ -362,12 +423,17 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::SHRD16rri8, X86::SHRD16mri8 },
{ X86::SHRD32rrCL, X86::SHRD32mrCL },
{ X86::SHRD32rri8, X86::SHRD32mri8 },
{ X86::SHRD64rrCL, X86::SHRD64mrCL },
{ X86::SHRD64rri8, X86::SHRD64mri8 },
{ X86::SUB16ri, X86::SUB16mi },
{ X86::SUB16ri8, X86::SUB16mi8 },
{ X86::SUB16rr, X86::SUB16mr },
{ X86::SUB32ri, X86::SUB32mi },
{ X86::SUB32ri8, X86::SUB32mi8 },
{ X86::SUB32rr, X86::SUB32mr },
{ X86::SUB64ri32, X86::SUB64mi32 },
{ X86::SUB64ri8, X86::SUB64mi8 },
{ X86::SUB64rr, X86::SUB64mr },
{ X86::SUB8ri, X86::SUB8mi },
{ X86::SUB8rr, X86::SUB8mr },
{ X86::XOR16ri, X86::XOR16mi },
@ -376,6 +442,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::XOR32ri, X86::XOR32mi },
{ X86::XOR32ri8, X86::XOR32mi8 },
{ X86::XOR32rr, X86::XOR32mr },
{ X86::XOR64ri32, X86::XOR64mi32 },
{ X86::XOR64ri8, X86::XOR64mi8 },
{ X86::XOR64rr, X86::XOR64mr },
{ X86::XOR8ri, X86::XOR8mi },
{ X86::XOR8rr, X86::XOR8mr }
};
@ -388,6 +457,8 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
return MakeM0Inst(X86::MOV16mi, FrameIndex, MI);
else if (MI->getOpcode() == X86::MOV32r0)
return MakeM0Inst(X86::MOV32mi, FrameIndex, MI);
else if (MI->getOpcode() == X86::MOV64r0)
return MakeM0Inst(X86::MOV64mi32, FrameIndex, MI);
else if (MI->getOpcode() == X86::MOV8r0)
return MakeM0Inst(X86::MOV8mi, FrameIndex, MI);
@ -399,19 +470,24 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::CMP8ri, X86::CMP8mi },
{ X86::DIV16r, X86::DIV16m },
{ X86::DIV32r, X86::DIV32m },
{ X86::DIV64r, X86::DIV64m },
{ X86::DIV8r, X86::DIV8m },
{ X86::FsMOVAPDrr, X86::MOVSDmr },
{ X86::FsMOVAPSrr, X86::MOVSSmr },
{ X86::IDIV16r, X86::IDIV16m },
{ X86::IDIV32r, X86::IDIV32m },
{ X86::IDIV64r, X86::IDIV64m },
{ X86::IDIV8r, X86::IDIV8m },
{ X86::IMUL16r, X86::IMUL16m },
{ X86::IMUL32r, X86::IMUL32m },
{ X86::IMUL64r, X86::IMUL64m },
{ X86::IMUL8r, X86::IMUL8m },
{ X86::MOV16ri, X86::MOV16mi },
{ X86::MOV16rr, X86::MOV16mr },
{ X86::MOV32ri, X86::MOV32mi },
{ X86::MOV32rr, X86::MOV32mr },
{ X86::MOV64ri32, X86::MOV64mi32 },
{ X86::MOV64rr, X86::MOV64mr },
{ X86::MOV8ri, X86::MOV8mi },
{ X86::MOV8rr, X86::MOV8mr },
{ X86::MOVAPDrr, X86::MOVAPDmr },
@ -424,6 +500,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::MOVUPSrr, X86::MOVUPSmr },
{ X86::MUL16r, X86::MUL16m },
{ X86::MUL32r, X86::MUL32m },
{ X86::MUL64r, X86::MUL64m },
{ X86::MUL8r, X86::MUL8m },
{ X86::SETAEr, X86::SETAEm },
{ X86::SETAr, X86::SETAm },
@ -441,9 +518,11 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::SETSr, X86::SETSm },
{ X86::TEST16ri, X86::TEST16mi },
{ X86::TEST32ri, X86::TEST32mi },
{ X86::TEST64ri32, X86::TEST64mi32 },
{ X86::TEST8ri, X86::TEST8mi },
{ X86::XCHG16rr, X86::XCHG16mr },
{ X86::XCHG32rr, X86::XCHG32mr },
{ X86::XCHG64rr, X86::XCHG64mr },
{ X86::XCHG8rr, X86::XCHG8mr }
};
ASSERT_SORTED(OpcodeTable);
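// These tables pair a register-form opcode with its memory-form twin and
// are kept sorted (ASSERT_SORTED) so a lookup can binary-search on the
// first field. A standalone sketch of such a lookup, with the TableEntry
// layout assumed from context:
struct TableEntry { unsigned from, to; };
static unsigned lookupFoldedOpcode(const TableEntry *Tab, unsigned Size,
                                   unsigned Opcode) {
  unsigned Lo = 0, Hi = Size;
  while (Lo < Hi) {                       // classic lower_bound on "from"
    unsigned Mid = Lo + (Hi - Lo) / 2;
    if (Tab[Mid].from < Opcode)
      Lo = Mid + 1;
    else
      Hi = Mid;
  }
  return (Lo < Size && Tab[Lo].from == Opcode) ? Tab[Lo].to : 0;
}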
@ -453,16 +532,23 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
static const TableEntry OpcodeTable[] = {
{ X86::CMP16rr, X86::CMP16rm },
{ X86::CMP32rr, X86::CMP32rm },
{ X86::CMP64ri32, X86::CMP64mi32 },
{ X86::CMP64ri8, X86::CMP64mi8 },
{ X86::CMP64rr, X86::CMP64rm },
{ X86::CMP8rr, X86::CMP8rm },
{ X86::CMPPDrri, X86::CMPPDrmi },
{ X86::CMPPSrri, X86::CMPPSrmi },
{ X86::CMPSDrr, X86::CMPSDrm },
{ X86::CMPSSrr, X86::CMPSSrm },
{ X86::CVTSD2SSrr, X86::CVTSD2SSrm },
{ X86::CVTSI2SD64rr, X86::CVTSI2SD64rm },
{ X86::CVTSI2SDrr, X86::CVTSI2SDrm },
{ X86::CVTSI2SS64rr, X86::CVTSI2SS64rm },
{ X86::CVTSI2SSrr, X86::CVTSI2SSrm },
{ X86::CVTSS2SDrr, X86::CVTSS2SDrm },
{ X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm },
{ X86::CVTTSD2SIrr, X86::CVTTSD2SIrm },
{ X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm },
{ X86::CVTTSS2SIrr, X86::CVTTSS2SIrm },
{ X86::FsMOVAPDrr, X86::MOVSDrm },
{ X86::FsMOVAPSrr, X86::MOVSSrm },
@ -470,6 +556,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::IMUL16rri8, X86::IMUL16rmi8 },
{ X86::IMUL32rri, X86::IMUL32rmi },
{ X86::IMUL32rri8, X86::IMUL32rmi8 },
{ X86::IMUL64rr, X86::IMUL64rm },
{ X86::IMUL64rri32, X86::IMUL64rmi32 },
{ X86::IMUL64rri8, X86::IMUL64rmi8 },
{ X86::Int_CMPSDrr, X86::Int_CMPSDrm },
{ X86::Int_CMPSSrr, X86::Int_CMPSSrm },
{ X86::Int_COMISDrr, X86::Int_COMISDrm },
@ -480,20 +569,27 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm },
{ X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm },
{ X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm },
{ X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm },
{ X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm },
{ X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm },
{ X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm },
{ X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm },
{ X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm },
{ X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm },
{ X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm },
{ X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm },
{ X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm },
{ X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm },
{ X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm },
{ X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm },
{ X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm },
{ X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm },
{ X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm },
{ X86::Int_UCOMISDrr, X86::Int_UCOMISDrm },
{ X86::Int_UCOMISSrr, X86::Int_UCOMISSrm },
{ X86::MOV16rr, X86::MOV16rm },
{ X86::MOV32rr, X86::MOV32rm },
{ X86::MOV64rr, X86::MOV64rm },
{ X86::MOV8rr, X86::MOV8rm },
{ X86::MOVAPDrr, X86::MOVAPDrm },
{ X86::MOVAPSrr, X86::MOVAPSrm },
@ -509,22 +605,30 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::MOVSX16rr8, X86::MOVSX16rm8 },
{ X86::MOVSX32rr16, X86::MOVSX32rm16 },
{ X86::MOVSX32rr8, X86::MOVSX32rm8 },
{ X86::MOVSX64rr16, X86::MOVSX64rm16 },
{ X86::MOVSX64rr32, X86::MOVSX64rm32 },
{ X86::MOVSX64rr8, X86::MOVSX64rm8 },
{ X86::MOVUPDrr, X86::MOVUPDrm },
{ X86::MOVUPSrr, X86::MOVUPSrm },
{ X86::MOVZX16rr8, X86::MOVZX16rm8 },
{ X86::MOVZX32rr16, X86::MOVZX32rm16 },
{ X86::MOVZX32rr8, X86::MOVZX32rm8 },
{ X86::MOVZX64rr16, X86::MOVZX64rm16 },
{ X86::MOVZX64rr8, X86::MOVZX64rm8 },
{ X86::PSHUFDri, X86::PSHUFDmi },
{ X86::PSHUFHWri, X86::PSHUFHWmi },
{ X86::PSHUFLWri, X86::PSHUFLWmi },
{ X86::PsMOVZX64rr32, X86::PsMOVZX64rm32 },
{ X86::TEST16rr, X86::TEST16rm },
{ X86::TEST32rr, X86::TEST32rm },
{ X86::TEST64rr, X86::TEST64rm },
{ X86::TEST8rr, X86::TEST8rm },
// FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
{ X86::UCOMISDrr, X86::UCOMISDrm },
{ X86::UCOMISSrr, X86::UCOMISSrm },
{ X86::XCHG16rr, X86::XCHG16rm },
{ X86::XCHG32rr, X86::XCHG32rm },
{ X86::XCHG64rr, X86::XCHG64rm },
{ X86::XCHG8rr, X86::XCHG8rm }
};
ASSERT_SORTED(OpcodeTable);
@ -533,8 +637,10 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
} else if (i == 2) {
static const TableEntry OpcodeTable[] = {
{ X86::ADC32rr, X86::ADC32rm },
{ X86::ADC64rr, X86::ADC64rm },
{ X86::ADD16rr, X86::ADD16rm },
{ X86::ADD32rr, X86::ADD32rm },
{ X86::ADD64rr, X86::ADD64rm },
{ X86::ADD8rr, X86::ADD8rm },
{ X86::ADDPDrr, X86::ADDPDrm },
{ X86::ADDPSrr, X86::ADDPSrm },
@ -544,6 +650,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::ADDSUBPSrr, X86::ADDSUBPSrm },
{ X86::AND16rr, X86::AND16rm },
{ X86::AND32rr, X86::AND32rm },
{ X86::AND64rr, X86::AND64rm },
{ X86::AND8rr, X86::AND8rm },
{ X86::ANDNPDrr, X86::ANDNPDrm },
{ X86::ANDNPSrr, X86::ANDNPSrm },
@ -551,32 +658,46 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::ANDPSrr, X86::ANDPSrm },
{ X86::CMOVA16rr, X86::CMOVA16rm },
{ X86::CMOVA32rr, X86::CMOVA32rm },
{ X86::CMOVA64rr, X86::CMOVA64rm },
{ X86::CMOVAE16rr, X86::CMOVAE16rm },
{ X86::CMOVAE32rr, X86::CMOVAE32rm },
{ X86::CMOVAE64rr, X86::CMOVAE64rm },
{ X86::CMOVB16rr, X86::CMOVB16rm },
{ X86::CMOVB32rr, X86::CMOVB32rm },
{ X86::CMOVB64rr, X86::CMOVB64rm },
{ X86::CMOVBE16rr, X86::CMOVBE16rm },
{ X86::CMOVBE32rr, X86::CMOVBE32rm },
{ X86::CMOVBE64rr, X86::CMOVBE64rm },
{ X86::CMOVE16rr, X86::CMOVE16rm },
{ X86::CMOVE32rr, X86::CMOVE32rm },
{ X86::CMOVE64rr, X86::CMOVE64rm },
{ X86::CMOVG16rr, X86::CMOVG16rm },
{ X86::CMOVG32rr, X86::CMOVG32rm },
{ X86::CMOVG64rr, X86::CMOVG64rm },
{ X86::CMOVGE16rr, X86::CMOVGE16rm },
{ X86::CMOVGE32rr, X86::CMOVGE32rm },
{ X86::CMOVGE64rr, X86::CMOVGE64rm },
{ X86::CMOVL16rr, X86::CMOVL16rm },
{ X86::CMOVL32rr, X86::CMOVL32rm },
{ X86::CMOVL64rr, X86::CMOVL64rm },
{ X86::CMOVLE16rr, X86::CMOVLE16rm },
{ X86::CMOVLE32rr, X86::CMOVLE32rm },
{ X86::CMOVLE64rr, X86::CMOVLE64rm },
{ X86::CMOVNE16rr, X86::CMOVNE16rm },
{ X86::CMOVNE32rr, X86::CMOVNE32rm },
{ X86::CMOVNE64rr, X86::CMOVNE64rm },
{ X86::CMOVNP16rr, X86::CMOVNP16rm },
{ X86::CMOVNP32rr, X86::CMOVNP32rm },
{ X86::CMOVNP64rr, X86::CMOVNP64rm },
{ X86::CMOVNS16rr, X86::CMOVNS16rm },
{ X86::CMOVNS32rr, X86::CMOVNS32rm },
{ X86::CMOVNS64rr, X86::CMOVNS64rm },
{ X86::CMOVP16rr, X86::CMOVP16rm },
{ X86::CMOVP32rr, X86::CMOVP32rm },
{ X86::CMOVP64rr, X86::CMOVP64rm },
{ X86::CMOVS16rr, X86::CMOVS16rm },
{ X86::CMOVS32rr, X86::CMOVS32rm },
{ X86::CMOVS64rr, X86::CMOVS64rm },
{ X86::DIVPDrr, X86::DIVPDrm },
{ X86::DIVPSrr, X86::DIVPSrm },
{ X86::DIVSDrr, X86::DIVSDrm },
@ -597,6 +718,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::MULSSrr, X86::MULSSrm },
{ X86::OR16rr, X86::OR16rm },
{ X86::OR32rr, X86::OR32rm },
{ X86::OR64rr, X86::OR64rm },
{ X86::OR8rr, X86::OR8rm },
{ X86::ORPDrr, X86::ORPDrm },
{ X86::ORPSrr, X86::ORPSrm },
@ -655,6 +777,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::RCPPSr, X86::RCPPSm },
{ X86::RSQRTPSr, X86::RSQRTPSm },
{ X86::SBB32rr, X86::SBB32rm },
{ X86::SBB64rr, X86::SBB64rm },
{ X86::SHUFPDrri, X86::SHUFPDrmi },
{ X86::SHUFPSrri, X86::SHUFPSrmi },
{ X86::SQRTPDr, X86::SQRTPDm },
@ -663,6 +786,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::SQRTSSr, X86::SQRTSSm },
{ X86::SUB16rr, X86::SUB16rm },
{ X86::SUB32rr, X86::SUB32rm },
{ X86::SUB64rr, X86::SUB64rm },
{ X86::SUB8rr, X86::SUB8rm },
{ X86::SUBPDrr, X86::SUBPDrm },
{ X86::SUBPSrr, X86::SUBPSrm },
@ -675,6 +799,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
{ X86::UNPCKLPSrr, X86::UNPCKLPSrm },
{ X86::XOR16rr, X86::XOR16rm },
{ X86::XOR32rr, X86::XOR32rm },
{ X86::XOR64rr, X86::XOR64rm },
{ X86::XOR8rr, X86::XOR8rm },
{ X86::XORPDrr, X86::XORPDrm },
{ X86::XORPSrr, X86::XORPSrm }
@ -707,19 +832,29 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
const unsigned *X86RegisterInfo::getCalleeSaveRegs() const {
static const unsigned CalleeSaveRegs[] = {
static const unsigned CalleeSaveRegs32Bit[] = {
X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
};
return CalleeSaveRegs;
static const unsigned CalleeSaveRegs64Bit[] = {
X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
};
return Is64Bit ? CalleeSaveRegs64Bit : CalleeSaveRegs32Bit;
}
const TargetRegisterClass* const*
X86RegisterInfo::getCalleeSaveRegClasses() const {
static const TargetRegisterClass * const CalleeSaveRegClasses[] = {
static const TargetRegisterClass * const CalleeSaveRegClasses32Bit[] = {
&X86::GR32RegClass, &X86::GR32RegClass,
&X86::GR32RegClass, &X86::GR32RegClass, 0
};
return CalleeSaveRegClasses;
static const TargetRegisterClass * const CalleeSaveRegClasses64Bit[] = {
&X86::GR64RegClass, &X86::GR64RegClass,
&X86::GR64RegClass, &X86::GR64RegClass,
&X86::GR64RegClass, &X86::GR64RegClass, 0
};
return Is64Bit ? CalleeSaveRegClasses64Bit : CalleeSaveRegClasses32Bit;
}
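// Usage note: both arrays above end with a 0 sentinel instead of carrying a
// length, so callers simply walk to the terminator (sketch):
static unsigned countCalleeSaves(const unsigned *Regs) {
  unsigned N = 0;
  while (Regs[N] != 0)
    ++N;
  return N;        // 4 in 32-bit mode, 6 in 64-bit mode
}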
//===----------------------------------------------------------------------===//
@ -754,15 +889,18 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineInstr *New = 0;
if (Old->getOpcode() == X86::ADJCALLSTACKDOWN) {
New=BuildMI(X86::SUB32ri, 2, X86::ESP).addReg(X86::ESP).addImm(Amount);
New=BuildMI(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri, 1, StackPtr)
.addReg(StackPtr).addImm(Amount);
} else {
assert(Old->getOpcode() == X86::ADJCALLSTACKUP);
// factor out the amount the callee already popped.
unsigned CalleeAmt = Old->getOperand(1).getImmedValue();
Amount -= CalleeAmt;
if (Amount) {
unsigned Opc = Amount < 128 ? X86::ADD32ri8 : X86::ADD32ri;
New = BuildMI(Opc, 2, X86::ESP).addReg(X86::ESP).addImm(Amount);
unsigned Opc = (Amount < 128) ?
(Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
(Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
New = BuildMI(Opc, 1, StackPtr).addReg(StackPtr).addImm(Amount);
}
}
@ -774,9 +912,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// something off the stack pointer, add it back. We do this until we have
// more advanced stack pointer tracking ability.
if (unsigned CalleeAmt = I->getOperand(1).getImmedValue()) {
unsigned Opc = CalleeAmt < 128 ? X86::SUB32ri8 : X86::SUB32ri;
unsigned Opc = (CalleeAmt < 128) ?
(Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
MachineInstr *New =
BuildMI(Opc, 1, X86::ESP).addReg(X86::ESP).addImm(CalleeAmt);
BuildMI(Opc, 1, StackPtr).addReg(StackPtr).addImm(CalleeAmt);
MBB.insert(I, New);
}
}
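// The "< 128" tests above select the sign-extended 8-bit immediate forms
// when the adjustment fits, saving three bytes per instruction; since stack
// adjustments are non-negative here, only the top of the signed 8-bit range
// [-128, 127] needs checking.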
@ -794,19 +934,18 @@ void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const{
}
int FrameIndex = MI.getOperand(i).getFrameIndex();
// This must be part of a four operand memory reference. Replace the
// FrameIndex with base register with EBP. Add add an offset to the offset.
MI.getOperand(i).ChangeToRegister(hasFP(MF) ? X86::EBP : X86::ESP, false);
// FrameIndex with the base register (frame or stack pointer). Add an offset
// to the offset.
MI.getOperand(i).ChangeToRegister(hasFP(MF) ? FramePtr : StackPtr, false);
// Now add the frame object offset to the offset from EBP.
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
MI.getOperand(i+3).getImmedValue()+4;
MI.getOperand(i+3).getImmedValue()+SlotSize;
if (!hasFP(MF))
Offset += MF.getFrameInfo()->getStackSize();
else
Offset += 4; // Skip the saved EBP
Offset += SlotSize; // Skip the saved EBP
MI.getOperand(i+3).ChangeToImmediate(Offset);
}
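// Worked restatement of the offset computed above (helper hypothetical):
static int frameIndexOffset(int ObjectOffset, int OrigDisp, int SlotSize,
                            bool HasFP, int StackSize) {
  int Off = ObjectOffset + OrigDisp + SlotSize; // skip the return address
  return HasFP ? Off + SlotSize                 // skip the saved EBP/RBP too
               : Off + StackSize;               // else rebase off ESP/RSP
}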
@ -815,7 +954,7 @@ void
X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{
if (hasFP(MF)) {
// Create a frame entry for the EBP register that must be saved.
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, -8);
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize,SlotSize * -2);
assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
"Slot for EBP register must be last in order to be found!");
}
@ -840,9 +979,9 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (!hasFP(MF))
NumBytes += MFI->getMaxCallFrameSize();
// Round the size to a multiple of the alignment (don't forget the 4 byte
// Round the size to a multiple of the alignment (don't forget the 4/8 byte
// offset though).
NumBytes = ((NumBytes+4)+Align-1)/Align*Align - 4;
NumBytes = ((NumBytes+SlotSize)+Align-1)/Align*Align - SlotSize;
}
// Update frame info to pretend that this is part of the stack...
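// Worked example of the rounding above, with SlotSize = 8 and Align = 16:
//   NumBytes = 20  ->  ((20 + 8) + 15) / 16 * 16 - 8  =  32 - 8  =  24,
// so 24 bytes of locals plus the 8-byte return-address slot stay 16-byte
// aligned.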
@ -859,8 +998,10 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
MI = BuildMI(X86::CALLpcrel32, 1).addExternalSymbol("_alloca");
MBB.insert(MBBI, MI);
} else {
unsigned Opc = NumBytes < 128 ? X86::SUB32ri8 : X86::SUB32ri;
MI = BuildMI(Opc, 2, X86::ESP).addReg(X86::ESP).addImm(NumBytes);
unsigned Opc = (NumBytes < 128) ?
(Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
MI= BuildMI(Opc, 1, StackPtr).addReg(StackPtr).addImm(NumBytes);
MBB.insert(MBBI, MI);
}
}
@ -868,18 +1009,21 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (hasFP(MF)) {
// Get the offset of the stack slot for the EBP register... which is
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexBegin())+4;
int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexBegin())+SlotSize;
// Save EBP into the appropriate stack slot...
MI = addRegOffset(BuildMI(X86::MOV32mr, 5), // mov [ESP-<offset>], EBP
X86::ESP, EBPOffset+NumBytes).addReg(X86::EBP);
// mov [ESP-<offset>], EBP
MI = addRegOffset(BuildMI(Is64Bit ? X86::MOV64mr : X86::MOV32mr, 5),
StackPtr, EBPOffset+NumBytes).addReg(FramePtr);
MBB.insert(MBBI, MI);
// Update EBP with the new base value...
if (NumBytes == 4) // mov EBP, ESP
MI = BuildMI(X86::MOV32rr, 2, X86::EBP).addReg(X86::ESP);
if (NumBytes == SlotSize) // mov EBP, ESP
MI = BuildMI(Is64Bit ? X86::MOV64rr : X86::MOV32rr, 2, FramePtr).
addReg(StackPtr);
else // lea EBP, [ESP+StackSize]
MI = addRegOffset(BuildMI(X86::LEA32r, 5, X86::EBP), X86::ESP,NumBytes-4);
MI = addRegOffset(BuildMI(Is64Bit ? X86::LEA64r : X86::LEA32r,
5, FramePtr), StackPtr, NumBytes-SlotSize);
MBB.insert(MBBI, MI);
}
@ -916,13 +1060,14 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
if (hasFP(MF)) {
// Get the offset of the stack slot for the EBP register... which is
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexEnd()-1)+4;
int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexEnd()-1)+SlotSize;
// mov ESP, EBP
BuildMI(MBB, MBBI, X86::MOV32rr, 1, X86::ESP).addReg(X86::EBP);
BuildMI(MBB, MBBI, Is64Bit ? X86::MOV64rr : X86::MOV32rr, 1, StackPtr).
addReg(FramePtr);
// pop EBP
BuildMI(MBB, MBBI, X86::POP32r, 0, X86::EBP);
BuildMI(MBB, MBBI, Is64Bit ? X86::POP64r : X86::POP32r, 0, FramePtr);
} else {
// Get the number of bytes allocated from the FrameInfo...
unsigned NumBytes = MFI->getStackSize();
@ -932,14 +1077,15 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
// instruction, merge the two instructions.
if (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PI = prior(MBBI);
if ((PI->getOpcode() == X86::ADD32ri ||
PI->getOpcode() == X86::ADD32ri8) &&
PI->getOperand(0).getReg() == X86::ESP) {
unsigned Opc = PI->getOpcode();
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
PI->getOperand(0).getReg() == StackPtr) {
NumBytes += PI->getOperand(2).getImmedValue();
MBB.erase(PI);
} else if ((PI->getOpcode() == X86::SUB32ri ||
PI->getOpcode() == X86::SUB32ri8) &&
PI->getOperand(0).getReg() == X86::ESP) {
} else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
PI->getOperand(0).getReg() == StackPtr) {
NumBytes -= PI->getOperand(2).getImmedValue();
MBB.erase(PI);
} else if (PI->getOpcode() == X86::ADJSTACKPTRri) {
@ -949,11 +1095,15 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
}
if (NumBytes > 0) {
unsigned Opc = NumBytes < 128 ? X86::ADD32ri8 : X86::ADD32ri;
BuildMI(MBB, MBBI, Opc, 2, X86::ESP).addReg(X86::ESP).addImm(NumBytes);
unsigned Opc = (NumBytes < 128) ?
(Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
(Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
BuildMI(MBB, MBBI, Opc, 2, StackPtr).addReg(StackPtr).addImm(NumBytes);
} else if ((int)NumBytes < 0) {
unsigned Opc = -NumBytes < 128 ? X86::SUB32ri8 : X86::SUB32ri;
BuildMI(MBB, MBBI, Opc, 2, X86::ESP).addReg(X86::ESP).addImm(-NumBytes);
unsigned Opc = (-NumBytes < 128) ?
(Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
BuildMI(MBB, MBBI, Opc, 2, StackPtr).addReg(StackPtr).addImm(-NumBytes);
}
}
}
@ -964,7 +1114,7 @@ unsigned X86RegisterInfo::getRARegister() const {
}
unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const {
return hasFP(MF) ? X86::EBP : X86::ESP;
return hasFP(MF) ? FramePtr : StackPtr;
}
namespace llvm {
@ -974,68 +1124,160 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::ValueType VT, bool High) {
case MVT::i8:
if (High) {
switch (Reg) {
default: return Reg;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX:
default: return 0;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AH;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX:
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
return X86::DH;
case X86::CH: case X86::CL: case X86::CX: case X86::ECX:
case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
return X86::CH;
case X86::BH: case X86::BL: case X86::BX: case X86::EBX:
case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
return X86::BH;
}
} else {
switch (Reg) {
default: return Reg;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX:
default: return 0;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AL;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX:
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
return X86::DL;
case X86::CH: case X86::CL: case X86::CX: case X86::ECX:
case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
return X86::CL;
case X86::BH: case X86::BL: case X86::BX: case X86::EBX:
case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
return X86::BL;
case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
return X86::SIL;
case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
return X86::DIL;
case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
return X86::BPL;
case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
return X86::SPL;
case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
return X86::R8B;
case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
return X86::R9B;
case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
return X86::R10B;
case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
return X86::R11B;
case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
return X86::R12B;
case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
return X86::R13B;
case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
return X86::R14B;
case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
return X86::R15B;
}
}
case MVT::i16:
switch (Reg) {
default: return Reg;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX:
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX:
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
return X86::DX;
case X86::CH: case X86::CL: case X86::CX: case X86::ECX:
case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
return X86::CX;
case X86::BH: case X86::BL: case X86::BX: case X86::EBX:
case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
return X86::BX;
case X86::ESI:
case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
return X86::SI;
case X86::EDI:
case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
return X86::DI;
case X86::EBP:
case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
return X86::BP;
case X86::ESP:
case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
return X86::SP;
case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
return X86::R8W;
case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
return X86::R9W;
case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
return X86::R10W;
case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
return X86::R11W;
case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
return X86::R12W;
case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
return X86::R13W;
case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
return X86::R14W;
case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
return X86::R15W;
}
case MVT::i32:
switch (Reg) {
default: return true;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX:
default: return Reg;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::EAX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX:
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
return X86::EDX;
case X86::CH: case X86::CL: case X86::CX: case X86::ECX:
case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
return X86::ECX;
case X86::BH: case X86::BL: case X86::BX: case X86::EBX:
case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
return X86::EBX;
case X86::SI:
case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
return X86::ESI;
case X86::DI:
case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
return X86::EDI;
case X86::BP:
case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
return X86::EBP;
case X86::SP:
case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
return X86::ESP;
case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
return X86::R8D;
case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
return X86::R9D;
case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
return X86::R10D;
case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
return X86::R11D;
case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
return X86::R12D;
case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
return X86::R13D;
case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
return X86::R14D;
case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
return X86::R15D;
}
case MVT::i64:
switch (Reg) {
default: return Reg;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::RAX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
return X86::RDX;
case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
return X86::RCX;
case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
return X86::RBX;
case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
return X86::RSI;
case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
return X86::RDI;
case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
return X86::RBP;
case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
return X86::RSP;
case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
return X86::R8;
case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
return X86::R9;
case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
return X86::R10;
case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
return X86::R11;
case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
return X86::R12;
case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
return X86::R13;
case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
return X86::R14;
case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
return X86::R15;
}
}
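// Usage sketch (results read off the tables above): any alias of a GPR maps
// to the register of the requested width, e.g.
//   getX86SubSuperRegister(X86::RAX, MVT::i16)                -> X86::AX
//   getX86SubSuperRegister(X86::BL,  MVT::i64)                -> X86::RBX
//   getX86SubSuperRegister(X86::EAX, MVT::i8, /*High=*/true)  -> X86::AH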


@ -20,10 +20,26 @@
namespace llvm {
class Type;
class TargetInstrInfo;
class X86TargetMachine;
struct X86RegisterInfo : public X86GenRegisterInfo {
X86TargetMachine &TM;
const TargetInstrInfo &TII;
X86RegisterInfo(const TargetInstrInfo &tii);
private:
/// Is64Bit - Is the target 64-bit.
bool Is64Bit;
/// SlotSize - Stack slot size in bytes.
unsigned SlotSize;
/// StackPtr - X86 physical register used as stack ptr.
unsigned StackPtr;
/// FramePtr - X86 physical register used as frame ptr.
unsigned FramePtr;
public:
X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
/// Code Generation virtual methods...
void storeRegToStackSlot(MachineBasicBlock &MBB,


@ -23,35 +23,92 @@ let Namespace = "X86" in {
// because the register file generator is smart enough to figure out that
// AL aliases AX if we tell it that AX aliases AL (for example).
// FIXME: X86-64 has different Dwarf numbers.
// 64-bit registers, X86-64 only
def RAX : Register<"RAX">, DwarfRegNum<0>;
def RDX : Register<"RDX">, DwarfRegNum<1>;
def RCX : Register<"RCX">, DwarfRegNum<2>;
def RBX : Register<"RBX">, DwarfRegNum<3>;
def RSI : Register<"RSI">, DwarfRegNum<4>;
def RDI : Register<"RDI">, DwarfRegNum<5>;
def RBP : Register<"RBP">, DwarfRegNum<6>;
def RSP : Register<"RSP">, DwarfRegNum<7>;
def R8 : Register<"R8">, DwarfRegNum<8>;
def R9 : Register<"R9">, DwarfRegNum<9>;
def R10 : Register<"R10">, DwarfRegNum<10>;
def R11 : Register<"R11">, DwarfRegNum<11>;
def R12 : Register<"R12">, DwarfRegNum<12>;
def R13 : Register<"R13">, DwarfRegNum<13>;
def R14 : Register<"R14">, DwarfRegNum<14>;
def R15 : Register<"R15">, DwarfRegNum<15>;
// 32-bit registers
def EAX : Register<"EAX">, DwarfRegNum<0>;
def ECX : Register<"ECX">, DwarfRegNum<1>;
def EDX : Register<"EDX">, DwarfRegNum<2>;
def EBX : Register<"EBX">, DwarfRegNum<3>;
def ESP : Register<"ESP">, DwarfRegNum<4>;
def EBP : Register<"EBP">, DwarfRegNum<5>;
def ESI : Register<"ESI">, DwarfRegNum<6>;
def EDI : Register<"EDI">, DwarfRegNum<7>;
def EAX : RegisterGroup<"EAX", [RAX]>, DwarfRegNum<0>;
def ECX : RegisterGroup<"ECX", [RCX]>, DwarfRegNum<1>;
def EDX : RegisterGroup<"EDX", [RDX]>, DwarfRegNum<2>;
def EBX : RegisterGroup<"EBX", [RBX]>, DwarfRegNum<3>;
def ESP : RegisterGroup<"ESP", [RSP]>, DwarfRegNum<4>;
def EBP : RegisterGroup<"EBP", [RBP]>, DwarfRegNum<5>;
def ESI : RegisterGroup<"ESI", [RSI]>, DwarfRegNum<6>;
def EDI : RegisterGroup<"EDI", [RDI]>, DwarfRegNum<7>;
// X86-64 only
def R8D : RegisterGroup<"R8D", [R8]>, DwarfRegNum<8>;
def R9D : RegisterGroup<"R9D", [R9]>, DwarfRegNum<9>;
def R10D : RegisterGroup<"R10D", [R10]>, DwarfRegNum<10>;
def R11D : RegisterGroup<"R11D", [R11]>, DwarfRegNum<11>;
def R12D : RegisterGroup<"R12D", [R12]>, DwarfRegNum<12>;
def R13D : RegisterGroup<"R13D", [R13]>, DwarfRegNum<13>;
def R14D : RegisterGroup<"R14D", [R14]>, DwarfRegNum<14>;
def R15D : RegisterGroup<"R15D", [R15]>, DwarfRegNum<15>;
// 16-bit registers
def AX : RegisterGroup<"AX", [EAX]>, DwarfRegNum<0>;
def CX : RegisterGroup<"CX", [ECX]>, DwarfRegNum<1>;
def DX : RegisterGroup<"DX", [EDX]>, DwarfRegNum<2>;
def BX : RegisterGroup<"BX", [EBX]>, DwarfRegNum<3>;
def SP : RegisterGroup<"SP", [ESP]>, DwarfRegNum<4>;
def BP : RegisterGroup<"BP", [EBP]>, DwarfRegNum<5>;
def SI : RegisterGroup<"SI", [ESI]>, DwarfRegNum<6>;
def DI : RegisterGroup<"DI", [EDI]>, DwarfRegNum<7>;
def AX : RegisterGroup<"AX", [EAX,RAX]>, DwarfRegNum<0>;
def CX : RegisterGroup<"CX", [ECX,RCX]>, DwarfRegNum<1>;
def DX : RegisterGroup<"DX", [EDX,RDX]>, DwarfRegNum<2>;
def BX : RegisterGroup<"BX", [EBX,RBX]>, DwarfRegNum<3>;
def SP : RegisterGroup<"SP", [ESP,RSP]>, DwarfRegNum<4>;
def BP : RegisterGroup<"BP", [EBP,RBP]>, DwarfRegNum<5>;
def SI : RegisterGroup<"SI", [ESI,RSI]>, DwarfRegNum<6>;
def DI : RegisterGroup<"DI", [EDI,RDI]>, DwarfRegNum<7>;
// X86-64 only
def R8W : RegisterGroup<"R8W", [R8D,R8]>, DwarfRegNum<8>;
def R9W : RegisterGroup<"R9W", [R9D,R9]>, DwarfRegNum<9>;
def R10W : RegisterGroup<"R10W", [R10D,R10]>, DwarfRegNum<10>;
def R11W : RegisterGroup<"R11W", [R11D,R11]>, DwarfRegNum<11>;
def R12W : RegisterGroup<"R12W", [R12D,R12]>, DwarfRegNum<12>;
def R13W : RegisterGroup<"R13W", [R13D,R13]>, DwarfRegNum<13>;
def R14W : RegisterGroup<"R14W", [R14D,R14]>, DwarfRegNum<14>;
def R15W : RegisterGroup<"R15W", [R15D,R15]>, DwarfRegNum<15>;
// 8-bit registers
def AL : RegisterGroup<"AL", [AX,EAX]>, DwarfRegNum<0>;
def CL : RegisterGroup<"CL", [CX,ECX]>, DwarfRegNum<1>;
def DL : RegisterGroup<"DL", [DX,EDX]>, DwarfRegNum<2>;
def BL : RegisterGroup<"BL", [BX,EBX]>, DwarfRegNum<3>;
def AH : RegisterGroup<"AH", [AX,EAX]>, DwarfRegNum<0>;
def CH : RegisterGroup<"CH", [CX,ECX]>, DwarfRegNum<1>;
def DH : RegisterGroup<"DH", [DX,EDX]>, DwarfRegNum<2>;
def BH : RegisterGroup<"BH", [BX,EBX]>, DwarfRegNum<3>;
// Low registers
def AL : RegisterGroup<"AL", [AX,EAX,RAX]>, DwarfRegNum<0>;
def CL : RegisterGroup<"CL", [CX,ECX,RCX]>, DwarfRegNum<1>;
def DL : RegisterGroup<"DL", [DX,EDX,RDX]>, DwarfRegNum<2>;
def BL : RegisterGroup<"BL", [BX,EBX,RBX]>, DwarfRegNum<3>;
// X86-64 only
def SIL : RegisterGroup<"SIL", [SI,ESI,RSI]>, DwarfRegNum<4>;
def DIL : RegisterGroup<"DIL", [DI,EDI,RDI]>, DwarfRegNum<5>;
def BPL : RegisterGroup<"BPL", [BP,EBP,RBP]>, DwarfRegNum<6>;
def SPL : RegisterGroup<"SPL", [SP,ESP,RSP]>, DwarfRegNum<7>;
def R8B : RegisterGroup<"R8B", [R8W,R8D,R8]>, DwarfRegNum<8>;
def R9B : RegisterGroup<"R9B", [R9W,R9D,R9]>, DwarfRegNum<9>;
def R10B : RegisterGroup<"R10B", [R10W,R10D,R10]>, DwarfRegNum<10>;
def R11B : RegisterGroup<"R11B", [R11W,R11D,R11]>, DwarfRegNum<11>;
def R12B : RegisterGroup<"R12B", [R12W,R12D,R12]>, DwarfRegNum<12>;
def R13B : RegisterGroup<"R13B", [R13W,R13D,R13]>, DwarfRegNum<13>;
def R14B : RegisterGroup<"R14B", [R14W,R14D,R14]>, DwarfRegNum<14>;
def R15B : RegisterGroup<"R15B", [R15W,R15D,R15]>, DwarfRegNum<15>;
// High registers X86-32 only
def AH : RegisterGroup<"AH", [AX,EAX,RAX]>, DwarfRegNum<0>;
def CH : RegisterGroup<"CH", [CX,ECX,RCX]>, DwarfRegNum<1>;
def DH : RegisterGroup<"DH", [DX,EDX,RDX]>, DwarfRegNum<2>;
def BH : RegisterGroup<"BH", [BX,EBX,RBX]>, DwarfRegNum<3>;
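// --- Editor's sketch, not part of this file ---------------------------------
// TableGen lowers the Register / RegisterGroup defs above into alias tables,
// so the allocator knows that writing AL also touches AX, EAX, and RAX.
// A minimal consumer, assuming this era's MRegisterInfo::getAliasSet()
// (which returns a 0-terminated array of aliased register numbers):
//
//   static bool anyAliasLive(const MRegisterInfo &MRI, unsigned Reg,
//                            const bool *IsRegLive) {
//     for (const unsigned *AS = MRI.getAliasSet(Reg); AS && *AS; ++AS)
//       if (IsRegLive[*AS])   // e.g. AL is unusable while RAX is live.
//         return true;
//     return false;
//   }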
// MMX Registers. These are actually aliased to ST0 .. ST7
def MM0 : Register<"MM0">, DwarfRegNum<29>;
@ -73,14 +130,24 @@ let Namespace = "X86" in {
def FP6 : Register<"FP6">, DwarfRegNum<-1>;
// XMM Registers, used by the various SSE instruction set extensions
def XMM0: Register<"XMM0">, DwarfRegNum<21>;
def XMM1: Register<"XMM1">, DwarfRegNum<22>;
def XMM2: Register<"XMM2">, DwarfRegNum<23>;
def XMM3: Register<"XMM3">, DwarfRegNum<24>;
def XMM4: Register<"XMM4">, DwarfRegNum<25>;
def XMM5: Register<"XMM5">, DwarfRegNum<26>;
def XMM6: Register<"XMM6">, DwarfRegNum<27>;
def XMM7: Register<"XMM7">, DwarfRegNum<28>;
def XMM0: Register<"XMM0">, DwarfRegNum<17>;
def XMM1: Register<"XMM1">, DwarfRegNum<18>;
def XMM2: Register<"XMM2">, DwarfRegNum<19>;
def XMM3: Register<"XMM3">, DwarfRegNum<20>;
def XMM4: Register<"XMM4">, DwarfRegNum<21>;
def XMM5: Register<"XMM5">, DwarfRegNum<22>;
def XMM6: Register<"XMM6">, DwarfRegNum<23>;
def XMM7: Register<"XMM7">, DwarfRegNum<24>;
// X86-64 only
def XMM8: Register<"XMM8">, DwarfRegNum<25>;
def XMM9: Register<"XMM9">, DwarfRegNum<26>;
def XMM10: Register<"XMM10">, DwarfRegNum<27>;
def XMM11: Register<"XMM11">, DwarfRegNum<28>;
def XMM12: Register<"XMM12">, DwarfRegNum<29>;
def XMM13: Register<"XMM13">, DwarfRegNum<30>;
def XMM14: Register<"XMM14">, DwarfRegNum<31>;
def XMM15: Register<"XMM15">, DwarfRegNum<32>;
// Floating point stack registers
def ST0 : Register<"ST(0)">, DwarfRegNum<11>;
@ -99,52 +166,247 @@ let Namespace = "X86" in {
// implicitly defined to be the register allocation order.
//
// List AL,CL,DL before AH,CH,DH, as X86 processors often suffer from false
// dependences between upper and lower parts of the register. BL and BH are
// last because they are call clobbered. Both Athlon and P4 chips suffer this
// issue.
def GR8 : RegisterClass<"X86", [i8], 8, [AL, CL, DL, AH, CH, DH, BL, BH]>;
def GR16 : RegisterClass<"X86", [i16], 16, [AX, CX, DX, SI, DI, BX, BP, SP]> {
// List call-clobbered registers before callee-save registers. RBX, RBP (and
// R12, R13, R14, and R15 for X86-64) are callee-save registers.
// In 64-bit mode there are 12 additional i8 registers: SIL, DIL, BPL, SPL,
// and R8B ... R15B.
// FIXME: Allow AH, CH, DH, BH in 64-bit mode for non-REX instructions.
def GR8 : RegisterClass<"X86", [i8], 8,
[AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL, BPL, SPL,
R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
// Does the function dedicate RBP / EBP to being a frame ptr?
// If so, don't allocate SPL or BPL.
static const unsigned X86_GR8_AO_64_fp[] =
{X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
X86::R8B, X86::R9B, X86::R10B, X86::R11B,
X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B};
// If not, just don't allocate SPL.
static const unsigned X86_GR8_AO_64[] =
{X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
X86::R8B, X86::R9B, X86::R10B, X86::R11B,
X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL};
// In 32-bit mode, none of the 8-bit registers aliases EBP or ESP.
static const unsigned X86_GR8_AO_32[] =
{X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH};
GR8Class::iterator
GR8Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (!Subtarget.is64Bit())
return X86_GR8_AO_32;
else if (hasFP(MF))
return X86_GR8_AO_64_fp;
else
return X86_GR8_AO_64;
}
GR8Class::iterator
GR8Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (!Subtarget.is64Bit())
return X86_GR8_AO_32 + (sizeof(X86_GR8_AO_32) / sizeof(unsigned));
else if (hasFP(MF))
return X86_GR8_AO_64_fp + (sizeof(X86_GR8_AO_64_fp) / sizeof(unsigned));
else
return X86_GR8_AO_64 + (sizeof(X86_GR8_AO_64) / sizeof(unsigned));
}
}];
}
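// --- Editor's sketch, not part of this file ---------------------------------
// The MethodBodies above give GR8 a subtarget-dependent allocation order.
// An allocator consumes it by walking the [begin, end) range in preference
// order; roughly (era API, with const unsigned* iterators):
//
//   static unsigned pickReg(const TargetRegisterClass &RC,
//                           const MachineFunction &MF,
//                           const bool *IsRegFree) {
//     for (TargetRegisterClass::iterator I = RC.allocation_order_begin(MF),
//            E = RC.allocation_order_end(MF); I != E; ++I)
//       if (IsRegFree[*I])
//         return *I;   // First free register in preference order wins.
//     return 0;        // Nothing free: the allocator must spill.
//   }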
def GR16 : RegisterClass<"X86", [i16], 16,
[AX, CX, DX, SI, DI, BX, BP, SP,
R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
// Does the function dedicate RBP / EBP to being a frame ptr?
// If so, don't allocate SP or BP.
static const unsigned X86_GR16_AO_64_fp[] =
{X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
X86::R8W, X86::R9W, X86::R10W, X86::R11W,
X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W};
static const unsigned X86_GR16_AO_32_fp[] =
{X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX};
// If not, just don't allocate SP.
static const unsigned X86_GR16_AO_64[] =
{X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
X86::R8W, X86::R9W, X86::R10W, X86::R11W,
X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W, X86::BP};
static const unsigned X86_GR16_AO_32[] =
{X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP};
GR16Class::iterator
GR16Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (Subtarget.is64Bit()) {
if (hasFP(MF))
return X86_GR16_AO_64_fp;
else
return X86_GR16_AO_64;
} else {
if (hasFP(MF))
return X86_GR16_AO_32_fp;
else
return X86_GR16_AO_32;
}
}
GR16Class::iterator
GR16Class::allocation_order_end(const MachineFunction &MF) const {
if (hasFP(MF)) // Does the function dedicate EBP to being a frame ptr?
return end()-2; // If so, don't allocate SP or BP
else
return end()-1; // If not, just don't allocate SP
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (Subtarget.is64Bit()) {
if (hasFP(MF))
return X86_GR16_AO_64_fp+(sizeof(X86_GR16_AO_64_fp)/sizeof(unsigned));
else
return X86_GR16_AO_64 + (sizeof(X86_GR16_AO_64) / sizeof(unsigned));
} else {
if (hasFP(MF))
return X86_GR16_AO_32_fp+(sizeof(X86_GR16_AO_32_fp)/sizeof(unsigned));
else
return X86_GR16_AO_32 + (sizeof(X86_GR16_AO_32) / sizeof(unsigned));
}
}
}];
}
def GR32 : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
// Does the function dedicate RBP / EBP to being a frame ptr?
// If so, don't allocate ESP or EBP.
static const unsigned X86_GR32_AO_64_fp[] =
{X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
X86::R8D, X86::R9D, X86::R10D, X86::R11D,
X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D};
static const unsigned X86_GR32_AO_32_fp[] =
{X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX};
// If not, just don't allocate ESP.
static const unsigned X86_GR32_AO_64[] =
{X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
X86::R8D, X86::R9D, X86::R10D, X86::R11D,
X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP};
static const unsigned X86_GR32_AO_32[] =
{X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP};
GR32Class::iterator
GR32Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (Subtarget.is64Bit()) {
if (hasFP(MF))
return X86_GR32_AO_64_fp;
else
return X86_GR32_AO_64;
} else {
if (hasFP(MF))
return X86_GR32_AO_32_fp;
else
return X86_GR32_AO_32;
}
}
GR32Class::iterator
GR32Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (Subtarget.is64Bit()) {
if (hasFP(MF))
return X86_GR32_AO_64_fp+(sizeof(X86_GR32_AO_64_fp)/sizeof(unsigned));
else
return X86_GR32_AO_64 + (sizeof(X86_GR32_AO_64) / sizeof(unsigned));
} else {
if (hasFP(MF))
return X86_GR32_AO_32_fp+(sizeof(X86_GR32_AO_32_fp)/sizeof(unsigned));
else
return X86_GR32_AO_32 + (sizeof(X86_GR32_AO_32) / sizeof(unsigned));
}
}
}];
}
def GR64 : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP]> {
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
GR32Class::iterator
GR32Class::allocation_order_end(const MachineFunction &MF) const {
if (hasFP(MF)) // Does the function dedicate EBP to being a frame ptr?
return end()-2; // If so, don't allocate ESP or EBP
GR64Class::iterator
GR64Class::allocation_order_end(const MachineFunction &MF) const {
if (hasFP(MF)) // Does the function dedicate RBP to being a frame ptr?
return end()-2; // If so, don't allocate RSP or RBP
else
return end()-1; // If not, just don't allocate ESP
return end()-1; // If not, just don't allocate RSP
}
}];
}
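// Editor's note: the end()-N idiom above works because the allocation order
// is a static array and end() points one past its last element. GR64 lists
// RBP and RSP last, so trimming the range reserves them with no extra table:
//   hasFP(MF) ? end()-2   // drop both RSP and RBP (RBP is the frame ptr)
//             : end()-1   // drop only RSP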
// GR16, GR32 subclasses which contain only the registers that have 8-bit
// sub-registers (AX/EAX, CX/ECX, DX/EDX, BX/EBX). These should only be
// used in 32-bit mode.
def GR16_ : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]>;
def GR32_ : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]>;
// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>;
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]> {
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
FR32Class::iterator
FR32Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (!Subtarget.is64Bit())
return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
else
return end();
}
}];
}
def FR64 : RegisterClass<"X86", [f64], 64,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>;
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]> {
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
FR64Class::iterator
FR64Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (!Subtarget.is64Bit())
return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
else
return end();
}
}];
}
// FIXME: This sets up the floating point register files as though they are f64
// values, though they really are f80 values. This will cause us to spill
@ -174,4 +436,21 @@ def RST : RegisterClass<"X86", [f64], 32,
def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32], 64,
[MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>;
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]> {
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
VR128Class::iterator
VR128Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (!Subtarget.is64Bit())
return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
else
return end();
}
}];
}


@ -12,9 +12,10 @@
//===----------------------------------------------------------------------===//
#include "X86Subtarget.h"
#include "X86GenSubtarget.inc"
#include "llvm/Module.h"
#include "llvm/Support/CommandLine.h"
#include "X86GenSubtarget.inc"
#include <iostream>
using namespace llvm;
cl::opt<X86Subtarget::AsmWriterFlavorTy>
@ -29,7 +30,18 @@ AsmWriterFlavor("x86-asm-syntax", cl::init(X86Subtarget::unset),
/// specified arguments. If we can't run cpuid on the host, return true.
static bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
unsigned *rECX, unsigned *rEDX) {
#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
#if defined(__x86_64__)
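// Editor's note: cpuid clobbers EBX, which GCC reserves as the PIC base
// register on i386, so it cannot appear in the clobber list there; this
// 64-bit variant follows the same pattern, preserving RBX around the cpuid
// and returning the EBX result through ESI (the "=S" constraint).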
asm ("pushq\t%%rbx\n\t"
"cpuid\n\t"
"movl\t%%ebx, %%esi\n\t"
"popq\t%%rbx"
: "=a" (*rEAX),
"=S" (*rEBX),
"=c" (*rECX),
"=d" (*rEDX)
: "a" (value));
return false;
#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
#if defined(__GNUC__)
asm ("pushl\t%%ebx\n\t"
"cpuid\n\t"
@ -99,8 +111,8 @@ static const char *GetCurrentX86CPU() {
case 9:
case 13: return "pentium-m";
case 14: return "yonah";
default:
return (Model > 14) ? "yonah" : "i686";
case 15: return "core2";
default: return "i686";
}
case 15: {
switch (Model) {
@ -154,14 +166,16 @@ static const char *GetCurrentX86CPU() {
}
}
X86Subtarget::X86Subtarget(const Module &M, const std::string &FS) {
stackAlignment = 8;
// FIXME: this is a known good value for Yonah. Not sure about others.
MinRepStrSizeThreshold = 128;
X86SSELevel = NoMMXSSE;
X863DNowLevel = NoThreeDNow;
AsmFlavor = AsmWriterFlavor;
Is64Bit = false;
X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
: AsmFlavor(AsmWriterFlavor)
, X86SSELevel(NoMMXSSE)
, X863DNowLevel(NoThreeDNow)
, HasX86_64(false)
, stackAlignment(8)
// FIXME: this is a known good value for Yonah. How about others?
, MinRepStrSizeThreshold(128)
, Is64Bit(is64Bit)
, TargetType(isELF) { // Default to ELF unless otherwise specified.
// Determine default and user specified characteristics
std::string CPU = GetCurrentX86CPU();
@ -169,9 +183,12 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS) {
// Parse features string.
ParseSubtargetFeatures(FS, CPU);
// Default to ELF unless otherwise specified.
TargetType = isELF;
if (Is64Bit && !HasX86_64) {
std::cerr << "Warning: Generation of 64-bit code for a 32-bit processor "
"requested.\n";
HasX86_64 = true;
}
// Set the boolean corresponding to the current target triple, or the default
// if one cannot be determined, to true.
const std::string& TT = M.getTargetTriple();

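// --- Editor's sketch, not part of this commit -------------------------------
// HasX86_64 is set by the feature-string parser above, but the underlying
// host probe rests on the GetCpuIDAndInfo helper: CPUID leaf 0x80000001
// reports long mode (AMD64/EM64T) in bit 29 of EDX. A hypothetical helper:
static bool HostHasLongMode() {
  unsigned EAX, EBX, ECX, EDX;
  if (GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX))
    return false;             // cpuid is unavailable on this host.
  return (EDX >> 29) & 1;     // The "LM" (long mode) capability bit.
}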

@ -44,9 +44,9 @@ protected:
/// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported.
X863DNowEnum X863DNowLevel;
/// Is64Bit - True if the processor supports EM64T.
bool Is64Bit;
/// HasX86_64 - True if the processor supports X86-64 instructions.
bool HasX86_64;
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
@ -55,6 +55,11 @@ protected:
/// Min. memset / memcpy size that is turned into rep/movs, rep/stos ops.
unsigned MinRepStrSizeThreshold;
private:
/// Is64Bit - True if the processor supports 64-bit instructions and the
/// module pointer size is 64 bits.
bool Is64Bit;
public:
enum {
isELF, isCygwin, isDarwin, isWindows
@ -63,7 +68,7 @@ public:
/// This constructor initializes the data members to match that
/// of the specified module.
///
X86Subtarget(const Module &M, const std::string &FS);
X86Subtarget(const Module &M, const std::string &FS, bool is64Bit);
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every


@ -26,13 +26,16 @@ X86TargetAsmInfo::X86TargetAsmInfo(const X86TargetMachine &TM) {
case X86Subtarget::isDarwin:
AlignmentIsInBytes = false;
GlobalPrefix = "_";
Data64bitsDirective = 0; // we can't emit a 64-bit unit
if (!Subtarget->is64Bit())
Data64bitsDirective = 0; // we can't emit a 64-bit unit
ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
PrivateGlobalPrefix = "L"; // Marker for constant pool idxs
ConstantPoolSection = "\t.const\n";
JumpTableDataSection = "\t.const\n"; // FIXME: depends on PIC mode
FourByteConstantSection = "\t.literal4\n";
EightByteConstantSection = "\t.literal8\n";
if (Subtarget->is64Bit())
SixteenByteConstantSection = "\t.literal16\n";
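// Editor's note: .literal16 holds 16-byte constants (e.g. SSE vector
// immediates); this commit only enables the section for the 64-bit target.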
LCOMMDirective = "\t.lcomm\t";
COMMDirectiveTakesAlignment = false;
HasDotTypeDotSizeDirective = false;


@ -33,22 +33,31 @@ int X86TargetMachineModule = 0;
namespace {
// Register the target.
RegisterTarget<X86TargetMachine> X("x86", " IA-32 (Pentium and above)");
RegisterTarget<X86_32TargetMachine>
X("x86", " 32-bit X86: Pentium-Pro and above");
RegisterTarget<X86_64TargetMachine>
Y("x86-64", " 64-bit X86: EM64T and AMD64");
}
const TargetAsmInfo *X86TargetMachine::createTargetAsmInfo() const {
return new X86TargetAsmInfo(*this);
}
unsigned X86TargetMachine::getJITMatchQuality() {
unsigned X86_32TargetMachine::getJITMatchQuality() {
#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
return 10;
#else
return 0;
#endif
return 0;
}
unsigned X86TargetMachine::getModuleMatchQuality(const Module &M) {
unsigned X86_64TargetMachine::getJITMatchQuality() {
#if defined(__x86_64__)
return 10;
#endif
return 0;
}
unsigned X86_32TargetMachine::getModuleMatchQuality(const Module &M) {
// We strongly match "i[3-9]86-*".
std::string TT = M.getTargetTriple();
if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
@ -65,18 +74,55 @@ unsigned X86TargetMachine::getModuleMatchQuality(const Module &M) {
return getJITMatchQuality()/2;
}
unsigned X86_64TargetMachine::getModuleMatchQuality(const Module &M) {
// We strongly match "x86_64-*".
std::string TT = M.getTargetTriple();
if (TT.size() >= 7 && TT[0] == 'x' && TT[1] == '8' && TT[2] == '6' &&
TT[3] == '_' && TT[4] == '6' && TT[5] == '4' && TT[6] == '-')
return 20;
if (M.getEndianness() == Module::LittleEndian &&
M.getPointerSize() == Module::Pointer64)
return 10; // Weak match
else if (M.getEndianness() != Module::AnyEndianness ||
M.getPointerSize() != Module::AnyPointerSize)
return 0; // Match for some other target
return getJITMatchQuality()/2;
}
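// Editor's note: e.g. a triple of "x86_64-apple-darwin8" passes the prefix
// test above and scores 20; a triple-less little-endian module with 64-bit
// pointers only earns the weak score of 10.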
X86_32TargetMachine::X86_32TargetMachine(const Module &M, const std::string &FS)
: X86TargetMachine(M, FS, false) {
}
X86_64TargetMachine::X86_64TargetMachine(const Module &M, const std::string &FS)
: X86TargetMachine(M, FS, true) {
}
/// X86TargetMachine ctor - Create an ILP32 architecture model by default,
/// or a 64-bit model when is64Bit is set.
///
X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS)
: Subtarget(M, FS), DataLayout("e-p:32:32-d:32-l:32"),
X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS, bool is64Bit)
: Subtarget(M, FS, is64Bit),
DataLayout(Subtarget.is64Bit() ?
std::string("e-p:64:64-d:32-l:32") :
std::string("e-p:32:32-d:32-l:32")),
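// Editor's gloss, assuming the era's TargetData string syntax: "e" is
// little-endian, "p:64:64" / "p:32:32" give pointer size and ABI
// alignment, and the trailing "d:32-l:32" alignments are shared by
// both layouts here.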
FrameInfo(TargetFrameInfo::StackGrowsDown,
Subtarget.getStackAlignment(), -4),
Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
InstrInfo(*this), JITInfo(*this), TLInfo(*this) {
if (getRelocationModel() == Reloc::Default)
if (Subtarget.isTargetDarwin())
setRelocationModel(Reloc::DynamicNoPIC);
else
setRelocationModel(Reloc::PIC_);
if (Subtarget.is64Bit()) {
// No DynamicNoPIC support under X86-64.
if (getRelocationModel() == Reloc::DynamicNoPIC)
setRelocationModel(Reloc::PIC_);
// Default X86-64 code model is small.
if (getCodeModel() == CodeModel::Default)
setCodeModel(CodeModel::Small);
}
}
//===----------------------------------------------------------------------===//


@ -37,7 +37,7 @@ protected:
virtual const TargetAsmInfo *createTargetAsmInfo() const;
public:
X86TargetMachine(const Module &M, const std::string &FS);
X86TargetMachine(const Module &M, const std::string &FS, bool is64Bit);
virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
@ -54,6 +54,7 @@ public:
static unsigned getModuleMatchQuality(const Module &M);
static unsigned getJITMatchQuality();
// Set up the pass pipeline.
virtual bool addInstSelector(FunctionPassManager &PM, bool Fast);
virtual bool addPostRegAlloc(FunctionPassManager &PM, bool Fast);
@ -64,6 +65,27 @@ public:
virtual bool addCodeEmitter(FunctionPassManager &PM, bool Fast,
MachineCodeEmitter &MCE);
};
/// X86_32TargetMachine - X86 32-bit target machine.
///
class X86_32TargetMachine : public X86TargetMachine {
public:
X86_32TargetMachine(const Module &M, const std::string &FS);
static unsigned getJITMatchQuality();
static unsigned getModuleMatchQuality(const Module &M);
};
/// X86_64TargetMachine - X86 64-bit target machine.
///
class X86_64TargetMachine : public X86TargetMachine {
public:
X86_64TargetMachine(const Module &M, const std::string &FS);
static unsigned getJITMatchQuality();
static unsigned getModuleMatchQuality(const Module &M);
};
} // End llvm namespace
#endif