forked from OSchip/llvm-project
parent 02a7d09b40
commit 11b0a5dbd4

@@ -0,0 +1,269 @@
//===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===//

Implement different PIC models? Right now we only support Mac OS X with small
PIC code model.

//===---------------------------------------------------------------------===//

Make use of "Red Zone".

//===---------------------------------------------------------------------===//

Implement __int128 and long double support.

//===---------------------------------------------------------------------===//

For this:

extern void xx(void);
void bar(void) {
  xx();
}

gcc compiles to:

        .globl _bar
_bar:
        jmp     _xx

We need to do the tailcall optimization as well.

//===---------------------------------------------------------------------===//

For this:

int test(int a)
{
  return a * 3;
}

We generate

        leal (%edi,%edi,2), %eax

We should be generating

        leal (%rdi,%rdi,2), %eax

instead. The latter form does not require the 67H address-size prefix.

It's probably OK to simply emit the corresponding 64-bit super-class registers
in this case? (The byte-level cost is sketched below.)
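
For reference, a sketch of the encodings involved (my illustration from the
Intel manual, not part of the original note):

    // leal (%edi,%edi,2), %eax: 32-bit address regs force a 0x67 prefix
    unsigned char WithPrefix[]    = { 0x67, 0x8D, 0x04, 0x7F };
    // leal (%rdi,%rdi,2), %eax: default 64-bit address size, no prefix
    unsigned char WithoutPrefix[] = { 0x8D, 0x04, 0x7F };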

//===---------------------------------------------------------------------===//

AMD64 Optimization Manual 8.2 has some nice information about optimizing integer
multiplication by a constant. How much of it applies to Intel's X86-64
implementation? There are definite trade-offs to consider: latency vs. register
pressure vs. code size.
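
As a concrete illustration of the trade-off (my sketch, not from the manual):
a multiply by 9 decomposes into shift-and-add form, which on x86-64 is a single
leaq (%rdi,%rdi,8), %rax, versus an imul with higher latency but smaller code
for constants that do not decompose as nicely:

    // x * 9 rewritten as (x << 3) + x; one lea, no imul latency.
    long mul9(long x) { return (x << 3) + x; }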

//===---------------------------------------------------------------------===//

Are we better off using branches instead of cmove to implement FP to
unsigned i64?

_conv:
        ucomiss LC0(%rip), %xmm0
        cvttss2siq      %xmm0, %rdx
        jb      L3
        subss   LC0(%rip), %xmm0
        movabsq $-9223372036854775808, %rax
        cvttss2siq      %xmm0, %rdx
        xorq    %rax, %rdx
L3:
        movq    %rdx, %rax
        ret

instead of

_conv:
        movss   LCPI1_0(%rip), %xmm1
        cvttss2siq      %xmm0, %rcx
        movaps  %xmm0, %xmm2
        subss   %xmm1, %xmm2
        cvttss2siq      %xmm2, %rax
        movabsq $-9223372036854775808, %rdx
        xorq    %rdx, %rax
        ucomiss %xmm1, %xmm0
        cmovb   %rcx, %rax
        ret

The jb branch seems highly likely to be taken; when it is, the branching
version executes a few fewer instructions.
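
Both sequences implement the same algorithm; a C sketch of it (my
reconstruction from the assembly above):

    #include <stdint.h>

    uint64_t f2u64(float x) {
      const float Two63 = 9223372036854775808.0f;  // 2^63, the LC0 constant
      if (x < Two63)                               // the ucomiss + jb path
        return (uint64_t)(int64_t)x;               // plain cvttss2siq
      // Large inputs: bias down by 2^63, convert, then restore the top bit,
      // which is the movabsq $-9223372036854775808 / xorq pair.
      return (uint64_t)(int64_t)(x - Two63) ^ 0x8000000000000000ULL;
    }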

//===---------------------------------------------------------------------===//

Poor codegen:

int X[2];
int b;
void test(void) {
  memset(X, b, 2*sizeof(X[0]));
}

llc:
        movq _b@GOTPCREL(%rip), %rax
        movzbq (%rax), %rax
        movq %rax, %rcx
        shlq $8, %rcx
        orq %rax, %rcx
        movq %rcx, %rax
        shlq $16, %rax
        orq %rcx, %rax
        movq %rax, %rcx
        shlq $32, %rcx
        movq _X@GOTPCREL(%rip), %rdx
        orq %rax, %rcx
        movq %rcx, (%rdx)
        ret

gcc:
        movq    _b@GOTPCREL(%rip), %rax
        movabsq $72340172838076673, %rdx
        movzbq  (%rax), %rax
        imulq   %rdx, %rax
        movq    _X@GOTPCREL(%rip), %rdx
        movq    %rax, (%rdx)
        ret
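
gcc's trick is the classic byte splat by multiplication: 72340172838076673 is
0x0101010101010101, so a single imulq replicates the byte into all eight lanes.
A sketch (mine, not from the original note):

    #include <stdint.h>

    uint64_t splat8(uint8_t b) {
      // 0x0101010101010101 * b copies b into every byte of the result,
      // replacing the shift/or chain llc emits above.
      return (uint64_t)b * 0x0101010101010101ULL;
    }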

//===---------------------------------------------------------------------===//

The vararg function prologue can be further optimized. Currently all XMM
registers are stored into the register save area. Most of the stores can be
eliminated, since the upper bound on the number of XMM registers used is passed
in %al. gcc produces something like the following:

        movzbl  %al, %edx
        leaq    0(,%rdx,4), %rax
        leaq    4+L2(%rip), %rdx
        leaq    239(%rsp), %rax
        jmp     *%rdx
        movaps  %xmm7, -15(%rax)
        movaps  %xmm6, -31(%rax)
        movaps  %xmm5, -47(%rax)
        movaps  %xmm4, -63(%rax)
        movaps  %xmm3, -79(%rax)
        movaps  %xmm2, -95(%rax)
        movaps  %xmm1, -111(%rax)
        movaps  %xmm0, -127(%rax)
L2:

It jumps over the movaps that do not need to be executed. It is hard to see
this being significant, as it adds 5 instructions (including an indirect
branch) to avoid executing 0 to 8 stores in the function prologue.

Perhaps we can optimize for the common case where no XMM registers are used for
parameter passing, i.e. if %al == 0, jump over all the stores (sketched below).
Or, in the case of a leaf function where we can determine that no XMM input
parameter is needed, avoid emitting the stores at all.
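
A sketch of the proposed fast path (my pseudocode; storeXMM is a hypothetical
stand-in for the individual movaps spills):

    void storeXMM(unsigned Reg, char *Addr);  // hypothetical helper

    void spillVarArgXMMRegs(unsigned AL, char *RegSaveArea) {
      if (AL == 0)                 // common case: no FP varargs,
        return;                    // skip all eight stores
      for (unsigned i = 0; i != 8; ++i)
        storeXMM(i, RegSaveArea + 16 * i);  // one movaps per register
    }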

//===---------------------------------------------------------------------===//

AMD64 has a complex calling convention for aggregate passing by value:

1. If the size of an object is larger than two eightbytes, or in C++, is a non-
   POD structure or union type, or contains unaligned fields, it has class
   MEMORY.
2. Both eightbytes get initialized to class NO_CLASS.
3. Each field of an object is classified recursively so that always two fields
   are considered. The resulting class is calculated according to the classes
   of the fields in the eightbyte (sketched in code below):
   (a) If both classes are equal, this is the resulting class.
   (b) If one of the classes is NO_CLASS, the resulting class is the other
       class.
   (c) If one of the classes is MEMORY, the result is the MEMORY class.
   (d) If one of the classes is INTEGER, the result is INTEGER.
   (e) If one of the classes is X87, X87UP, or COMPLEX_X87, MEMORY is used as
       class.
   (f) Otherwise class SSE is used.
4. Then a post-merger cleanup is done:
   (a) If one of the classes is MEMORY, the whole argument is passed in memory.
   (b) If SSEUP is not preceded by SSE, it is converted to SSE.
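
The step-3 merge rule is mechanical enough to sketch directly (illustration
only; the enum mirrors the SysV ABI class names, and this is not the full
recursive classifier):

    enum Class { NO_CLASS, INTEGER, SSE, SSEUP, X87, X87UP, COMPLEX_X87,
                 MEMORY };

    Class merge(Class A, Class B) {
      if (A == B) return A;                               // rule (a)
      if (A == NO_CLASS) return B;                        // rule (b)
      if (B == NO_CLASS) return A;
      if (A == MEMORY || B == MEMORY) return MEMORY;      // rule (c)
      if (A == INTEGER || B == INTEGER) return INTEGER;   // rule (d)
      if (A == X87 || A == X87UP || A == COMPLEX_X87 ||
          B == X87 || B == X87UP || B == COMPLEX_X87)
        return MEMORY;                                    // rule (e)
      return SSE;                                         // rule (f)
    }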

Currently the llvm frontend does not handle this correctly.

Problem 1:
    typedef struct { int i; double d; } QuadWordS;
It is currently passed in two i64 integer registers. However, a gcc-compiled
callee expects the second element 'd' to be passed in XMM0.

Problem 2:
    typedef struct { int32_t i; float j; double d; } QuadWordS;
The size of the first two fields == i64, so they will be combined and passed in
an integer register RDI. The third field is still passed in XMM0.

Problem 3:
    typedef struct { int64_t i; int8_t j; int64_t d; } S;
    void test(S s)
The size of this aggregate is greater than two i64s, so it should be passed in
memory. Currently llvm breaks it down and passes it in three integer registers.

Problem 4:
Taking problem 3 one step further, suppose a function expects an aggregate
value in memory followed by more parameter(s) passed in register(s):
    void test(S s, int b)

LLVM IR does not allow passing aggregates by value, so the frontend must break
the aggregate value (in problems 3 and 4) into a number of scalar values:
    void %test(long %s.i, byte %s.j, long %s.d);

However, if the backend were to lower this code literally, it would pass the 3
values in integer registers. To force them to be passed in memory, the frontend
should change the function signature to:
    void %test(long %undef1, long %undef2, long %undef3, long %undef4,
               long %undef5, long %undef6,
               long %s.i, byte %s.j, long %s.d);
And the caller would look something like this:
    call void %test( undef, undef, undef, undef, undef, undef,
                     %tmp.s.i, %tmp.s.j, %tmp.s.d );
The first 6 undef parameters exhaust the 6 integer registers used for parameter
passing; the following three integer values are then forced into memory.

For problem 4, the parameter 'b' would be moved to the front of the parameter
list so it will be passed in a register:
    void %test(int %b,
               long %undef1, long %undef2, long %undef3, long %undef4,
               long %undef5, long %undef6,
               long %s.i, byte %s.j, long %s.d);

//===---------------------------------------------------------------------===//

For this:

extern int dst[];
extern int* ptr;

void test(void) {
  ptr = dst;
}

We generate this code for the static relocation model:

_test:
        leaq _dst(%rip), %rax
        movq %rax, _ptr(%rip)
        ret

If we are in the small code model, then we can treat _dst as a 32-bit constant:

        movq $_dst, _ptr(%rip)

Note, however, that we should continue to use RIP-relative addressing as much
as possible: the form above is actually one byte shorter than

        movq $_dst, _ptr
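
The one-byte difference comes from the addressing form: a RIP-relative ModRM
(mod=00, r/m=101) carries the disp32 directly, while the absolute disp32 form
needs an extra SIB byte. My illustration, assuming the standard REX.W + C7 /0
encoding of movq $imm32, mem:

    // movq $_dst, _ptr(%rip): REX.W C7 05 disp32 imm32 -> 11 bytes
    unsigned char RIPRelative[] = { 0x48, 0xC7, 0x05, 0,0,0,0, 0,0,0,0 };
    // movq $_dst, _ptr:       REX.W C7 04 25 disp32 imm32 -> 12 bytes
    unsigned char Absolute[]    = { 0x48, 0xC7, 0x04, 0x25, 0,0,0,0, 0,0,0,0 };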

//===---------------------------------------------------------------------===//

Right now the asm printer assumes GlobalAddress operands are accessed via
RIP-relative addressing. Therefore, it is not possible to generate this:

        movabsq $__ZTV10polynomialIdE+16, %rax

That is OK for now since we only support the small code model, so the above is
selected as

        leaq __ZTV10polynomialIdE+16(%rip), %rax

This is probably slightly slower but is much shorter than movabsq. However, if
we were to support the medium or larger code models, we would need to use the
movabs instruction. We should probably introduce something like AbsoluteAddress
to distinguish it from GlobalAddress so the asm printer and JIT code emitter
can do the right thing.
@@ -20,8 +20,8 @@ include "../Target.td"
 // X86 Subtarget features.
 //
 
-def Feature64Bit : SubtargetFeature<"64bit", "Is64Bit", "true",
-                                    "Enable 64-bit instructions">;
+def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
+                                    "Support 64-bit instructions">;
 def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
                                   "Enable MMX instructions">;
 def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
@@ -61,6 +61,8 @@ def : Proc<"prescott", [FeatureMMX, FeatureSSE1, FeatureSSE2,
                         FeatureSSE3]>;
 def : Proc<"nocona", [FeatureMMX, FeatureSSE1, FeatureSSE2,
                       FeatureSSE3, Feature64Bit]>;
+def : Proc<"core2", [FeatureMMX, FeatureSSE1, FeatureSSE2,
+                     FeatureSSE3, Feature64Bit]>;
 
 def : Proc<"k6", [FeatureMMX]>;
 def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
@@ -105,16 +107,20 @@ def X86InstrInfo : InstrInfo {
 // should be kept up-to-date with the fields in the X86InstrInfo.h file.
 let TSFlagsFields = ["FormBits",
                      "hasOpSizePrefix",
+                     "hasAdSizePrefix",
                      "Prefix",
+                     "hasREX_WPrefix",
                      "ImmTypeBits",
                      "FPFormBits",
                      "Opcode"];
 let TSFlagsShifts = [0,
                      6,
                      7,
-                     11,
+                     8,
+                     12,
                      13,
-                     16];
+                     16,
+                     24];
 }
 
 // The X86 target supports two different syntaxes for emitting machine code.
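
Note: my reading of the new bit layout implied by the shift list above (field
widths are inferred from the gaps, not stated in the patch):

    // [0..5]   FormBits         [6]  hasOpSizePrefix  [7]  hasAdSizePrefix
    // [8..11]  Prefix           [12] hasREX_WPrefix   [13..15] ImmTypeBits
    // [16..23] FPFormBits       [24..31] Opcode
    unsigned packTSFlags(unsigned Form, bool OpSize, bool AdSize,
                         unsigned Prefix, bool REX_W, unsigned Imm,
                         unsigned FPForm, unsigned Opc) {
      return Form | (OpSize << 6) | (AdSize << 7) | (Prefix << 8) |
             (REX_W << 12) | (Imm << 13) | (FPForm << 16) | (Opc << 24);
    }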

@@ -126,8 +126,9 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
     O << '%';
     unsigned Reg = MO.getReg();
     if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
-      MVT::ValueType VT = (strcmp(Modifier,"subreg16") == 0)
-        ? MVT::i16 : MVT::i8;
+      MVT::ValueType VT = (strcmp(Modifier+6,"64") == 0) ?
+        MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
+                    ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
       Reg = getX86SubSuperRegister(Reg, VT);
     }
     for (const char *Name = RI.get(Reg).Name; *Name; ++Name)
@@ -148,9 +149,11 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
     if (!isMemOp) O << '$';
     O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << "_"
       << MO.getJumpTableIndex();
-    if (Subtarget->isTargetDarwin() &&
+    if (X86PICStyle == PICStyle::Stub &&
         TM.getRelocationModel() == Reloc::PIC_)
       O << "-\"L" << getFunctionNumber() << "$pb\"";
+    if (Subtarget->is64Bit())
+      O << "(%rip)";
     return;
   }
   case MachineOperand::MO_ConstantPoolIndex: {
@@ -158,7 +161,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
     if (!isMemOp) O << '$';
     O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
       << MO.getConstantPoolIndex();
-    if (Subtarget->isTargetDarwin() &&
+    if (X86PICStyle == PICStyle::Stub &&
         TM.getRelocationModel() == Reloc::PIC_)
       O << "-\"L" << getFunctionNumber() << "$pb\"";
     int Offset = MO.getOffset();
@@ -166,47 +169,59 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
       O << "+" << Offset;
     else if (Offset < 0)
       O << Offset;
+
+    if (Subtarget->is64Bit())
+      O << "(%rip)";
     return;
   }
   case MachineOperand::MO_GlobalAddress: {
     bool isCallOp = Modifier && !strcmp(Modifier, "call");
     bool isMemOp = Modifier && !strcmp(Modifier, "mem");
     if (!isMemOp && !isCallOp) O << '$';
-    // Darwin block shameless ripped from PPCAsmPrinter.cpp
-    if (Subtarget->isTargetDarwin() &&
+
+    GlobalValue *GV = MO.getGlobal();
+    std::string Name = Mang->getValueName(GV);
+    bool isExt = (GV->isExternal() || GV->hasWeakLinkage() ||
+                  GV->hasLinkOnceLinkage());
+    if (X86PICStyle == PICStyle::Stub &&
         TM.getRelocationModel() != Reloc::Static) {
-      GlobalValue *GV = MO.getGlobal();
-      std::string Name = Mang->getValueName(GV);
       // Link-once, External, or Weakly-linked global variables need
       // non-lazily-resolved stubs
-      if (GV->isExternal() || GV->hasWeakLinkage() ||
-          GV->hasLinkOnceLinkage()) {
+      if (isExt) {
         // Dynamically-resolved functions need a stub for the function.
-        if (isCallOp && isa<Function>(GV) && cast<Function>(GV)->isExternal()) {
+        if (isCallOp && isa<Function>(GV)) {
          FnStubs.insert(Name);
          O << "L" << Name << "$stub";
        } else {
          GVStubs.insert(Name);
          O << "L" << Name << "$non_lazy_ptr";
        }
-      } else {
-        O << Mang->getValueName(GV);
-      }
+      } else
+        O << Name;
       if (!isCallOp && TM.getRelocationModel() == Reloc::PIC_)
         O << "-\"L" << getFunctionNumber() << "$pb\"";
     } else
-      O << Mang->getValueName(MO.getGlobal());
+      O << Name;
 
     int Offset = MO.getOffset();
     if (Offset > 0)
       O << "+" << Offset;
     else if (Offset < 0)
       O << Offset;
+
+    if (!isCallOp &&
+        Subtarget->is64Bit()) {
+      if (isExt && TM.getRelocationModel() != Reloc::Static)
+        O << "@GOTPCREL";
+      O << "(%rip)";
+    }
+
     return;
   }
   case MachineOperand::MO_ExternalSymbol: {
     bool isCallOp = Modifier && !strcmp(Modifier, "call");
     if (isCallOp &&
-        Subtarget->isTargetDarwin() &&
+        X86PICStyle == PICStyle::Stub &&
         TM.getRelocationModel() != Reloc::Static) {
       std::string Name(TAI->getGlobalPrefix());
       Name += MO.getSymbolName();
@@ -216,6 +231,11 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
     }
     if (!isCallOp) O << '$';
     O << TAI->getGlobalPrefix() << MO.getSymbolName();
+
+    if (!isCallOp &&
+        Subtarget->is64Bit())
+      O << "(%rip)";
+
     return;
   }
   default:
@@ -238,7 +258,8 @@ void X86ATTAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) {
   }
 }
 
-void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){
+void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
+                                         const char *Modifier){
   assert(isMem(MI, Op) && "Invalid memory reference!");
 
   const MachineOperand &BaseReg = MI->getOperand(Op);
@@ -266,12 +287,13 @@ void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){
 
   if (IndexReg.getReg() || BaseReg.getReg()) {
     O << "(";
-    if (BaseReg.getReg())
-      printOperand(MI, Op);
+    if (BaseReg.getReg()) {
+      printOperand(MI, Op, Modifier);
+    }
 
     if (IndexReg.getReg()) {
       O << ",";
-      printOperand(MI, Op+2);
+      printOperand(MI, Op+2, Modifier);
       if (ScaleVal != 1)
         O << "," << ScaleVal;
     }
@@ -350,43 +372,25 @@ bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
 ///
 void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
   ++EmittedInsts;
-  // This works around some Darwin assembler bugs.
-  if (Subtarget->isTargetDarwin()) {
-    switch (MI->getOpcode()) {
-    case X86::REP_MOVSB:
-      O << "rep/movsb (%esi),(%edi)\n";
-      return;
-    case X86::REP_MOVSD:
-      O << "rep/movsl (%esi),(%edi)\n";
-      return;
-    case X86::REP_MOVSW:
-      O << "rep/movsw (%esi),(%edi)\n";
-      return;
-    case X86::REP_STOSB:
-      O << "rep/stosb\n";
-      return;
-    case X86::REP_STOSD:
-      O << "rep/stosl\n";
-      return;
-    case X86::REP_STOSW:
-      O << "rep/stosw\n";
-      return;
-    default:
-      break;
-    }
-  }
 
   // See if a truncate instruction can be turned into a nop.
   switch (MI->getOpcode()) {
   default: break;
-  case X86::TRUNC_GR32_GR16:
-  case X86::TRUNC_GR32_GR8:
-  case X86::TRUNC_GR16_GR8: {
+  case X86::TRUNC_64to32:
+  case X86::TRUNC_64to16:
+  case X86::TRUNC_32to16:
+  case X86::TRUNC_32to8:
+  case X86::TRUNC_16to8:
+  case X86::TRUNC_32_to8:
+  case X86::TRUNC_16_to8: {
     const MachineOperand &MO0 = MI->getOperand(0);
     const MachineOperand &MO1 = MI->getOperand(1);
     unsigned Reg0 = MO0.getReg();
     unsigned Reg1 = MO1.getReg();
-    if (MI->getOpcode() == X86::TRUNC_GR32_GR16)
+    unsigned Opc = MI->getOpcode();
+    if (Opc == X86::TRUNC_64to32)
+      Reg1 = getX86SubSuperRegister(Reg1, MVT::i32);
+    else if (Opc == X86::TRUNC_32to16 || Opc == X86::TRUNC_64to16)
       Reg1 = getX86SubSuperRegister(Reg1, MVT::i16);
     else
       Reg1 = getX86SubSuperRegister(Reg1, MVT::i8);
@@ -395,6 +399,9 @@ void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
     O << "\n\t";
     break;
   }
+  case X86::PsMOVZX64rr32:
+    O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t";
+    break;
   }
 
   // Call the autogenerated instruction printer routines.

@@ -60,6 +60,9 @@ struct X86ATTAsmPrinter : public X86SharedAsmPrinter {
   void printf128mem(const MachineInstr *MI, unsigned OpNo) {
     printMemReference(MI, OpNo);
   }
+  void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo, "subreg64");
+  }
 
   bool printAsmMRegister(const MachineOperand &MO, const char Mode);
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -69,7 +72,8 @@ struct X86ATTAsmPrinter : public X86SharedAsmPrinter {
 
   void printMachineInstruction(const MachineInstr *MI);
   void printSSECC(const MachineInstr *MI, unsigned Op);
-  void printMemReference(const MachineInstr *MI, unsigned Op);
+  void printMemReference(const MachineInstr *MI, unsigned Op,
+                         const char *Modifier=NULL);
   void printPICLabel(const MachineInstr *MI, unsigned Op);
   bool runOnMachineFunction(MachineFunction &F);
 };

@@ -30,8 +30,12 @@ Statistic<> llvm::EmittedInsts("asm-printer",
                                "Number of machine instrs printed");
 
 /// doInitialization
 bool X86SharedAsmPrinter::doInitialization(Module &M) {
   if (Subtarget->isTargetDarwin()) {
+    const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+    if (!Subtarget->is64Bit())
+      X86PICStyle = PICStyle::Stub;
+
     // Emit initial debug information.
     DW.BeginModule(&M);
   }

@@ -29,12 +29,19 @@ namespace llvm {
 
 extern Statistic<> EmittedInsts;
 
+// FIXME: Move this to CodeGen/AsmPrinter.h
+namespace PICStyle {
+  enum X86AsmPICStyle {
+    Stub, GOT
+  };
+}
+
 struct VISIBILITY_HIDDEN X86SharedAsmPrinter : public AsmPrinter {
   DwarfWriter DW;
 
   X86SharedAsmPrinter(std::ostream &O, X86TargetMachine &TM,
                       const TargetAsmInfo *T)
-    : AsmPrinter(O, TM, T), DW(O, this, T) {
+    : AsmPrinter(O, TM, T), DW(O, this, T), X86PICStyle(PICStyle::GOT) {
     Subtarget = &TM.getSubtarget<X86Subtarget>();
   }
@@ -49,6 +56,8 @@ struct VISIBILITY_HIDDEN X86SharedAsmPrinter : public AsmPrinter {
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 
+  PICStyle::X86AsmPICStyle X86PICStyle;
+
   const X86Subtarget *Subtarget;
 
   // Necessary for Darwin to print out the apprioriate types of linker stubs

@@ -12,6 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
 #include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
 #include "X86Relocations.h"
 #include "X86.h"
@@ -35,14 +37,16 @@ namespace {
 namespace {
   class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass {
     const X86InstrInfo *II;
+    const TargetData *TD;
     TargetMachine &TM;
     MachineCodeEmitter &MCE;
+    bool Is64BitMode;
   public:
     explicit Emitter(TargetMachine &tm, MachineCodeEmitter &mce)
-      : II(0), TM(tm), MCE(mce) {}
+      : II(0), TD(0), TM(tm), MCE(mce), Is64BitMode(false) {}
     Emitter(TargetMachine &tm, MachineCodeEmitter &mce,
-            const X86InstrInfo& ii)
-      : II(&ii), TM(tm), MCE(mce) {}
+            const X86InstrInfo &ii, const TargetData &td, bool is64)
+      : II(&ii), TD(&td), TM(tm), MCE(mce), Is64BitMode(is64) {}
 
     bool runOnMachineFunction(MachineFunction &MF);
@@ -54,20 +58,29 @@ namespace {
 
   private:
     void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
-    void emitPCRelativeValue(unsigned Address);
-    void emitGlobalAddressForCall(GlobalValue *GV, bool isTailCall);
-    void emitGlobalAddressForPtr(GlobalValue *GV, int Disp = 0);
+    void emitPCRelativeValue(intptr_t Address);
+    void emitGlobalAddressForCall(GlobalValue *GV, bool DoesntNeedStub);
+    void emitGlobalAddressForPtr(GlobalValue *GV, bool isPCRelative,
+                                 int Disp = 0, unsigned PCAdj = 0);
     void emitExternalSymbolAddress(const char *ES, bool isPCRelative);
+    void emitPCRelativeConstPoolAddress(unsigned CPI, int Disp = 0,
+                                        unsigned PCAdj = 0);
+    void emitPCRelativeJumpTableAddress(unsigned JTI, unsigned PCAdj = 0);
 
-    void emitDisplacementField(const MachineOperand *RelocOp, int DispVal);
+    void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
+                               unsigned PCAdj = 0);
 
     void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
     void emitSIBByte(unsigned SS, unsigned Index, unsigned Base);
-    void emitConstant(unsigned Val, unsigned Size);
+    void emitConstant(uint64_t Val, unsigned Size);
 
     void emitMemModRMByte(const MachineInstr &MI,
-                          unsigned Op, unsigned RegOpcodeField);
+                          unsigned Op, unsigned RegOpcodeField,
+                          unsigned PCAdj = 0);
 
+    unsigned getX86RegNum(unsigned RegNo);
+    bool isX86_64ExtendedReg(const MachineOperand &MO);
+    unsigned determineREX(const MachineInstr &MI);
   };
 }
@@ -83,6 +96,9 @@ bool Emitter::runOnMachineFunction(MachineFunction &MF) {
           MF.getTarget().getRelocationModel() != Reloc::Static) &&
          "JIT relocation model must be set to static or default!");
   II = ((X86TargetMachine&)MF.getTarget()).getInstrInfo();
+  TD = ((X86TargetMachine&)MF.getTarget()).getTargetData();
+  Is64BitMode =
+    ((X86TargetMachine&)MF.getTarget()).getSubtarget<X86Subtarget>().is64Bit();
 
   do {
     MCE.startFunction(MF);
@@ -98,9 +114,9 @@ bool Emitter::runOnMachineFunction(MachineFunction &MF) {
   return false;
 }
 
-/// emitPCRelativeValue - Emit a 32-bit PC relative address.
+/// emitPCRelativeValue - Emit a PC relative address.
 ///
-void Emitter::emitPCRelativeValue(unsigned Address) {
+void Emitter::emitPCRelativeValue(intptr_t Address) {
   MCE.emitWordLE(Address-MCE.getCurrentPCValue()-4);
 }
 
@@ -119,20 +135,22 @@ void Emitter::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) {
 /// emitGlobalAddressForCall - Emit the specified address to the code stream
 /// assuming this is part of a function call, which is PC relative.
 ///
-void Emitter::emitGlobalAddressForCall(GlobalValue *GV, bool isTailCall) {
+void Emitter::emitGlobalAddressForCall(GlobalValue *GV, bool DoesntNeedStub) {
   MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
                                              X86::reloc_pcrel_word, GV, 0,
-                                             !isTailCall /*Doesn'tNeedStub*/));
+                                             DoesntNeedStub));
   MCE.emitWordLE(0);
 }
 
 /// emitGlobalAddress - Emit the specified address to the code stream assuming
-/// this is part of a "take the address of a global" instruction, which is not
-/// PC relative.
+/// this is part of a "take the address of a global" instruction.
 ///
-void Emitter::emitGlobalAddressForPtr(GlobalValue *GV, int Disp /* = 0 */) {
-  MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(),
-                                             X86::reloc_absolute_word, GV));
+void Emitter::emitGlobalAddressForPtr(GlobalValue *GV, bool isPCRelative,
+                                      int Disp /* = 0 */,
+                                      unsigned PCAdj /* = 0 */) {
+  unsigned rt = isPCRelative ? X86::reloc_pcrel_word : X86::reloc_absolute_word;
+  MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), rt,
+                                             GV, PCAdj));
   MCE.emitWordLE(Disp); // The relocated value will be added to the displacement
 }
@@ -145,6 +163,26 @@ void Emitter::emitExternalSymbolAddress(const char *ES, bool isPCRelative) {
   MCE.emitWordLE(0);
 }
 
+/// emitPCRelativeConstPoolAddress - Arrange for the address of an constant pool
+/// to be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void Emitter::emitPCRelativeConstPoolAddress(unsigned CPI, int Disp /* = 0 */,
+                                             unsigned PCAdj /* = 0 */) {
+  MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+                                           X86::reloc_pcrel_word, CPI, PCAdj));
+  MCE.emitWordLE(Disp); // The relocated value will be added to the displacement
+}
+
+/// emitPCRelativeJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void Emitter::emitPCRelativeJumpTableAddress(unsigned JTI,
+                                             unsigned PCAdj /* = 0 */) {
+  MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+                                           X86::reloc_pcrel_word, JTI, PCAdj));
+  MCE.emitWordLE(0); // The relocated value will be added to the displacement
+}
+
 /// N86 namespace - Native X86 Register numbers... used by X86 backend.
 ///
 namespace N86 {
@@ -153,28 +191,53 @@ namespace N86 {
   };
 }
 
 // getX86RegNum - This function maps LLVM register identifiers to their X86
 // specific numbering, which is used in various places encoding instructions.
 //
-static unsigned getX86RegNum(unsigned RegNo) {
+unsigned Emitter::getX86RegNum(unsigned RegNo) {
   switch(RegNo) {
-  case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
-  case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
-  case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
-  case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
-  case X86::ESP: case X86::SP: case X86::AH: return N86::ESP;
-  case X86::EBP: case X86::BP: case X86::CH: return N86::EBP;
-  case X86::ESI: case X86::SI: case X86::DH: return N86::ESI;
-  case X86::EDI: case X86::DI: case X86::BH: return N86::EDI;
+  case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
+  case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
+  case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
+  case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
+  case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
+    return N86::ESP;
+  case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
+    return N86::EBP;
+  case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
+    return N86::ESI;
+  case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
+    return N86::EDI;
+
+  case X86::R8:  case X86::R8D:  case X86::R8W:  case X86::R8B:
+    return N86::EAX;
+  case X86::R9:  case X86::R9D:  case X86::R9W:  case X86::R9B:
+    return N86::ECX;
+  case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
+    return N86::EDX;
+  case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
+    return N86::EBX;
+  case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
+    return N86::ESP;
+  case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
+    return N86::EBP;
+  case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
+    return N86::ESI;
+  case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
+    return N86::EDI;
 
   case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
   case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
     return RegNo-X86::ST0;
 
-  case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
-  case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7:
-    return RegNo-X86::XMM0;
+  case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
+  case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7:
+    return II->getRegisterInfo().getDwarfRegNum(RegNo) -
+           II->getRegisterInfo().getDwarfRegNum(X86::XMM0);
+  case X86::XMM8:  case X86::XMM9:  case X86::XMM10: case X86::XMM11:
+  case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+    return II->getRegisterInfo().getDwarfRegNum(RegNo) -
+           II->getRegisterInfo().getDwarfRegNum(X86::XMM8);
 
   default:
     assert(MRegisterInfo::isVirtualRegister(RegNo) &&
@@ -199,7 +262,7 @@ void Emitter::emitSIBByte(unsigned SS, unsigned Index, unsigned Base) {
   MCE.emitByte(ModRMByte(SS, Index, Base));
 }
 
-void Emitter::emitConstant(unsigned Val, unsigned Size) {
+void Emitter::emitConstant(uint64_t Val, unsigned Size) {
   // Output the constant in little endian byte order...
   for (unsigned i = 0; i != Size; ++i) {
     MCE.emitByte(Val & 255);
@@ -214,7 +277,7 @@ static bool isDisp8(int Value) {
 }
 
 void Emitter::emitDisplacementField(const MachineOperand *RelocOp,
-                                    int DispVal) {
+                                    int DispVal, unsigned PCAdj) {
   // If this is a simple integer displacement that doesn't require a relocation,
   // emit it now.
   if (!RelocOp) {
@@ -225,14 +288,27 @@ void Emitter::emitDisplacementField(const MachineOperand *RelocOp,
   // Otherwise, this is something that requires a relocation. Emit it as such
   // now.
   if (RelocOp->isGlobalAddress()) {
-    emitGlobalAddressForPtr(RelocOp->getGlobal(), RelocOp->getOffset());
+    // In 64-bit static small code model, we could potentially emit absolute.
+    // But it's probably not beneficial.
+    //  89 05 00 00 00 00     mov %eax,0(%rip)  # PC-relative
+    //  89 04 25 00 00 00 00  mov %eax,0x0      # Absolute
+    emitGlobalAddressForPtr(RelocOp->getGlobal(), Is64BitMode,
+                            RelocOp->getOffset(), PCAdj);
+  } else if (RelocOp->isConstantPoolIndex()) {
+    // Must be in 64-bit mode.
+    emitPCRelativeConstPoolAddress(RelocOp->getConstantPoolIndex(),
+                                   RelocOp->getOffset(), PCAdj);
+  } else if (RelocOp->isJumpTableIndex()) {
+    // Must be in 64-bit mode.
+    emitPCRelativeJumpTableAddress(RelocOp->getJumpTableIndex(), PCAdj);
   } else {
     assert(0 && "Unknown value to relocate!");
   }
 }
 
 void Emitter::emitMemModRMByte(const MachineInstr &MI,
-                               unsigned Op, unsigned RegOpcodeField) {
+                               unsigned Op, unsigned RegOpcodeField,
+                               unsigned PCAdj) {
   const MachineOperand &Op3 = MI.getOperand(Op+3);
   int DispVal = 0;
   const MachineOperand *DispForReloc = 0;
@@ -241,10 +317,18 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
   if (Op3.isGlobalAddress()) {
     DispForReloc = &Op3;
   } else if (Op3.isConstantPoolIndex()) {
-    DispVal += MCE.getConstantPoolEntryAddress(Op3.getConstantPoolIndex());
-    DispVal += Op3.getOffset();
+    if (Is64BitMode) {
+      DispForReloc = &Op3;
+    } else {
+      DispVal += MCE.getConstantPoolEntryAddress(Op3.getConstantPoolIndex());
+      DispVal += Op3.getOffset();
+    }
   } else if (Op3.isJumpTableIndex()) {
-    DispVal += MCE.getJumpTableEntryAddress(Op3.getJumpTableIndex());
+    if (Is64BitMode) {
+      DispForReloc = &Op3;
+    } else {
+      DispVal += MCE.getJumpTableEntryAddress(Op3.getJumpTableIndex());
+    }
   } else {
     DispVal = Op3.getImm();
   }
@@ -256,12 +340,13 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
   unsigned BaseReg = Base.getReg();
 
   // Is a SIB byte needed?
-  if (IndexReg.getReg() == 0 && BaseReg != X86::ESP) {
+  if (IndexReg.getReg() == 0 &&
+      (BaseReg == 0 || getX86RegNum(BaseReg) != N86::ESP)) {
     if (BaseReg == 0) {  // Just a displacement?
       // Emit special case [disp32] encoding
       MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
 
-      emitDisplacementField(DispForReloc, DispVal);
+      emitDisplacementField(DispForReloc, DispVal, PCAdj);
    } else {
      unsigned BaseRegNo = getX86RegNum(BaseReg);
      if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
@@ -274,12 +359,13 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
     } else {
       // Emit the most general non-SIB encoding: [REG+disp32]
       MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo));
-      emitDisplacementField(DispForReloc, DispVal);
+      emitDisplacementField(DispForReloc, DispVal, PCAdj);
     }
   }
 
   } else {  // We need a SIB byte, so start by outputting the ModR/M byte first
-    assert(IndexReg.getReg() != X86::ESP && "Cannot use ESP as index reg!");
+    assert(IndexReg.getReg() != X86::ESP &&
+           IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
 
     bool ForceDisp32 = false;
     bool ForceDisp8  = false;
@@ -292,7 +378,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
       // Emit the normal disp32 encoding.
       MCE.emitByte(ModRMByte(2, RegOpcodeField, 4));
       ForceDisp32 = true;
-    } else if (DispVal == 0 && BaseReg != X86::EBP) {
+    } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) {
       // Emit no displacement ModR/M byte
       MCE.emitByte(ModRMByte(0, RegOpcodeField, 4));
     } else if (isDisp8(DispVal)) {
@@ -327,7 +413,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
     if (ForceDisp8) {
       emitConstant(DispVal, 1);
     } else if (DispVal != 0 || ForceDisp32) {
-      emitDisplacementField(DispForReloc, DispVal);
+      emitDisplacementField(DispForReloc, DispVal, PCAdj);
     }
   }
 }
@@ -337,11 +423,131 @@ static unsigned sizeOfImm(const TargetInstrDescriptor &Desc) {
   case X86II::Imm8:   return 1;
   case X86II::Imm16:  return 2;
   case X86II::Imm32:  return 4;
+  case X86II::Imm64:  return 8;
   default: assert(0 && "Immediate size not set!");
     return 0;
   }
 }
 
+/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended register?
+/// e.g. r8, xmm8, etc.
+bool Emitter::isX86_64ExtendedReg(const MachineOperand &MO) {
+  if (!MO.isRegister()) return false;
+  unsigned RegNo = MO.getReg();
+  int DWNum = II->getRegisterInfo().getDwarfRegNum(RegNo);
+  if (DWNum >= II->getRegisterInfo().getDwarfRegNum(X86::R8) &&
+      DWNum <= II->getRegisterInfo().getDwarfRegNum(X86::R15))
+    return true;
+  if (DWNum >= II->getRegisterInfo().getDwarfRegNum(X86::XMM8) &&
+      DWNum <= II->getRegisterInfo().getDwarfRegNum(X86::XMM15))
+    return true;
+  return false;
+}
+
+inline static bool isX86_64TruncToByte(unsigned oc) {
+  return (oc == X86::TRUNC_64to8 || oc == X86::TRUNC_32to8 ||
+          oc == X86::TRUNC_16to8);
+}
+
+inline static bool isX86_64NonExtLowByteReg(unsigned reg) {
+  return (reg == X86::SPL || reg == X86::BPL ||
+          reg == X86::SIL || reg == X86::DIL);
+}
+
+/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64
+/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
+/// size, and 3) use of X86-64 extended registers.
+unsigned Emitter::determineREX(const MachineInstr &MI) {
+  unsigned REX = 0;
+  unsigned Opcode = MI.getOpcode();
+  const TargetInstrDescriptor &Desc = II->get(Opcode);
+
+  // Pseudo instructions do not need REX prefix byte.
+  if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
+    return 0;
+  if (Desc.TSFlags & X86II::REX_W)
+    REX |= 1 << 3;
+
+  if (MI.getNumOperands()) {
+    // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+    bool isTrunc8 = isX86_64TruncToByte(Opcode);
+    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+      const MachineOperand& MO = MI.getOperand(i);
+      if (MO.isRegister()) {
+        unsigned Reg = MO.getReg();
+        // Trunc to byte are actually movb. The real source operand is the low
+        // byte of the register.
+        if (isTrunc8 && i == 1)
+          Reg = getX86SubSuperRegister(Reg, MVT::i8);
+        if (isX86_64NonExtLowByteReg(Reg))
+          REX |= 0x40;
+      }
+    }
+
+    switch (Desc.TSFlags & X86II::FormMask) {
+    case X86II::MRMInitReg:
+      if (isX86_64ExtendedReg(MI.getOperand(0)))
+        REX |= (1 << 0) | (1 << 2);
+      break;
+    case X86II::MRMSrcReg: {
+      if (isX86_64ExtendedReg(MI.getOperand(0)))
+        REX |= 1 << 2;
+      for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i) {
+        const MachineOperand& MO = MI.getOperand(i);
+        if (isX86_64ExtendedReg(MO))
+          REX |= 1 << 0;
+      }
+      break;
+    }
+    case X86II::MRMSrcMem: {
+      if (isX86_64ExtendedReg(MI.getOperand(0)))
+        REX |= 1 << 2;
+      unsigned Bit = 0;
+      for (unsigned i = 1; i != 5; ++i) {
+        const MachineOperand& MO = MI.getOperand(i);
+        if (MO.isRegister()) {
+          if (isX86_64ExtendedReg(MO))
+            REX |= 1 << Bit;
+          Bit++;
+        }
+      }
+      break;
+    }
+    case X86II::MRM0m: case X86II::MRM1m:
+    case X86II::MRM2m: case X86II::MRM3m:
+    case X86II::MRM4m: case X86II::MRM5m:
+    case X86II::MRM6m: case X86II::MRM7m:
+    case X86II::MRMDestMem: {
+      if (MI.getNumOperands() >= 5 &&
+          isX86_64ExtendedReg(MI.getOperand(4)))
+        REX |= 1 << 2;
+      unsigned Bit = 0;
+      for (unsigned i = 0; i != 4; ++i) {
+        const MachineOperand& MO = MI.getOperand(i);
+        if (MO.isRegister()) {
+          if (isX86_64ExtendedReg(MO))
+            REX |= 1 << Bit;
+          Bit++;
+        }
+      }
+      break;
+    }
+    default: {
+      if (isX86_64ExtendedReg(MI.getOperand(0)))
+        REX |= 1 << 0;
+      for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i) {
+        const MachineOperand& MO = MI.getOperand(i);
+        if (isX86_64ExtendedReg(MO))
+          REX |= 1 << 2;
+      }
+      break;
+    }
+    }
+  }
+  return REX;
+}
+
 void Emitter::emitInstruction(const MachineInstr &MI) {
   NumEmitted++;  // Keep track of the # of mi's emitted
 
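Note on the bit values used in determineREX (my summary of the AMD64 encoding,
not part of the patch): the REX prefix byte is 0100WRXB, which is why the code
ORs in 1 << 3 for REX.W and the emitter later ORs in 0x40 before emitting:

    // W (1<<3): 64-bit operand size    R (1<<2): extends ModRM.reg
    // X (1<<1): extends SIB.index      B (1<<0): extends ModRM.rm / SIB.base
    unsigned char makeREX(bool W, bool R, bool X, bool B) {
      return 0x40 | (W << 3) | (R << 2) | (X << 1) | (unsigned)B;
    }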
@@ -354,18 +560,22 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
   // Emit the operand size opcode prefix as needed.
   if (Desc.TSFlags & X86II::OpSize) MCE.emitByte(0x66);
 
+  // Emit the address size opcode prefix as needed.
+  if (Desc.TSFlags & X86II::AdSize) MCE.emitByte(0x67);
+
+  bool Need0FPrefix = false;
   switch (Desc.TSFlags & X86II::Op0Mask) {
   case X86II::TB:
-    MCE.emitByte(0x0F);   // Two-byte opcode prefix
+    Need0FPrefix = true;  // Two-byte opcode prefix
     break;
   case X86II::REP: break; // already handled.
   case X86II::XS:   // F3 0F
     MCE.emitByte(0xF3);
-    MCE.emitByte(0x0F);
+    Need0FPrefix = true;
     break;
   case X86II::XD:   // F2 0F
     MCE.emitByte(0xF2);
-    MCE.emitByte(0x0F);
+    Need0FPrefix = true;
     break;
   case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
   case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
@@ -377,6 +587,17 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
   case 0: break;  // No prefix!
   }
 
+  if (Is64BitMode) {
+    // REX prefix
+    unsigned REX = determineREX(MI);
+    if (REX)
+      MCE.emitByte(0x40 | REX);
+  }
+
+  // 0x0F escape code must be emitted just before the opcode.
+  if (Need0FPrefix)
+    MCE.emitByte(0x0F);
+
   // If this is a two-address instruction, skip one of the register operands.
   unsigned CurOp = 0;
   CurOp += (Desc.Flags & M_2_ADDR_FLAG) != 0;
@@ -397,6 +618,7 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
   case X86::IMPLICIT_DEF_GR8:
   case X86::IMPLICIT_DEF_GR16:
   case X86::IMPLICIT_DEF_GR32:
+  case X86::IMPLICIT_DEF_GR64:
   case X86::IMPLICIT_DEF_FR32:
   case X86::IMPLICIT_DEF_FR64:
   case X86::IMPLICIT_DEF_VR64:
@@ -417,7 +639,7 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
     } else if (MO.isGlobalAddress()) {
       bool isTailCall = Opcode == X86::TAILJMPd ||
         Opcode == X86::TAILJMPr || Opcode == X86::TAILJMPm;
-      emitGlobalAddressForCall(MO.getGlobal(), isTailCall);
+      emitGlobalAddressForCall(MO.getGlobal(), !isTailCall);
     } else if (MO.isExternalSymbol()) {
       emitExternalSymbolAddress(MO.getSymbolName(), true);
     } else if (MO.isImmediate()) {
@@ -434,15 +656,15 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
     if (CurOp != MI.getNumOperands()) {
       const MachineOperand &MO1 = MI.getOperand(CurOp++);
       if (MO1.isGlobalAddress()) {
-        assert(sizeOfImm(Desc) == 4 &&
+        assert(sizeOfImm(Desc) == TD->getPointerSize() &&
               "Don't know how to emit non-pointer values!");
-        emitGlobalAddressForPtr(MO1.getGlobal(), MO1.getOffset());
+        emitGlobalAddressForPtr(MO1.getGlobal(), Is64BitMode, MO1.getOffset());
      } else if (MO1.isExternalSymbol()) {
-        assert(sizeOfImm(Desc) == 4 &&
+        assert(sizeOfImm(Desc) == TD->getPointerSize() &&
              "Don't know how to emit non-pointer values!");
        emitExternalSymbolAddress(MO1.getSymbolName(), false);
      } else if (MO1.isJumpTableIndex()) {
-        assert(sizeOfImm(Desc) == 4 &&
+        assert(sizeOfImm(Desc) == TD->getPointerSize() &&
              "Don't know how to emit non-pointer values!");
        emitConstant(MCE.getJumpTableEntryAddress(MO1.getJumpTableIndex()), 4);
      } else {
@@ -460,13 +682,14 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
       emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
     break;
   }
-  case X86II::MRMDestMem:
+  case X86II::MRMDestMem: {
     MCE.emitByte(BaseOpcode);
     emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(CurOp+4).getReg()));
     CurOp += 5;
     if (CurOp != MI.getNumOperands())
       emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
     break;
+  }
 
   case X86II::MRMSrcReg:
     MCE.emitByte(BaseOpcode);
@@ -477,13 +700,17 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
       emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
     break;
 
-  case X86II::MRMSrcMem:
+  case X86II::MRMSrcMem: {
+    unsigned PCAdj = (CurOp+5 != MI.getNumOperands()) ? sizeOfImm(Desc) : 0;
+
     MCE.emitByte(BaseOpcode);
-    emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()));
+    emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()),
+                     PCAdj);
     CurOp += 5;
     if (CurOp != MI.getNumOperands())
       emitConstant(MI.getOperand(CurOp++).getImm(), sizeOfImm(Desc));
     break;
+  }
 
   case X86II::MRM0r: case X86II::MRM1r:
   case X86II::MRM2r: case X86II::MRM3r:
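Why PCAdj exists (my gloss on the hunks above, not stated in the patch): a
RIP-relative displacement is measured from the end of the instruction, but the
fixup is applied at the displacement field itself, so any immediate that
follows the displacement has to be compensated for. Roughly:

    // DispFieldAddr: address of the 4-byte displacement field;
    // ImmBytes: size of a trailing immediate (the PCAdj, e.g. sizeOfImm).
    int32_t ripDisplacement(int64_t Target, int64_t DispFieldAddr,
                            unsigned ImmBytes) {
      return (int32_t)(Target - (DispFieldAddr + 4 + ImmBytes));
    }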
@@ -500,9 +727,13 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
   case X86II::MRM0m: case X86II::MRM1m:
   case X86II::MRM2m: case X86II::MRM3m:
   case X86II::MRM4m: case X86II::MRM5m:
-  case X86II::MRM6m: case X86II::MRM7m:
+  case X86II::MRM6m: case X86II::MRM7m: {
+    unsigned PCAdj = (CurOp+4 != MI.getNumOperands()) ?
+      (MI.getOperand(CurOp+4).isImmediate() ? sizeOfImm(Desc) : 4) : 0;
+
     MCE.emitByte(BaseOpcode);
-    emitMemModRMByte(MI, CurOp, (Desc.TSFlags & X86II::FormMask)-X86II::MRM0m);
+    emitMemModRMByte(MI, CurOp, (Desc.TSFlags & X86II::FormMask)-X86II::MRM0m,
+                     PCAdj);
     CurOp += 4;
 
     if (CurOp != MI.getNumOperands()) {
@@ -510,13 +741,14 @@ void Emitter::emitInstruction(const MachineInstr &MI) {
       if (MO.isImmediate())
         emitConstant(MO.getImm(), sizeOfImm(Desc));
       else if (MO.isGlobalAddress())
-        emitGlobalAddressForPtr(MO.getGlobal(), MO.getOffset());
+        emitGlobalAddressForPtr(MO.getGlobal(), Is64BitMode, MO.getOffset());
       else if (MO.isJumpTableIndex())
         emitConstant(MCE.getJumpTableEntryAddress(MO.getJumpTableIndex()), 4);
       else
         assert(0 && "Unknown operand!");
     }
     break;
+  }
 
   case X86II::MRMInitReg:
     MCE.emitByte(BaseOpcode);

@@ -30,8 +30,9 @@
 #include "llvm/CodeGen/SSARegMap.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include <deque>
 #include <iostream>
@@ -58,16 +59,19 @@ namespace {
       int FrameIndex;
     } Base;
 
+    bool isRIPRel;     // RIP relative?
     unsigned Scale;
     SDOperand IndexReg;
     unsigned Disp;
     GlobalValue *GV;
     Constant *CP;
+    const char *ES;
+    int JT;
     unsigned Align;    // CP alignment.
 
     X86ISelAddressMode()
-      : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), GV(0),
-        CP(0), Align(0) {
+      : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
+        GV(0), CP(0), ES(0), JT(-1), Align(0) {
     }
   };
 }
@@ -92,6 +96,10 @@ namespace {
     ///
     bool FastISel;
 
+    /// TM - Keep a reference to X86TargetMachine.
+    ///
+    X86TargetMachine &TM;
+
     /// X86Lowering - This object fully describes how to lower LLVM code to an
     /// X86-specific SelectionDAG.
     X86TargetLowering X86Lowering;
@@ -100,12 +108,14 @@ namespace {
     /// make the right decision when generating code for different targets.
     const X86Subtarget *Subtarget;
 
+    /// GlobalBaseReg - keeps track of the virtual register mapped onto global
+    /// base register.
     unsigned GlobalBaseReg;
 
   public:
-    X86DAGToDAGISel(X86TargetMachine &TM, bool fast)
+    X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
       : SelectionDAGISel(X86Lowering),
-        ContainsFPCode(false), FastISel(fast),
+        ContainsFPCode(false), FastISel(fast), TM(tm),
        X86Lowering(*TM.getTargetLowering()),
        Subtarget(&TM.getSubtarget<X86Subtarget>()) {}
@@ -156,13 +166,22 @@ namespace {
                                   SDOperand &Scale, SDOperand &Index,
                                   SDOperand &Disp) {
       Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
-        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, MVT::i32) : AM.Base.Reg;
+        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
+        AM.Base.Reg;
       Scale = getI8Imm(AM.Scale);
       Index = AM.IndexReg;
-      Disp = AM.GV ? CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp)
-        : (AM.CP ?
-           CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp)
-           : getI32Imm(AM.Disp));
+      // These are 32-bit even in 64-bit mode since RIP relative offset
+      // is 32-bit.
+      if (AM.GV)
+        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
+      else if (AM.CP)
+        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
+      else if (AM.ES)
+        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
+      else if (AM.JT != -1)
+        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
+      else
+        Disp = getI32Imm(AM.Disp);
     }
 
     /// getI8Imm - Return a target constant with the specified value, of type
@@ -476,26 +495,56 @@ void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
 /// addressing mode
 bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
                                    bool isRoot) {
+  // RIP relative addressing: %rip + 32-bit displacement!
+  if (AM.isRIPRel) {
+    if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
+      uint64_t Val = cast<ConstantSDNode>(N)->getValue();
+      if (isInt32(AM.Disp + Val)) {
+        AM.Disp += Val;
+        return false;
+      }
+    }
+    return true;
+  }
+
   int id = N.Val->getNodeId();
   bool Available = isSelected(id);
 
   switch (N.getOpcode()) {
   default: break;
-  case ISD::Constant:
-    AM.Disp += cast<ConstantSDNode>(N)->getValue();
-    return false;
+  case ISD::Constant: {
+    uint64_t Val = cast<ConstantSDNode>(N)->getValue();
+    if (isInt32(AM.Disp + Val)) {
+      AM.Disp += Val;
+      return false;
+    }
+    break;
+  }
 
   case X86ISD::Wrapper:
-    // If both base and index components have been picked, we can't fit
-    // the result available in the register in the addressing mode. Duplicate
-    // GlobalAddress or ConstantPool as displacement.
-    if (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
+    // If value is available in a register both base and index components have
+    // been picked, we can't fit the result available in the register in the
+    // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
+
+    // Can't fit GV or CP in addressing mode for X86-64 medium or large code
+    // model since the displacement field is 32-bit.  Ok for small code model.
+
+    // For X86-64 PIC code, only allow GV / CP + displacement so we can use RIP
+    // relative addressing mode.
+    if ((!Subtarget->is64Bit() || TM.getCodeModel() == CodeModel::Small) &&
+        (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val))) {
+      bool isRIP = Subtarget->is64Bit();
+      if (isRIP && (AM.Base.Reg.Val || AM.Scale > 1 || AM.IndexReg.Val ||
+                    AM.BaseType == X86ISelAddressMode::FrameIndexBase))
+        break;
       if (ConstantPoolSDNode *CP =
             dyn_cast<ConstantPoolSDNode>(N.getOperand(0))) {
         if (AM.CP == 0) {
           AM.CP = CP->get();
           AM.Align = CP->getAlignment();
           AM.Disp += CP->getOffset();
+          if (isRIP)
+            AM.isRIPRel = true;
           return false;
         }
       } else if (GlobalAddressSDNode *G =
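The repeated isInt32 guards above ensure that a folded displacement still fits
the 32-bit displacement field of the x86-64 addressing mode. A minimal
equivalent of the check (assuming the llvm/Support/MathExtras.h helper's
semantics):

    static bool isInt32(int64_t V) {
      return V == (int64_t)(int32_t)V;  // true iff V fits a signed 32-bit field
    }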
@@ -503,6 +552,20 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
         if (AM.GV == 0) {
           AM.GV = G->getGlobal();
           AM.Disp += G->getOffset();
+          if (isRIP)
+            AM.isRIPRel = true;
           return false;
         }
+      } else if (isRoot && isRIP) {
+        if (ExternalSymbolSDNode *S =
+              dyn_cast<ExternalSymbolSDNode>(N.getOperand(0))) {
+          AM.ES = S->getSymbol();
+          AM.isRIPRel = true;
+          return false;
+        } else if (JumpTableSDNode *J =
+                     dyn_cast<JumpTableSDNode>(N.getOperand(0))) {
+          AM.JT = J->getIndex();
+          AM.isRIPRel = true;
+          return false;
+        }
       }
@@ -533,7 +596,11 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
           AM.IndexReg = ShVal.Val->getOperand(0);
           ConstantSDNode *AddVal =
             cast<ConstantSDNode>(ShVal.Val->getOperand(1));
-          AM.Disp += AddVal->getValue() << Val;
+          uint64_t Disp = AM.Disp + AddVal->getValue() << Val;
+          if (isInt32(Disp))
+            AM.Disp = Disp;
+          else
+            AM.IndexReg = ShVal;
         } else {
           AM.IndexReg = ShVal;
         }
@@ -563,7 +630,11 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
         Reg = MulVal.Val->getOperand(0);
         ConstantSDNode *AddVal =
           cast<ConstantSDNode>(MulVal.Val->getOperand(1));
-        AM.Disp += AddVal->getValue() * CN->getValue();
+        uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
+        if (isInt32(Disp))
+          AM.Disp = Disp;
+        else
+          Reg = N.Val->getOperand(0);
       } else {
         Reg = N.Val->getOperand(0);
       }
@@ -641,13 +712,14 @@ bool X86DAGToDAGISel::SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
   if (MatchAddress(N, AM))
     return false;
 
+  MVT::ValueType VT = N.getValueType();
   if (AM.BaseType == X86ISelAddressMode::RegBase) {
     if (!AM.Base.Reg.Val)
-      AM.Base.Reg = CurDAG->getRegister(0, MVT::i32);
+      AM.Base.Reg = CurDAG->getRegister(0, VT);
   }
 
   if (!AM.IndexReg.Val)
-    AM.IndexReg = CurDAG->getRegister(0, MVT::i32);
+    AM.IndexReg = CurDAG->getRegister(0, VT);
 
   getAddressOperands(AM, Base, Scale, Index, Disp);
   return true;
@@ -662,19 +734,20 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base,
   if (MatchAddress(N, AM))
     return false;
 
+  MVT::ValueType VT = N.getValueType();
   unsigned Complexity = 0;
   if (AM.BaseType == X86ISelAddressMode::RegBase)
     if (AM.Base.Reg.Val)
       Complexity = 1;
     else
-      AM.Base.Reg = CurDAG->getRegister(0, MVT::i32);
+      AM.Base.Reg = CurDAG->getRegister(0, VT);
   else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
     Complexity = 4;
 
   if (AM.IndexReg.Val)
     Complexity++;
   else
-    AM.IndexReg = CurDAG->getRegister(0, MVT::i32);
+    AM.IndexReg = CurDAG->getRegister(0, VT);
 
   if (AM.Scale > 2)
     Complexity += 2;
@ -687,8 +760,14 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base,
|
|||
// optimal (especially for code size consideration). LEA is nice because of
|
||||
// its three-address nature. Tweak the cost function again when we can run
|
||||
// convertToThreeAddress() at register allocation time.
|
||||
if (AM.GV || AM.CP)
|
||||
Complexity += 2;
|
||||
if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
|
||||
// For X86-64, we should always use lea to materialize RIP relative
|
||||
// addresses.
|
||||
if (Subtarget->is64Bit())
|
||||
Complexity = 4;
|
||||
else
|
||||
Complexity += 2;
|
||||
}
|
||||
|
||||
if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
|
||||
Complexity++;
|
||||
|
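The complexity score in this hunk decides whether an LEA is worth emitting for a matched address. A hedged reconstruction of the heuristic as a free function (the struct and its field names are invented for illustration; the real code works directly on X86ISelAddressMode, and the frame-index case is omitted):

    // Illustrative sketch of the LEA cost heuristic above; not the
    // in-tree API. GV/CP/ES/JT are collapsed into a single HasSymbol flag.
    struct AddrSketch {
      bool HasBaseReg, HasIndexReg, HasSymbol;
      unsigned Scale;
      long Disp;
    };

    static unsigned leaComplexity(const AddrSketch &AM, bool Is64Bit) {
      unsigned Complexity = AM.HasBaseReg ? 1 : 0;
      if (AM.HasIndexReg) ++Complexity;
      if (AM.Scale > 2) Complexity += 2;
      // Symbolic addresses force LEA on x86-64 so they can be RIP-relative.
      if (AM.HasSymbol) Complexity = Is64Bit ? 4 : Complexity + 2;
      if (AM.Disp && (AM.HasBaseReg || AM.HasIndexReg)) ++Complexity;
      return Complexity;
    }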
@@ -721,6 +800,7 @@ static bool isRegister0(SDOperand Op) {
/// base address to use for accessing globals into a register.
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
  assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
  if (!GlobalBaseReg) {
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineBasicBlock &FirstMBB = BB->getParent()->front();

@@ -732,7 +812,7 @@ SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
    BuildMI(FirstMBB, MBBI, X86::MovePCtoStack, 0);
    BuildMI(FirstMBB, MBBI, X86::POP32r, 1, GlobalBaseReg);
  }
  return CurDAG->getRegister(GlobalBaseReg, MVT::i32).Val;
  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
}

static SDNode *FindCallStartFromCall(SDNode *Node) {

@@ -776,9 +856,11 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
    // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
    // code and is matched first so to prevent it from being turned into
    // LEA32r X+c.
    // In 64-bit mode, use LEA to take advantage of RIP-relative addressing.
    MVT::ValueType PtrVT = TLI.getPointerTy();
    SDOperand N0 = N.getOperand(0);
    SDOperand N1 = N.getOperand(1);
    if (N.Val->getValueType(0) == MVT::i32 &&
    if (N.Val->getValueType(0) == PtrVT &&
        N0.getOpcode() == X86ISD::Wrapper &&
        N1.getOpcode() == ISD::Constant) {
      unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();

@@ -786,17 +868,23 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
      // TODO: handle ExternalSymbolSDNode.
      if (GlobalAddressSDNode *G =
            dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
        C = CurDAG->getTargetGlobalAddress(G->getGlobal(), MVT::i32,
        C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
                                           G->getOffset() + Offset);
      } else if (ConstantPoolSDNode *CP =
                   dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
        C = CurDAG->getTargetConstantPool(CP->get(), MVT::i32,
        C = CurDAG->getTargetConstantPool(CP->get(), PtrVT,
                                          CP->getAlignment(),
                                          CP->getOffset()+Offset);
      }

      if (C.Val)
        return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, MVT::i32, C);
      if (C.Val) {
        if (Subtarget->is64Bit()) {
          SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
                              CurDAG->getRegister(0, PtrVT), C };
          return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
        } else
          return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);
      }
    }

    // Other cases are handled by auto-generated code.

@@ -811,6 +899,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
      case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
      case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
      case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
      case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
      }
    else
      switch (NVT) {

@@ -818,6 +907,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
      case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
      case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
      case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
      case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
      }

    unsigned LoReg, HiReg;

@@ -826,6 +916,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
    case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
    case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
    case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
    case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
    }

    SDOperand N0 = Node->getOperand(0);

@@ -899,6 +990,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
      }
    else
      switch (NVT) {

@@ -906,6 +998,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
      }

    unsigned LoReg, HiReg;

@@ -927,6 +1020,11 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
      ClrOpcode  = X86::MOV32r0;
      SExtOpcode = X86::CDQ;
      break;
    case MVT::i64:
      LoReg = X86::RAX; HiReg = X86::RDX;
      ClrOpcode  = X86::MOV64r0;
      SExtOpcode = X86::CQO;
      break;
    }

    SDOperand N0 = Node->getOperand(0);
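For the signed case, the hunk above pairs MOV64r0/CQO with RAX:RDX. CQO is the 64-bit analogue of CDQ: it sign-extends the dividend's low half into the high half before IDIV runs. A tiny sketch of the value CQO leaves in RDX:

    #include <cstdint>

    // What CQO computes, in C: RDX := sign of RAX replicated across 64 bits
    // (all ones for a negative dividend, all zeros otherwise).
    static int64_t highHalfForSignedDiv(int64_t Lo) {
      return Lo < 0 ? -1 : 0;
    }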
@@ -994,7 +1092,7 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
  }

  case ISD::TRUNCATE: {
    if (NVT == MVT::i8) {
    if (!Subtarget->is64Bit() && NVT == MVT::i8) {
      unsigned Opc2;
      MVT::ValueType VT;
      switch (Node->getOperand(0).getValueType()) {

@@ -1002,12 +1100,12 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
      case MVT::i16:
        Opc = X86::MOV16to16_;
        VT = MVT::i16;
        Opc2 = X86::TRUNC_GR16_GR8;
        Opc2 = X86::TRUNC_16_to8;
        break;
      case MVT::i32:
        Opc = X86::MOV32to32_;
        VT = MVT::i32;
        Opc2 = X86::TRUNC_GR32_GR8;
        Opc2 = X86::TRUNC_32_to8;
        break;
      }
File diff suppressed because it is too large
@@ -267,6 +267,9 @@ namespace llvm {
  // X86TargetLowering - X86 Implementation of the TargetLowering interface
  class X86TargetLowering : public TargetLowering {
    int VarArgsFrameIndex;      // FrameIndex for start of varargs area.
    int RegSaveFrameIndex;      // X86-64 vararg func register save area.
    unsigned VarArgsGPOffset;   // X86-64 vararg func int reg offset.
    unsigned VarArgsFPOffset;   // X86-64 vararg func fp reg offset.
    int ReturnAddrIndex;        // FrameIndex for return slot.
    int BytesToPopOnReturn;     // Number of arg bytes ret should pop.
    int BytesCallerReserves;    // Number of arg bytes caller makes.

@@ -347,6 +350,9 @@ namespace llvm {
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// X86StackPtr - X86 physical register used as stack ptr.
    unsigned X86StackPtr;

    /// X86ScalarSSE - Select between SSE2 or x87 floating point ops.
    bool X86ScalarSSE;

@@ -354,6 +360,10 @@ namespace llvm {
    SDOperand LowerCCCArguments(SDOperand Op, SelectionDAG &DAG);
    SDOperand LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG);

    // X86-64 C Calling Convention implementation.
    SDOperand LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG);
    SDOperand LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG);

    // Fast Calling Convention implementation.
    SDOperand LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG);
    SDOperand LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG);
@@ -22,7 +22,7 @@ using namespace llvm;

X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
  : TargetInstrInfo(X86Insts, sizeof(X86Insts)/sizeof(X86Insts[0])),
    TM(tm), RI(*this) {
    TM(tm), RI(tm, *this) {
}

@@ -30,7 +30,8 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
                               unsigned& sourceReg,
                               unsigned& destReg) const {
  MachineOpCode oc = MI.getOpcode();
  if (oc == X86::MOV8rr || oc == X86::MOV16rr || oc == X86::MOV32rr ||
  if (oc == X86::MOV8rr || oc == X86::MOV16rr ||
      oc == X86::MOV32rr || oc == X86::MOV64rr ||
      oc == X86::MOV16to16_ || oc == X86::MOV32to32_ ||
      oc == X86::FpMOV || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
      oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr ||

@@ -59,6 +60,7 @@ unsigned X86InstrInfo::isLoadFromStackSlot(MachineInstr *MI,
  case X86::MOV16_rm:
  case X86::MOV32rm:
  case X86::MOV32_rm:
  case X86::MOV64rm:
  case X86::FpLD64m:
  case X86::MOVSSrm:
  case X86::MOVSDrm:

@@ -86,6 +88,7 @@ unsigned X86InstrInfo::isStoreToStackSlot(MachineInstr *MI,
  case X86::MOV16_mr:
  case X86::MOV32mr:
  case X86::MOV32_mr:
  case X86::MOV64mr:
  case X86::FpSTP64m:
  case X86::MOVSSmr:
  case X86::MOVSDmr:

@@ -145,16 +148,20 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr *MI) const {

  switch (MI->getOpcode()) {
  case X86::INC32r:
  case X86::INC64_32r:
    assert(MI->getNumOperands() == 2 && "Unknown inc instruction!");
    return addRegOffset(BuildMI(X86::LEA32r, 5, Dest), Src, 1);
  case X86::INC16r:
  case X86::INC64_16r:
    if (DisableLEA16) return 0;
    assert(MI->getNumOperands() == 2 && "Unknown inc instruction!");
    return addRegOffset(BuildMI(X86::LEA16r, 5, Dest), Src, 1);
  case X86::DEC32r:
  case X86::DEC64_32r:
    assert(MI->getNumOperands() == 2 && "Unknown dec instruction!");
    return addRegOffset(BuildMI(X86::LEA32r, 5, Dest), Src, -1);
  case X86::DEC16r:
  case X86::DEC64_16r:
    if (DisableLEA16) return 0;
    assert(MI->getNumOperands() == 2 && "Unknown dec instruction!");
    return addRegOffset(BuildMI(X86::LEA16r, 5, Dest), Src, -1);

@@ -264,3 +271,10 @@ X86InstrInfo::reverseBranchCondition(MachineBasicBlock::iterator MI) const {
  return BuildMI(*MBB, MBB->erase(MI), ROpcode, 1).addMBB(TMBB);
}

const TargetRegisterClass *X86InstrInfo::getPointerRegClass() const {
  const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
  if (Subtarget->is64Bit())
    return &X86::GR64RegClass;
  else
    return &X86::GR32RegClass;
}
@@ -18,6 +18,7 @@
#include "X86RegisterInfo.h"

namespace llvm {
  class X86RegisterInfo;
  class X86TargetMachine;

/// X86II - This namespace holds all of the target specific flags that

@@ -90,12 +91,18 @@ namespace X86II {
    // instead of 32 bit data.
    OpSize = 1 << 6,

    // AdSize - Set if this instruction requires an address size prefix (0x67),
    // which most often indicates that the instruction uses 16 bit addresses
    // instead of 32 bit addresses (or 32 bit addresses in 64 bit mode).
    AdSize = 1 << 7,

    //===------------------------------------------------------------------===//
    // Op0Mask - There are several prefix bytes that are used to form two byte
    // opcodes. These are currently 0x0F, 0xF3, and 0xD8-0xDF. This mask is
    // used to obtain the setting of this field. If no bits in this field are
    // set, there is no prefix byte for obtaining a multibyte opcode.
    //
    Op0Shift = 7,
    Op0Shift = 8,
    Op0Mask = 0xF << Op0Shift,

    // TB - TwoByte - Set if this instruction has a two byte opcode, which

@@ -118,19 +125,29 @@ namespace X86II {
    XD = 11 << Op0Shift, XS = 12 << Op0Shift,

    //===------------------------------------------------------------------===//
    // This two-bit field describes the size of an immediate operand. Zero is
    // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
    // They are used to specify GPRs and SSE registers, 64-bit operand size,
    // etc. We only care about REX.W and REX.R bits, and only the former is
    // statically determined.
    //
    REXShift = 12,
    REX_W = 1 << REXShift,

    //===------------------------------------------------------------------===//
    // This three-bit field describes the size of an immediate operand. Zero is
    // unused so that we can tell if we forgot to set a value.
    ImmShift = 11,
    ImmMask = 3 << ImmShift,
    ImmShift = 13,
    ImmMask = 7 << ImmShift,
    Imm8  = 1 << ImmShift,
    Imm16 = 2 << ImmShift,
    Imm32 = 3 << ImmShift,
    Imm64 = 4 << ImmShift,

    //===------------------------------------------------------------------===//
    // FP Instruction Classification...  Zero is non-fp instruction.

    // FPTypeMask - Mask for all of the FP types...
    FPTypeShift = 13,
    FPTypeShift = 16,
    FPTypeMask = 7 << FPTypeShift,

    // NotFP - The default, set for instructions that do not use FP registers.

@@ -162,9 +179,9 @@ namespace X86II {
    // SpecialFP - Special instruction forms.  Dispatch by opcode explicitly.
    SpecialFP = 7 << FPTypeShift,

    OpcodeShift = 16,
    // Bits 19 -> 23 are unused
    OpcodeShift = 24,
    OpcodeMask = 0xFF << OpcodeShift
    // Bits 25 -> 31 are unused
  };
}
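With the immediate field widened to three bits and the opcode byte pushed up to bits 24-31, every field of the flag word still decodes with a plain shift-and-mask. A minimal sketch of the post-change layout (shift/mask constants copied from the enum above; the helper names are illustrative, not in-tree API):

    #include <cstdint>

    enum : uint32_t {
      ImmShift = 13,    ImmMask = 7u << ImmShift,       // three-bit field now
      FPTypeShift = 16, FPTypeMask = 7u << FPTypeShift,
      OpcodeShift = 24, OpcodeMask = 0xFFu << OpcodeShift,
    };

    // Decode the immediate-size code (0 means "unset", as the comment says).
    static unsigned immType(uint32_t TSFlags) {
      return (TSFlags & ImmMask) >> ImmShift;
    }

    // Recover the base opcode byte stored in bits 24-31.
    static unsigned baseOpcode(uint32_t TSFlags) {
      return (TSFlags & OpcodeMask) >> OpcodeShift;
    }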
|
@ -216,6 +233,8 @@ public:
|
|||
virtual MachineBasicBlock::iterator
|
||||
reverseBranchCondition(MachineBasicBlock::iterator MI) const;
|
||||
|
||||
const TargetRegisterClass *getPointerRegClass() const;
|
||||
|
||||
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
|
||||
// specified opcode number.
|
||||
//
|
||||
|
@@ -39,7 +39,7 @@ def SDT_X86CallSeqStart : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
def SDT_X86CallSeqEnd   : SDTypeProfile<0, 2, [ SDTCisVT<0, i32>,
                                                SDTCisVT<1, i32> ]>;

def SDT_X86Call : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def SDT_X86Call : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;

def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;

@@ -95,7 +95,7 @@ def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
class X86MemOperand<string printMethod> : Operand<iPTR> {
  let PrintMethod = printMethod;
  let NumMIOperands = 4;
  let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
}

def i8mem : X86MemOperand<"printi8mem">;

@@ -107,6 +107,12 @@ def f32mem : X86MemOperand<"printf32mem">;
def f64mem  : X86MemOperand<"printf64mem">;
def f128mem : X86MemOperand<"printf128mem">;

def lea32mem : Operand<i32> {
  let PrintMethod = "printi32mem";
  let NumMIOperands = 4;
  let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
}

def SSECC : Operand<i8> {
  let PrintMethod = "printSSECC";
}

@@ -129,9 +135,9 @@ def brtarget : Operand<OtherVT>;
//

// Define X86 specific addressing mode.
def addr    : ComplexPattern<iPTR, 4, "SelectAddr", []>;
def leaaddr : ComplexPattern<iPTR, 4, "SelectLEAAddr",
                             [add, mul, shl, or, frameindex]>;
def addr      : ComplexPattern<iPTR, 4, "SelectAddr", []>;
def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr",
                               [add, mul, shl, or, frameindex]>;

//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.

@@ -158,11 +164,13 @@ def MRMInitReg : Format<32>;

//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
def HasMMX  : Predicate<"Subtarget->hasMMX()">;
def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
def FPStack : Predicate<"!Subtarget->hasSSE2()">;
def HasMMX      : Predicate<"Subtarget->hasMMX()">;
def HasSSE1     : Predicate<"Subtarget->hasSSE1()">;
def HasSSE2     : Predicate<"Subtarget->hasSSE2()">;
def HasSSE3     : Predicate<"Subtarget->hasSSE3()">;
def FPStack     : Predicate<"!Subtarget->hasSSE2()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">;

//===----------------------------------------------------------------------===//
// X86 specific pattern fragments.

@@ -171,13 +179,14 @@ def FPStack : Predicate<"!Subtarget->hasSSE2()">;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
// machine code emitter.
class ImmType<bits<2> val> {
  bits<2> Value = val;
class ImmType<bits<3> val> {
  bits<3> Value = val;
}
def NoImm : ImmType<0>;
def Imm8  : ImmType<1>;
def Imm16 : ImmType<2>;
def Imm32 : ImmType<3>;
def Imm64 : ImmType<4>;

// FPFormat - This specifies what form this FP instruction has. This is used by
// the Floating-Point stackifier pass.

@@ -202,7 +211,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag ops, string AsmStr>
  Format Form = f;
  bits<6> FormBits = Form.Value;
  ImmType ImmT = i;
  bits<2> ImmTypeBits = ImmT.Value;
  bits<3> ImmTypeBits = ImmT.Value;

  dag OperandList = ops;
  string AsmString = AsmStr;

@@ -210,9 +219,11 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag ops, string AsmStr>
  //
  // Attributes specific to X86 instructions...
  //
  bit hasOpSizePrefix = 0;  // Does this inst have a 0x66 prefix?
  bit hasOpSizePrefix = 0;  // Does this inst have a 0x66 prefix?
  bit hasAdSizePrefix = 0;  // Does this inst have a 0x67 prefix?

  bits<4> Prefix = 0;       // Which prefix byte does this inst have?
  bit hasREX_WPrefix = 0;   // Does this inst require the REX.W prefix?
  FPFormat FPForm;          // What flavor of FP instruction is this?
  bits<3> FPFormBits = 0;
}

@@ -226,6 +237,8 @@ class Imp<list<Register> uses, list<Register> defs> {
// Prefix byte classes which are used to indicate to the ad-hoc machine code
// emitter that various prefix bytes are required.
class OpSize { bit hasOpSizePrefix = 1; }
class AdSize { bit hasAdSizePrefix = 1; }
class REX_W  { bit hasREX_WPrefix = 1; }
class TB     { bits<4> Prefix = 1; }
class REP    { bits<4> Prefix = 2; }
class D8     { bits<4> Prefix = 3; }

@@ -276,8 +289,6 @@ def i32immSExt8 : PatLeaf<(i32 imm), [{
}]>;

// Helper fragments for loads.
def loadiPTR : PatFrag<(ops node:$ptr), (iPTR (load node:$ptr))>;

def loadi8  : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
def loadi16 : PatFrag<(ops node:$ptr), (i16 (load node:$ptr))>;
def loadi32 : PatFrag<(ops node:$ptr), (i32 (load node:$ptr))>;

@@ -308,6 +319,7 @@ def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extload node:$ptr, i16))>;

//===----------------------------------------------------------------------===//
// Instruction templates...
//

class I<bits<8> o, Format f, dag ops, string asm, list<dag> pattern>
  : X86Inst<o, f, NoImm, ops, asm> {

@@ -355,13 +367,13 @@ def IMPLICIT_DEF_GR32 : I<0, Pseudo, (ops GR32:$dst),
def NOOP : I<0x90, RawFrm, (ops), "nop", []>;

// Truncate
def TRUNC_GR32_GR8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR32_:$src),
                       "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}", []>;
def TRUNC_GR16_GR8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR16_:$src),
                       "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}", []>;
def TRUNC_GR32_GR16 : I<0x89, MRMDestReg, (ops GR16:$dst, GR32:$src),
                        "mov{w} {${src:subreg16}, $dst|$dst, ${src:subreg16}}",
                        [(set GR16:$dst, (trunc GR32:$src))]>;
def TRUNC_32_to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR32_:$src),
                     "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}", []>;
def TRUNC_16_to8 : I<0x88, MRMDestReg, (ops GR8:$dst, GR16_:$src),
                     "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}", []>;
def TRUNC_32to16 : I<0x89, MRMDestReg, (ops GR16:$dst, GR32:$src),
                     "mov{w} {${src:subreg16}, $dst|$dst, ${src:subreg16}}",
                     [(set GR16:$dst, (trunc GR32:$src))]>;

//===----------------------------------------------------------------------===//
// Control Flow Instructions...

@@ -388,7 +400,7 @@ let isBranch = 1, isTerminator = 1, noResults = 1, isBarrier = 1 in {
  def JMP32r : I<0xFF, MRM4r, (ops GR32:$dst), "jmp{l} {*}$dst",
                 [(brind GR32:$dst)]>;
  def JMP32m : I<0xFF, MRM4m, (ops i32mem:$dst), "jmp{l} {*}$dst",
                 [(brind (loadiPTR addr:$dst))]>;
                 [(brind (loadi32 addr:$dst))]>;
}

// Conditional branches

@@ -510,9 +522,9 @@ def LEA16r : I<0x8D, MRMSrcMem,
               (ops GR16:$dst, i32mem:$src),
               "lea{w} {$src|$dst}, {$dst|$src}", []>, OpSize;
def LEA32r   : I<0x8D, MRMSrcMem,
                 (ops GR32:$dst, i32mem:$src),
                 (ops GR32:$dst, lea32mem:$src),
                 "lea{l} {$src|$dst}, {$dst|$src}",
                 [(set GR32:$dst, leaaddr:$src)]>;
                 [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;

def REP_MOVSB : I<0xA4, RawFrm, (ops), "{rep;movsb|rep movsb}",
                  [(X86rep_movs i8)]>,

@@ -1101,9 +1113,10 @@ def INC8r : I<0xFE, MRM0r, (ops GR8 :$dst, GR8 :$src), "inc{b} $dst",
              [(set GR8:$dst, (add GR8:$src, 1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
def INC16r : I<0x40, AddRegFrm, (ops GR16:$dst, GR16:$src), "inc{w} $dst",
               [(set GR16:$dst, (add GR16:$src, 1))]>, OpSize;
               [(set GR16:$dst, (add GR16:$src, 1))]>,
               OpSize, Requires<[In32BitMode]>;
def INC32r : I<0x40, AddRegFrm, (ops GR32:$dst, GR32:$src), "inc{l} $dst",
               [(set GR32:$dst, (add GR32:$src, 1))]>;
               [(set GR32:$dst, (add GR32:$src, 1))]>, Requires<[In32BitMode]>;
}
let isTwoAddress = 0, CodeSize = 2 in {
def INC8m : I<0xFE, MRM0m, (ops i8mem :$dst), "inc{b} $dst",

@@ -1119,9 +1132,10 @@ def DEC8r : I<0xFE, MRM1r, (ops GR8 :$dst, GR8 :$src), "dec{b} $dst",
              [(set GR8:$dst, (add GR8:$src, -1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
def DEC16r : I<0x48, AddRegFrm, (ops GR16:$dst, GR16:$src), "dec{w} $dst",
               [(set GR16:$dst, (add GR16:$src, -1))]>, OpSize;
               [(set GR16:$dst, (add GR16:$src, -1))]>,
               OpSize, Requires<[In32BitMode]>;
def DEC32r : I<0x48, AddRegFrm, (ops GR32:$dst, GR32:$src), "dec{l} $dst",
               [(set GR32:$dst, (add GR32:$src, -1))]>;
               [(set GR32:$dst, (add GR32:$src, -1))]>, Requires<[In32BitMode]>;
}

let isTwoAddress = 0, CodeSize = 2 in {
|
|||
// Non-Instruction Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// ConstantPool GlobalAddress, ExternalSymbol
|
||||
// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
|
||||
def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
|
||||
def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
|
||||
def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
|
||||
|
@ -2477,18 +2491,16 @@ def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
|
|||
|
||||
// Calls
|
||||
def : Pat<(X86tailcall GR32:$dst),
|
||||
(CALL32r GR32:$dst)>;
|
||||
(CALL32r GR32:$dst)>;
|
||||
|
||||
def : Pat<(X86tailcall tglobaladdr:$dst),
|
||||
def : Pat<(X86tailcall (i32 tglobaladdr:$dst)),
|
||||
(CALLpcrel32 tglobaladdr:$dst)>;
|
||||
def : Pat<(X86tailcall texternalsym:$dst),
|
||||
def : Pat<(X86tailcall (i32 texternalsym:$dst)),
|
||||
(CALLpcrel32 texternalsym:$dst)>;
|
||||
|
||||
|
||||
|
||||
def : Pat<(X86call tglobaladdr:$dst),
|
||||
def : Pat<(X86call (i32 tglobaladdr:$dst)),
|
||||
(CALLpcrel32 tglobaladdr:$dst)>;
|
||||
def : Pat<(X86call texternalsym:$dst),
|
||||
def : Pat<(X86call (i32 texternalsym:$dst)),
|
||||
(CALLpcrel32 texternalsym:$dst)>;
|
||||
|
||||
// X86 specific add which produces a flag.
|
||||
|
@ -2611,3 +2623,9 @@ include "X86InstrMMX.td"
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
include "X86InstrSSE.td"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86-64 Support
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
include "X86InstrX86-64.td"
|
||||
|
File diff suppressed because it is too large
@@ -86,8 +86,9 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
    if (MRegisterInfo::isPhysicalRegister(MO.getReg())) {
      unsigned Reg = MO.getReg();
      if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
        MVT::ValueType VT = (strcmp(Modifier,"subreg16") == 0)
          ? MVT::i16 : MVT::i8;
        MVT::ValueType VT = (strcmp(Modifier,"subreg64") == 0) ?
          MVT::i64 : ((strcmp(Modifier, "subreg32") == 0) ? MVT::i32 :
                      ((strcmp(Modifier,"subreg16") == 0) ? MVT::i16 : MVT::i8));
        Reg = getX86SubSuperRegister(Reg, VT);
      }
      O << RI.get(Reg).Name;
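The nested conditional above is dense; an equivalent, easier-to-read form of the modifier-to-type mapping (a sketch with the same semantics, not the in-tree code, assuming the usual <cstring> and LLVM MVT declarations are in scope):

    // Map the "subregNN" print modifier to the register width to use.
    static MVT::ValueType subregModifierVT(const char *Modifier) {
      if (strcmp(Modifier, "subreg64") == 0) return MVT::i64;
      if (strcmp(Modifier, "subreg32") == 0) return MVT::i32;
      if (strcmp(Modifier, "subreg16") == 0) return MVT::i16;
      return MVT::i8;
    }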
@@ -137,7 +138,8 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
  }
}

void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){
void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
                                           const char *Modifier) {
  assert(isMem(MI, Op) && "Invalid memory reference!");

  const MachineOperand &BaseReg = MI->getOperand(Op);

@@ -156,7 +158,7 @@ void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){
  O << "[";
  bool NeedPlus = false;
  if (BaseReg.getReg()) {
    printOp(BaseReg, "mem");
    printOp(BaseReg, Modifier);
    NeedPlus = true;
  }

@@ -164,7 +166,7 @@ void X86IntelAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op){
    if (NeedPlus) O << " + ";
    if (ScaleVal != 1)
      O << ScaleVal << "*";
    printOp(IndexReg);
    printOp(IndexReg, Modifier);
    NeedPlus = true;
  }

@@ -259,14 +261,21 @@ void X86IntelAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
  // See if a truncate instruction can be turned into a nop.
  switch (MI->getOpcode()) {
  default: break;
  case X86::TRUNC_GR32_GR16:
  case X86::TRUNC_GR32_GR8:
  case X86::TRUNC_GR16_GR8: {
  case X86::TRUNC_64to32:
  case X86::TRUNC_64to16:
  case X86::TRUNC_32to16:
  case X86::TRUNC_32to8:
  case X86::TRUNC_16to8:
  case X86::TRUNC_32_to8:
  case X86::TRUNC_16_to8: {
    const MachineOperand &MO0 = MI->getOperand(0);
    const MachineOperand &MO1 = MI->getOperand(1);
    unsigned Reg0 = MO0.getReg();
    unsigned Reg1 = MO1.getReg();
    if (MI->getOpcode() == X86::TRUNC_GR32_GR16)
    unsigned Opc = MI->getOpcode();
    if (Opc == X86::TRUNC_64to32)
      Reg1 = getX86SubSuperRegister(Reg1, MVT::i32);
    else if (Opc == X86::TRUNC_32to16 || Opc == X86::TRUNC_64to16)
      Reg1 = getX86SubSuperRegister(Reg1, MVT::i16);
    else
      Reg1 = getX86SubSuperRegister(Reg1, MVT::i8);

@@ -275,6 +284,9 @@ void X86IntelAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
    O << "\n\t";
    break;
  }
  case X86::PsMOVZX64rr32:
    O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t";
    break;
  }

  // Call the autogenerated instruction printer routines.

@@ -80,6 +80,10 @@ struct X86IntelAsmPrinter : public X86SharedAsmPrinter {
    O << "XMMWORD PTR ";
    printMemReference(MI, OpNo);
  }
  void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
    O << "QWORD PTR ";
    printMemReference(MI, OpNo, "subreg64");
  }

  bool printAsmMRegister(const MachineOperand &MO, const char Mode);
  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,

@@ -89,7 +93,8 @@ struct X86IntelAsmPrinter : public X86SharedAsmPrinter {
  void printMachineInstruction(const MachineInstr *MI);
  void printOp(const MachineOperand &MO, const char *Modifier = 0);
  void printSSECC(const MachineInstr *MI, unsigned Op);
  void printMemReference(const MachineInstr *MI, unsigned Op);
  void printMemReference(const MachineInstr *MI, unsigned Op,
                         const char *Modifier=NULL);
  void printPICLabel(const MachineInstr *MI, unsigned Op);
  bool runOnMachineFunction(MachineFunction &F);
  bool doInitialization(Module &M);
@@ -42,7 +42,65 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction;
// Provide a wrapper for X86CompilationCallback2 that saves non-traditional
// callee saved registers, for the fastcc calling convention.
extern "C" {
#if defined(__i386__) || defined(i386) || defined(_M_IX86)
#if defined(__x86_64__)
  // No need to save EAX/EDX for X86-64.
  void X86CompilationCallback(void);
  asm(
    ".text\n"
    ".align 8\n"
    ".globl _X86CompilationCallback\n"
  "_X86CompilationCallback:\n"
    // Save RBP
    "pushq %rbp\n"
    // Save RSP
    "movq %rsp, %rbp\n"
    // Save all int arg registers
    "pushq %rdi\n"
    "pushq %rsi\n"
    "pushq %rdx\n"
    "pushq %rcx\n"
    "pushq %r8\n"
    "pushq %r9\n"
    // Align stack on 16-byte boundary. RSP might not be properly aligned
    // (8 byte) if this is called from an indirect stub.
    "andq $-16, %rsp\n"
    // Save all XMM arg registers
    "subq $128, %rsp\n"
    "movaps %xmm0, (%rsp)\n"
    "movaps %xmm1, 16(%rsp)\n"
    "movaps %xmm2, 32(%rsp)\n"
    "movaps %xmm3, 48(%rsp)\n"
    "movaps %xmm4, 64(%rsp)\n"
    "movaps %xmm5, 80(%rsp)\n"
    "movaps %xmm6, 96(%rsp)\n"
    "movaps %xmm7, 112(%rsp)\n"
    // JIT callee
    "movq %rbp, %rdi\n"    // Pass prev frame and return address
    "movq 8(%rbp), %rsi\n"
    "call _X86CompilationCallback2\n"
    // Restore all XMM arg registers
    "movaps 112(%rsp), %xmm7\n"
    "movaps 96(%rsp), %xmm6\n"
    "movaps 80(%rsp), %xmm5\n"
    "movaps 64(%rsp), %xmm4\n"
    "movaps 48(%rsp), %xmm3\n"
    "movaps 32(%rsp), %xmm2\n"
    "movaps 16(%rsp), %xmm1\n"
    "movaps (%rsp), %xmm0\n"
    // Restore RSP
    "movq %rbp, %rsp\n"
    // Restore all int arg registers
    "subq $48, %rsp\n"
    "popq %r9\n"
    "popq %r8\n"
    "popq %rcx\n"
    "popq %rdx\n"
    "popq %rsi\n"
    "popq %rdi\n"
    // Restore RBP
    "popq %rbp\n"
    "ret\n");
#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
#ifndef _MSC_VER
  void X86CompilationCallback(void);
  asm(

@@ -122,7 +180,7 @@ extern "C" void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
         "Could not find return address on the stack!");

  // It's a stub if there is an interrupt marker after the call.
  bool isStub = ((unsigned char*)(intptr_t)RetAddr)[0] == 0xCD;
  bool isStub = ((unsigned char*)RetAddr)[0] == 0xCD;

  // The call instruction should have pushed the return address onto the stack...
  RetAddr -= 4;  // Backtrack to the reference itself...

@@ -135,20 +193,20 @@ extern "C" void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
#endif

  // Sanity check to make sure this really is a call instruction.
  assert(((unsigned char*)(intptr_t)RetAddr)[-1] == 0xE8 && "Not a call instr!");
  assert(((unsigned char*)RetAddr)[-1] == 0xE8 && "Not a call instr!");

  unsigned NewVal = (intptr_t)JITCompilerFunction((void*)(intptr_t)RetAddr);
  intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);

  // Rewrite the call target... so that we don't end up here every time we
  // execute the call.
  *(unsigned*)(intptr_t)RetAddr = NewVal-RetAddr-4;
  *(unsigned *)RetAddr = (unsigned)(NewVal-RetAddr-4);

  if (isStub) {
    // If this is a stub, rewrite the call into an unconditional branch
    // instruction so that two return addresses are not pushed onto the stack
    // when the requested function finally gets called. This also makes the
    // 0xCD byte (interrupt) dead, so the marker doesn't affect anything.
    ((unsigned char*)(intptr_t)RetAddr)[-1] = 0xE9;
    ((unsigned char*)RetAddr)[-1] = 0xE9;
  }

  // Change the return address to reexecute the call instruction...
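The stub rewrite above is a one-byte patch: a 5-byte `call rel32` (opcode 0xE8) becomes a `jmp rel32` (0xE9) reusing the same displacement bytes. A sketch of that patch in isolation (hypothetical helper; offsets follow the code above, where the return address points just past the rel32 field):

    // One-byte patch: turn the resolved stub's call into a tail jump so the
    // stack does not accumulate a second return address on re-entry.
    // Assumes the 5-byte "call rel32" immediately precedes RetAddr.
    static void patchCallToJump(unsigned char *RetAddr) {
      RetAddr[-5] = 0xE9; // jmp rel32 keeps the same displacement bytes
    }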
@@ -189,16 +247,17 @@ void X86JITInfo::relocate(void *Function, MachineRelocation *MR,
    void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
    intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
    switch ((X86::RelocationType)MR->getRelocationType()) {
    case X86::reloc_pcrel_word:
    case X86::reloc_pcrel_word: {
      // PC relative relocation, add the relocated value to the value already in
      // memory, after we adjust it for where the PC is.
      ResultPtr = ResultPtr-(intptr_t)RelocPos-4;
      *((intptr_t*)RelocPos) += ResultPtr;
      ResultPtr = ResultPtr-(intptr_t)RelocPos-4-MR->getConstantVal();
      *((unsigned*)RelocPos) += (unsigned)ResultPtr;
      break;
    }
    case X86::reloc_absolute_word:
      // Absolute relocation, just add the relocated value to the value already
      // in memory.
      *((intptr_t*)RelocPos) += ResultPtr;
      *((unsigned*)RelocPos) += (unsigned)ResultPtr;
      break;
    }
  }
|
|||
|
||||
#include "X86.h"
|
||||
#include "X86RegisterInfo.h"
|
||||
#include "X86Subtarget.h"
|
||||
#include "X86InstrBuilder.h"
|
||||
#include "X86MachineFunctionInfo.h"
|
||||
#include "X86Subtarget.h"
|
||||
#include "X86TargetMachine.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "llvm/Type.h"
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/Type.h"
|
||||
#include "llvm/CodeGen/ValueTypes.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
|
@ -46,15 +46,32 @@ namespace {
|
|||
cl::Hidden);
|
||||
}
|
||||
|
||||
X86RegisterInfo::X86RegisterInfo(const TargetInstrInfo &tii)
|
||||
: X86GenRegisterInfo(X86::ADJCALLSTACKDOWN, X86::ADJCALLSTACKUP), TII(tii) {}
|
||||
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
|
||||
const TargetInstrInfo &tii)
|
||||
: X86GenRegisterInfo(X86::ADJCALLSTACKDOWN, X86::ADJCALLSTACKUP),
|
||||
TM(tm), TII(tii) {
|
||||
// Cache some information.
|
||||
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
|
||||
Is64Bit = Subtarget->is64Bit();
|
||||
if (Is64Bit) {
|
||||
SlotSize = 8;
|
||||
StackPtr = X86::RSP;
|
||||
FramePtr = X86::RBP;
|
||||
} else {
|
||||
SlotSize = 4;
|
||||
StackPtr = X86::ESP;
|
||||
FramePtr = X86::EBP;
|
||||
}
|
||||
}
|
||||
|
||||
void X86RegisterInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
unsigned SrcReg, int FrameIdx,
|
||||
const TargetRegisterClass *RC) const {
|
||||
unsigned Opc;
|
||||
if (RC == &X86::GR32RegClass) {
|
||||
if (RC == &X86::GR64RegClass) {
|
||||
Opc = X86::MOV64mr;
|
||||
} else if (RC == &X86::GR32RegClass) {
|
||||
Opc = X86::MOV32mr;
|
||||
} else if (RC == &X86::GR16RegClass) {
|
||||
Opc = X86::MOV16mr;
|
||||
|
@ -84,7 +101,9 @@ void X86RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
|||
unsigned DestReg, int FrameIdx,
|
||||
const TargetRegisterClass *RC) const{
|
||||
unsigned Opc;
|
||||
if (RC == &X86::GR32RegClass) {
|
||||
if (RC == &X86::GR64RegClass) {
|
||||
Opc = X86::MOV64rm;
|
||||
} else if (RC == &X86::GR32RegClass) {
|
||||
Opc = X86::MOV32rm;
|
||||
} else if (RC == &X86::GR16RegClass) {
|
||||
Opc = X86::MOV16rm;
|
||||
|
@ -114,7 +133,9 @@ void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
|
|||
unsigned DestReg, unsigned SrcReg,
|
||||
const TargetRegisterClass *RC) const {
|
||||
unsigned Opc;
|
||||
if (RC == &X86::GR32RegClass) {
|
||||
if (RC == &X86::GR64RegClass) {
|
||||
Opc = X86::MOV64rr;
|
||||
} else if (RC == &X86::GR32RegClass) {
|
||||
Opc = X86::MOV32rr;
|
||||
} else if (RC == &X86::GR16RegClass) {
|
||||
Opc = X86::MOV16rr;
|
||||
|
@ -270,12 +291,18 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
|
|||
{ X86::ADC32ri, X86::ADC32mi },
|
||||
{ X86::ADC32ri8, X86::ADC32mi8 },
|
||||
{ X86::ADC32rr, X86::ADC32mr },
|
||||
{ X86::ADC64ri32, X86::ADC64mi32 },
|
||||
{ X86::ADC64ri8, X86::ADC64mi8 },
|
||||
{ X86::ADC64rr, X86::ADC64mr },
|
||||
{ X86::ADD16ri, X86::ADD16mi },
|
||||
{ X86::ADD16ri8, X86::ADD16mi8 },
|
||||
{ X86::ADD16rr, X86::ADD16mr },
|
||||
{ X86::ADD32ri, X86::ADD32mi },
|
||||
{ X86::ADD32ri8, X86::ADD32mi8 },
|
||||
{ X86::ADD32rr, X86::ADD32mr },
|
||||
{ X86::ADD64ri32, X86::ADD64mi32 },
|
||||
{ X86::ADD64ri8, X86::ADD64mi8 },
|
||||
{ X86::ADD64rr, X86::ADD64mr },
|
||||
{ X86::ADD8ri, X86::ADD8mi },
|
||||
{ X86::ADD8rr, X86::ADD8mr },
|
||||
{ X86::AND16ri, X86::AND16mi },
|
||||
|
@ -284,19 +311,30 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
|
|||
{ X86::AND32ri, X86::AND32mi },
|
||||
{ X86::AND32ri8, X86::AND32mi8 },
|
||||
{ X86::AND32rr, X86::AND32mr },
|
||||
{ X86::AND64ri32, X86::AND64mi32 },
|
||||
{ X86::AND64ri8, X86::AND64mi8 },
|
||||
{ X86::AND64rr, X86::AND64mr },
|
||||
{ X86::AND8ri, X86::AND8mi },
|
||||
{ X86::AND8rr, X86::AND8mr },
|
||||
{ X86::DEC16r, X86::DEC16m },
|
||||
{ X86::DEC32r, X86::DEC32m },
|
||||
{ X86::DEC64_16r, X86::DEC16m },
|
||||
{ X86::DEC64_32r, X86::DEC32m },
|
||||
{ X86::DEC64r, X86::DEC64m },
|
||||
{ X86::DEC8r, X86::DEC8m },
|
||||
{ X86::INC16r, X86::INC16m },
|
||||
{ X86::INC32r, X86::INC32m },
|
||||
{ X86::INC64_16r, X86::INC16m },
|
||||
{ X86::INC64_32r, X86::INC32m },
|
||||
{ X86::INC64r, X86::INC64m },
|
||||
{ X86::INC8r, X86::INC8m },
|
||||
{ X86::NEG16r, X86::NEG16m },
|
||||
{ X86::NEG32r, X86::NEG32m },
|
||||
{ X86::NEG64r, X86::NEG64m },
|
||||
{ X86::NEG8r, X86::NEG8m },
|
||||
{ X86::NOT16r, X86::NOT16m },
|
||||
{ X86::NOT32r, X86::NOT32m },
|
||||
{ X86::NOT64r, X86::NOT64m },
|
||||
{ X86::NOT8r, X86::NOT8m },
|
||||
{ X86::OR16ri, X86::OR16mi },
|
||||
{ X86::OR16ri8, X86::OR16mi8 },
|
||||
|
@ -304,6 +342,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
|
|||
{ X86::OR32ri, X86::OR32mi },
|
||||
{ X86::OR32ri8, X86::OR32mi8 },
|
||||
{ X86::OR32rr, X86::OR32mr },
|
||||
{ X86::OR64ri32, X86::OR64mi32 },
|
||||
{ X86::OR64ri8, X86::OR64mi8 },
|
||||
{ X86::OR64rr, X86::OR64mr },
|
||||
{ X86::OR8ri, X86::OR8mi },
|
||||
{ X86::OR8rr, X86::OR8mr },
|
||||
{ X86::ROL16r1, X86::ROL16m1 },
|
||||
|
@ -312,6 +353,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
|
|||
{ X86::ROL32r1, X86::ROL32m1 },
|
||||
{ X86::ROL32rCL, X86::ROL32mCL },
|
||||
{ X86::ROL32ri, X86::ROL32mi },
|
||||
{ X86::ROL64r1, X86::ROL64m1 },
|
||||
{ X86::ROL64rCL, X86::ROL64mCL },
|
||||
{ X86::ROL64ri, X86::ROL64mi },
|
||||
{ X86::ROL8r1, X86::ROL8m1 },
|
||||
{ X86::ROL8rCL, X86::ROL8mCL },
|
||||
{ X86::ROL8ri, X86::ROL8mi },
|
||||
|
@ -321,6 +365,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
|
|||
{ X86::ROR32r1, X86::ROR32m1 },
|
||||
{ X86::ROR32rCL, X86::ROR32mCL },
|
||||
{ X86::ROR32ri, X86::ROR32mi },
|
||||
{ X86::ROR64r1, X86::ROR64m1 },
|
||||
{ X86::ROR64rCL, X86::ROR64mCL },
|
||||
{ X86::ROR64ri, X86::ROR64mi },
|
||||
{ X86::ROR8r1, X86::ROR8m1 },
|
||||
{ X86::ROR8rCL, X86::ROR8mCL },
|
||||
{ X86::ROR8ri, X86::ROR8mi },
|
||||
|
@ -330,18 +377,27 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
|
|||
{ X86::SAR32r1, X86::SAR32m1 },
|
||||
{ X86::SAR32rCL, X86::SAR32mCL },
|
||||
{ X86::SAR32ri, X86::SAR32mi },
|
||||
{ X86::SAR64r1, X86::SAR64m1 },
|
||||
{ X86::SAR64rCL, X86::SAR64mCL },
|
||||
{ X86::SAR64ri, X86::SAR64mi },
|
||||
{ X86::SAR8r1, X86::SAR8m1 },
|
||||
{ X86::SAR8rCL, X86::SAR8mCL },
|
||||
{ X86::SAR8ri, X86::SAR8mi },
|
||||
{ X86::SBB32ri, X86::SBB32mi },
|
||||
{ X86::SBB32ri8, X86::SBB32mi8 },
|
||||
{ X86::SBB32rr, X86::SBB32mr },
|
||||
{ X86::SBB64ri32, X86::SBB64mi32 },
|
||||
{ X86::SBB64ri8, X86::SBB64mi8 },
|
||||
{ X86::SBB64rr, X86::SBB64mr },
|
||||
{ X86::SHL16r1, X86::SHL16m1 },
|
||||
{ X86::SHL16rCL, X86::SHL16mCL },
|
||||
{ X86::SHL16ri, X86::SHL16mi },
|
||||
{ X86::SHL32r1, X86::SHL32m1 },
|
||||
{ X86::SHL32rCL, X86::SHL32mCL },
|
||||
{ X86::SHL32ri, X86::SHL32mi },
|
||||
{ X86::SHL64r1, X86::SHL64m1 },
|
||||
{ X86::SHL64rCL, X86::SHL64mCL },
|
||||
{ X86::SHL64ri, X86::SHL64mi },
|
||||
{ X86::SHL8r1, X86::SHL8m1 },
|
||||
{ X86::SHL8rCL, X86::SHL8mCL },
|
||||
{ X86::SHL8ri, X86::SHL8mi },
|
||||
|
@ -349,12 +405,17 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
|
|||
{ X86::SHLD16rri8, X86::SHLD16mri8 },
|
||||
{ X86::SHLD32rrCL, X86::SHLD32mrCL },
|
||||
{ X86::SHLD32rri8, X86::SHLD32mri8 },
|
||||
{ X86::SHLD64rrCL, X86::SHLD64mrCL },
|
||||
{ X86::SHLD64rri8, X86::SHLD64mri8 },
|
||||
{ X86::SHR16r1, X86::SHR16m1 },
|
||||
{ X86::SHR16rCL, X86::SHR16mCL },
|
||||
{ X86::SHR16ri, X86::SHR16mi },
|
||||
{ X86::SHR32r1, X86::SHR32m1 },
|
||||
{ X86::SHR32rCL, X86::SHR32mCL },
|
||||
{ X86::SHR32ri, X86::SHR32mi },
|
||||
{ X86::SHR64r1, X86::SHR64m1 },
|
||||
{ X86::SHR64rCL, X86::SHR64mCL },
|
||||
{ X86::SHR64ri, X86::SHR64mi },
|
||||
{ X86::SHR8r1, X86::SHR8m1 },
|
||||
{ X86::SHR8rCL, X86::SHR8mCL },
|
||||
{ X86::SHR8ri, X86::SHR8mi },
|
||||
|
@ -362,12 +423,17 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
|
|||
{ X86::SHRD16rri8, X86::SHRD16mri8 },
|
||||
{ X86::SHRD32rrCL, X86::SHRD32mrCL },
|
||||
{ X86::SHRD32rri8, X86::SHRD32mri8 },
|
||||
{ X86::SHRD64rrCL, X86::SHRD64mrCL },
|
||||
{ X86::SHRD64rri8, X86::SHRD64mri8 },
|
||||
{ X86::SUB16ri, X86::SUB16mi },
|
||||
{ X86::SUB16ri8, X86::SUB16mi8 },
|
||||
{ X86::SUB16rr, X86::SUB16mr },
|
||||
{ X86::SUB32ri, X86::SUB32mi },
|
||||
{ X86::SUB32ri8, X86::SUB32mi8 },
|
||||
{ X86::SUB32rr, X86::SUB32mr },
|
||||
{ X86::SUB64ri32, X86::SUB64mi32 },
|
||||
{ X86::SUB64ri8, X86::SUB64mi8 },
|
||||
{ X86::SUB64rr, X86::SUB64mr },
|
||||
{ X86::SUB8ri, X86::SUB8mi },
|
||||
{ X86::SUB8rr, X86::SUB8mr },
|
||||
{ X86::XOR16ri, X86::XOR16mi },
|
||||
|
@ -376,6 +442,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
|
|||
{ X86::XOR32ri, X86::XOR32mi },
|
||||
{ X86::XOR32ri8, X86::XOR32mi8 },
|
||||
{ X86::XOR32rr, X86::XOR32mr },
|
||||
{ X86::XOR64ri32, X86::XOR64mi32 },
|
||||
{ X86::XOR64ri8, X86::XOR64mi8 },
|
||||
{ X86::XOR64rr, X86::XOR64mr },
|
||||
{ X86::XOR8ri, X86::XOR8mi },
|
||||
{ X86::XOR8rr, X86::XOR8mr }
|
||||
};
|
||||
|
@ -388,6 +457,8 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
|
|||
return MakeM0Inst(X86::MOV16mi, FrameIndex, MI);
|
||||
else if (MI->getOpcode() == X86::MOV32r0)
|
||||
return MakeM0Inst(X86::MOV32mi, FrameIndex, MI);
|
||||
else if (MI->getOpcode() == X86::MOV64r0)
|
||||
return MakeM0Inst(X86::MOV64mi32, FrameIndex, MI);
|
||||
else if (MI->getOpcode() == X86::MOV8r0)
|
||||
return MakeM0Inst(X86::MOV8mi, FrameIndex, MI);
|
||||
|
||||
|
@ -399,19 +470,24 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
|
|||
{ X86::CMP8ri, X86::CMP8mi },
|
||||
{ X86::DIV16r, X86::DIV16m },
|
||||
{ X86::DIV32r, X86::DIV32m },
|
||||
{ X86::DIV64r, X86::DIV64m },
|
||||
{ X86::DIV8r, X86::DIV8m },
|
||||
{ X86::FsMOVAPDrr, X86::MOVSDmr },
|
||||
{ X86::FsMOVAPSrr, X86::MOVSSmr },
|
||||
{ X86::IDIV16r, X86::IDIV16m },
|
||||
{ X86::IDIV32r, X86::IDIV32m },
|
||||
{ X86::IDIV64r, X86::IDIV64m },
|
||||
{ X86::IDIV8r, X86::IDIV8m },
|
||||
{ X86::IMUL16r, X86::IMUL16m },
|
||||
{ X86::IMUL32r, X86::IMUL32m },
|
||||
{ X86::IMUL64r, X86::IMUL64m },
|
||||
{ X86::IMUL8r, X86::IMUL8m },
|
||||
{ X86::MOV16ri, X86::MOV16mi },
|
||||
{ X86::MOV16rr, X86::MOV16mr },
|
||||
{ X86::MOV32ri, X86::MOV32mi },
|
||||
{ X86::MOV32rr, X86::MOV32mr },
|
||||
{ X86::MOV64ri32, X86::MOV64mi32 },
|
||||
{ X86::MOV64rr, X86::MOV64mr },
|
||||
{ X86::MOV8ri, X86::MOV8mi },
|
||||
{ X86::MOV8rr, X86::MOV8mr },
|
||||
{ X86::MOVAPDrr, X86::MOVAPDmr },
|
||||
|
@ -424,6 +500,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
|
|||
{ X86::MOVUPSrr, X86::MOVUPSmr },
|
||||
{ X86::MUL16r, X86::MUL16m },
|
||||
{ X86::MUL32r, X86::MUL32m },
|
||||
{ X86::MUL64r, X86::MUL64m },
|
||||
{ X86::MUL8r, X86::MUL8m },
|
||||
{ X86::SETAEr, X86::SETAEm },
|
||||
{ X86::SETAr, X86::SETAm },
|
||||
|
@ -441,9 +518,11 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
|
|||
{ X86::SETSr, X86::SETSm },
|
||||
{ X86::TEST16ri, X86::TEST16mi },
|
||||
{ X86::TEST32ri, X86::TEST32mi },
|
||||
{ X86::TEST64ri32, X86::TEST64mi32 },
|
||||
{ X86::TEST8ri, X86::TEST8mi },
|
||||
{ X86::XCHG16rr, X86::XCHG16mr },
|
||||
{ X86::XCHG32rr, X86::XCHG32mr },
|
||||
{ X86::XCHG64rr, X86::XCHG64mr },
|
||||
{ X86::XCHG8rr, X86::XCHG8mr }
|
||||
};
|
||||
ASSERT_SORTED(OpcodeTable);
|
||||
|
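These opcode tables are kept sorted (hence ASSERT_SORTED) so a fold can be located with a binary search. A sketch of the lookup under the assumption that TableEntry is a simple {from, to} opcode pair, as in this file:

    #include <algorithm>

    struct TableEntry { unsigned from, to; };

    // Return the memory-form opcode for a register-form opcode, or 0 if
    // the instruction has no foldable memory form in this table.
    static unsigned lookupFold(const TableEntry *Table, unsigned N,
                               unsigned Opcode) {
      const TableEntry *End = Table + N;
      const TableEntry *I = std::lower_bound(
          Table, End, Opcode,
          [](const TableEntry &E, unsigned Op) { return E.from < Op; });
      return (I != End && I->from == Opcode) ? I->to : 0;
    }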
@@ -453,16 +532,23 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
    static const TableEntry OpcodeTable[] = {
      { X86::CMP16rr, X86::CMP16rm },
      { X86::CMP32rr, X86::CMP32rm },
      { X86::CMP64ri32, X86::CMP64mi32 },
      { X86::CMP64ri8, X86::CMP64mi8 },
      { X86::CMP64rr, X86::CMP64rm },
      { X86::CMP8rr, X86::CMP8rm },
      { X86::CMPPDrri, X86::CMPPDrmi },
      { X86::CMPPSrri, X86::CMPPSrmi },
      { X86::CMPSDrr, X86::CMPSDrm },
      { X86::CMPSSrr, X86::CMPSSrm },
      { X86::CVTSD2SSrr, X86::CVTSD2SSrm },
      { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm },
      { X86::CVTSI2SDrr, X86::CVTSI2SDrm },
      { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm },
      { X86::CVTSI2SSrr, X86::CVTSI2SSrm },
      { X86::CVTSS2SDrr, X86::CVTSS2SDrm },
      { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm },
      { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm },
      { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm },
      { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm },
      { X86::FsMOVAPDrr, X86::MOVSDrm },
      { X86::FsMOVAPSrr, X86::MOVSSrm },

@@ -470,6 +556,9 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
      { X86::IMUL16rri8, X86::IMUL16rmi8 },
      { X86::IMUL32rri, X86::IMUL32rmi },
      { X86::IMUL32rri8, X86::IMUL32rmi8 },
      { X86::IMUL64rr, X86::IMUL64rm },
      { X86::IMUL64rri32, X86::IMUL64rmi32 },
      { X86::IMUL64rri8, X86::IMUL64rmi8 },
      { X86::Int_CMPSDrr, X86::Int_CMPSDrm },
      { X86::Int_CMPSSrr, X86::Int_CMPSSrm },
      { X86::Int_COMISDrr, X86::Int_COMISDrm },

@@ -480,20 +569,27 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
      { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm },
      { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm },
      { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm },
      { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm },
      { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm },
      { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm },
      { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm },
      { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm },
      { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm },
      { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm },
      { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm },
      { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm },
      { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm },
      { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm },
      { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm },
      { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm },
      { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm },
      { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm },
      { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm },
      { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm },
      { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm },
      { X86::MOV16rr, X86::MOV16rm },
      { X86::MOV32rr, X86::MOV32rm },
      { X86::MOV64rr, X86::MOV64rm },
      { X86::MOV8rr, X86::MOV8rm },
      { X86::MOVAPDrr, X86::MOVAPDrm },
      { X86::MOVAPSrr, X86::MOVAPSrm },

@@ -509,22 +605,30 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
      { X86::MOVSX16rr8, X86::MOVSX16rm8 },
      { X86::MOVSX32rr16, X86::MOVSX32rm16 },
      { X86::MOVSX32rr8, X86::MOVSX32rm8 },
      { X86::MOVSX64rr16, X86::MOVSX64rm16 },
      { X86::MOVSX64rr32, X86::MOVSX64rm32 },
      { X86::MOVSX64rr8, X86::MOVSX64rm8 },
      { X86::MOVUPDrr, X86::MOVUPDrm },
      { X86::MOVUPSrr, X86::MOVUPSrm },
      { X86::MOVZX16rr8, X86::MOVZX16rm8 },
      { X86::MOVZX32rr16, X86::MOVZX32rm16 },
      { X86::MOVZX32rr8, X86::MOVZX32rm8 },
      { X86::MOVZX64rr16, X86::MOVZX64rm16 },
      { X86::MOVZX64rr8, X86::MOVZX64rm8 },
      { X86::PSHUFDri, X86::PSHUFDmi },
      { X86::PSHUFHWri, X86::PSHUFHWmi },
      { X86::PSHUFLWri, X86::PSHUFLWmi },
      { X86::PsMOVZX64rr32, X86::PsMOVZX64rm32 },
      { X86::TEST16rr, X86::TEST16rm },
      { X86::TEST32rr, X86::TEST32rm },
      { X86::TEST64rr, X86::TEST64rm },
      { X86::TEST8rr, X86::TEST8rm },
      // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
      { X86::UCOMISDrr, X86::UCOMISDrm },
      { X86::UCOMISSrr, X86::UCOMISSrm },
      { X86::XCHG16rr, X86::XCHG16rm },
      { X86::XCHG32rr, X86::XCHG32rm },
      { X86::XCHG64rr, X86::XCHG64rm },
      { X86::XCHG8rr, X86::XCHG8rm }
    };
    ASSERT_SORTED(OpcodeTable);

@@ -533,8 +637,10 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
  } else if (i == 2) {
    static const TableEntry OpcodeTable[] = {
      { X86::ADC32rr, X86::ADC32rm },
      { X86::ADC64rr, X86::ADC64rm },
      { X86::ADD16rr, X86::ADD16rm },
      { X86::ADD32rr, X86::ADD32rm },
      { X86::ADD64rr, X86::ADD64rm },
      { X86::ADD8rr, X86::ADD8rm },
      { X86::ADDPDrr, X86::ADDPDrm },
      { X86::ADDPSrr, X86::ADDPSrm },

@@ -544,6 +650,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
      { X86::ADDSUBPSrr, X86::ADDSUBPSrm },
      { X86::AND16rr, X86::AND16rm },
      { X86::AND32rr, X86::AND32rm },
      { X86::AND64rr, X86::AND64rm },
      { X86::AND8rr, X86::AND8rm },
      { X86::ANDNPDrr, X86::ANDNPDrm },
      { X86::ANDNPSrr, X86::ANDNPSrm },

@@ -551,32 +658,46 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
      { X86::ANDPSrr, X86::ANDPSrm },
      { X86::CMOVA16rr, X86::CMOVA16rm },
      { X86::CMOVA32rr, X86::CMOVA32rm },
      { X86::CMOVA64rr, X86::CMOVA64rm },
      { X86::CMOVAE16rr, X86::CMOVAE16rm },
      { X86::CMOVAE32rr, X86::CMOVAE32rm },
      { X86::CMOVAE64rr, X86::CMOVAE64rm },
      { X86::CMOVB16rr, X86::CMOVB16rm },
      { X86::CMOVB32rr, X86::CMOVB32rm },
      { X86::CMOVB64rr, X86::CMOVB64rm },
      { X86::CMOVBE16rr, X86::CMOVBE16rm },
      { X86::CMOVBE32rr, X86::CMOVBE32rm },
      { X86::CMOVBE64rr, X86::CMOVBE64rm },
      { X86::CMOVE16rr, X86::CMOVE16rm },
      { X86::CMOVE32rr, X86::CMOVE32rm },
      { X86::CMOVE64rr, X86::CMOVE64rm },
      { X86::CMOVG16rr, X86::CMOVG16rm },
      { X86::CMOVG32rr, X86::CMOVG32rm },
      { X86::CMOVG64rr, X86::CMOVG64rm },
      { X86::CMOVGE16rr, X86::CMOVGE16rm },
      { X86::CMOVGE32rr, X86::CMOVGE32rm },
      { X86::CMOVGE64rr, X86::CMOVGE64rm },
      { X86::CMOVL16rr, X86::CMOVL16rm },
      { X86::CMOVL32rr, X86::CMOVL32rm },
      { X86::CMOVL64rr, X86::CMOVL64rm },
      { X86::CMOVLE16rr, X86::CMOVLE16rm },
      { X86::CMOVLE32rr, X86::CMOVLE32rm },
      { X86::CMOVLE64rr, X86::CMOVLE64rm },
      { X86::CMOVNE16rr, X86::CMOVNE16rm },
      { X86::CMOVNE32rr, X86::CMOVNE32rm },
      { X86::CMOVNE64rr, X86::CMOVNE64rm },
      { X86::CMOVNP16rr, X86::CMOVNP16rm },
      { X86::CMOVNP32rr, X86::CMOVNP32rm },
      { X86::CMOVNP64rr, X86::CMOVNP64rm },
      { X86::CMOVNS16rr, X86::CMOVNS16rm },
      { X86::CMOVNS32rr, X86::CMOVNS32rm },
      { X86::CMOVNS64rr, X86::CMOVNS64rm },
      { X86::CMOVP16rr, X86::CMOVP16rm },
      { X86::CMOVP32rr, X86::CMOVP32rm },
      { X86::CMOVP64rr, X86::CMOVP64rm },
      { X86::CMOVS16rr, X86::CMOVS16rm },
      { X86::CMOVS32rr, X86::CMOVS32rm },
      { X86::CMOVS64rr, X86::CMOVS64rm },
      { X86::DIVPDrr, X86::DIVPDrm },
      { X86::DIVPSrr, X86::DIVPSrm },
      { X86::DIVSDrr, X86::DIVSDrm },

@@ -597,6 +718,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
      { X86::MULSSrr, X86::MULSSrm },
      { X86::OR16rr, X86::OR16rm },
      { X86::OR32rr, X86::OR32rm },
      { X86::OR64rr, X86::OR64rm },
      { X86::OR8rr, X86::OR8rm },
      { X86::ORPDrr, X86::ORPDrm },
      { X86::ORPSrr, X86::ORPSrm },

@@ -655,6 +777,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
      { X86::RCPPSr, X86::RCPPSm },
      { X86::RSQRTPSr, X86::RSQRTPSm },
      { X86::SBB32rr, X86::SBB32rm },
      { X86::SBB64rr, X86::SBB64rm },
      { X86::SHUFPDrri, X86::SHUFPDrmi },
      { X86::SHUFPSrri, X86::SHUFPSrmi },
      { X86::SQRTPDr, X86::SQRTPDm },

@@ -663,6 +786,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
      { X86::SQRTSSr, X86::SQRTSSm },
      { X86::SUB16rr, X86::SUB16rm },
      { X86::SUB32rr, X86::SUB32rm },
      { X86::SUB64rr, X86::SUB64rm },
      { X86::SUB8rr, X86::SUB8rm },
      { X86::SUBPDrr, X86::SUBPDrm },
      { X86::SUBPSrr, X86::SUBPSrm },

@@ -675,6 +799,7 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,
      { X86::UNPCKLPSrr, X86::UNPCKLPSrm },
      { X86::XOR16rr, X86::XOR16rm },
      { X86::XOR32rr, X86::XOR32rm },
      { X86::XOR64rr, X86::XOR64rm },
      { X86::XOR8rr, X86::XOR8rm },
      { X86::XORPDrr, X86::XORPDrm },
      { X86::XORPSrr, X86::XORPSrm }

@@ -707,19 +832,29 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI,

const unsigned *X86RegisterInfo::getCalleeSaveRegs() const {
  static const unsigned CalleeSaveRegs[] = {
  static const unsigned CalleeSaveRegs32Bit[] = {
    X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
  };
  return CalleeSaveRegs;
  static const unsigned CalleeSaveRegs64Bit[] = {
    X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
  };

  return Is64Bit ? CalleeSaveRegs64Bit : CalleeSaveRegs32Bit;
}

const TargetRegisterClass* const*
X86RegisterInfo::getCalleeSaveRegClasses() const {
  static const TargetRegisterClass * const CalleeSaveRegClasses[] = {
  static const TargetRegisterClass * const CalleeSaveRegClasses32Bit[] = {
    &X86::GR32RegClass, &X86::GR32RegClass,
    &X86::GR32RegClass, &X86::GR32RegClass, 0
  };
  return CalleeSaveRegClasses;
  static const TargetRegisterClass * const CalleeSaveRegClasses64Bit[] = {
    &X86::GR64RegClass, &X86::GR64RegClass,
    &X86::GR64RegClass, &X86::GR64RegClass,
    &X86::GR64RegClass, &X86::GR64RegClass, 0
  };

  return Is64Bit ? CalleeSaveRegClasses64Bit : CalleeSaveRegClasses32Bit;
}

//===----------------------------------------------------------------------===//

@@ -754,15 +889,18 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,

    MachineInstr *New = 0;
    if (Old->getOpcode() == X86::ADJCALLSTACKDOWN) {
      New=BuildMI(X86::SUB32ri, 2, X86::ESP).addReg(X86::ESP).addImm(Amount);
      New=BuildMI(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri, 1, StackPtr)
        .addReg(StackPtr).addImm(Amount);
    } else {
      assert(Old->getOpcode() == X86::ADJCALLSTACKUP);
      // factor out the amount the callee already popped.
      unsigned CalleeAmt = Old->getOperand(1).getImmedValue();
      Amount -= CalleeAmt;
      if (Amount) {
        unsigned Opc = Amount < 128 ? X86::ADD32ri8 : X86::ADD32ri;
        New = BuildMI(Opc, 2, X86::ESP).addReg(X86::ESP).addImm(Amount);
        unsigned Opc = (Amount < 128) ?
          (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
          (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
        New = BuildMI(Opc, 1, StackPtr).addReg(StackPtr).addImm(Amount);
      }
    }
@ -774,9 +912,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
|
|||
// something off the stack pointer, add it back. We do this until we have
|
||||
// more advanced stack pointer tracking ability.
|
||||
if (unsigned CalleeAmt = I->getOperand(1).getImmedValue()) {
|
||||
unsigned Opc = CalleeAmt < 128 ? X86::SUB32ri8 : X86::SUB32ri;
|
||||
unsigned Opc = (CalleeAmt < 128) ?
|
||||
(Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
|
||||
(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
|
||||
MachineInstr *New =
|
||||
BuildMI(Opc, 1, X86::ESP).addReg(X86::ESP).addImm(CalleeAmt);
|
||||
BuildMI(Opc, 1, StackPtr).addReg(StackPtr).addImm(CalleeAmt);
|
||||
MBB.insert(I, New);
|
||||
}
|
||||
}
|
||||
|
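The "Amount < 128" test chooses the encoding: the ri8 forms take a
sign-extended 8-bit immediate (one byte), while the ri32 forms take a full
32-bit immediate. Note there is no ri64 variant; even 64-bit ADD/SUB encode at
most a 32-bit immediate. The same selection written out as a standalone
sketch (illustrative only):

  // Mirrors the opcode choice in the code above.
  const char *pickStackAdjust(bool Is64Bit, bool IsSub, long Amount) {
    bool Imm8 = Amount < 128;  // fits the sign-extended 8-bit immediate field
    if (IsSub)
      return Imm8 ? (Is64Bit ? "SUB64ri8" : "SUB32ri8")
                  : (Is64Bit ? "SUB64ri32" : "SUB32ri");
    return Imm8 ? (Is64Bit ? "ADD64ri8" : "ADD32ri8")
                : (Is64Bit ? "ADD64ri32" : "ADD32ri");
  }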
@@ -794,19 +934,18 @@ void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II) const{
}

int FrameIndex = MI.getOperand(i).getFrameIndex();

// This must be part of a four operand memory reference. Replace the
// FrameIndex with base register with EBP. Add add an offset to the offset.
MI.getOperand(i).ChangeToRegister(hasFP(MF) ? X86::EBP : X86::ESP, false);
// FrameIndex with base register with EBP. Add an offset to the offset.
MI.getOperand(i).ChangeToRegister(hasFP(MF) ? FramePtr : StackPtr, false);

// Now add the frame object offset to the offset from EBP.
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
MI.getOperand(i+3).getImmedValue()+4;
MI.getOperand(i+3).getImmedValue()+SlotSize;

if (!hasFP(MF))
Offset += MF.getFrameInfo()->getStackSize();
else
Offset += 4; // Skip the saved EBP
Offset += SlotSize; // Skip the saved EBP

MI.getOperand(i+3).ChangeToImmediate(Offset);
}
@@ -815,7 +954,7 @@ void
X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{
if (hasFP(MF)) {
// Create a frame entry for the EBP register that must be saved.
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, -8);
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize,SlotSize * -2);
assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
"Slot for EBP register must be last in order to be found!");
}
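With SlotSize replacing the hard-coded 4, the frame layout generalizes: the
saved frame pointer's fixed object sits at SlotSize * -2, one slot below the
SlotSize-byte return address, and eliminateFrameIndex above adds SlotSize back
to step over the saved EBP/RBP. A quick check of the arithmetic:

  // Offset of the saved frame pointer relative to the incoming stack pointer.
  int savedFPOffset(int SlotSize) { return SlotSize * -2; }
  // savedFPOffset(4) == -8, the old hard-coded value for 32-bit x86.
  // savedFPOffset(8) == -16 for x86-64.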
@@ -840,9 +979,9 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (!hasFP(MF))
NumBytes += MFI->getMaxCallFrameSize();

// Round the size to a multiple of the alignment (don't forget the 4 byte
// Round the size to a multiple of the alignment (don't forget the 4/8 byte
// offset though).
NumBytes = ((NumBytes+4)+Align-1)/Align*Align - 4;
NumBytes = ((NumBytes+SlotSize)+Align-1)/Align*Align - SlotSize;
}

// Update frame info to pretend that this is part of the stack...

@@ -859,8 +998,10 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
MI = BuildMI(X86::CALLpcrel32, 1).addExternalSymbol("_alloca");
MBB.insert(MBBI, MI);
} else {
unsigned Opc = NumBytes < 128 ? X86::SUB32ri8 : X86::SUB32ri;
MI = BuildMI(Opc, 2, X86::ESP).addReg(X86::ESP).addImm(NumBytes);
unsigned Opc = (NumBytes < 128) ?
(Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
MI= BuildMI(Opc, 1, StackPtr).addReg(StackPtr).addImm(NumBytes);
MBB.insert(MBBI, MI);
}
}
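The rounding expression accounts for the SlotSize-byte return address that is
already on the stack before rounding up to the alignment. Worked example with
NumBytes = 20, SlotSize = 8, Align = 16: ((20+8)+15)/16*16 - 8 = 32 - 8 = 24,
so the 8-byte return address plus 24 allocated bytes leaves the stack pointer
16-byte aligned. The same computation as a standalone function:

  // Integer-division rounding used by emitPrologue above.
  unsigned roundStackSize(unsigned NumBytes, unsigned SlotSize, unsigned Align) {
    return ((NumBytes + SlotSize) + Align - 1) / Align * Align - SlotSize;
  }
  // roundStackSize(20, 8, 16) == 24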
@@ -868,18 +1009,21 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (hasFP(MF)) {
// Get the offset of the stack slot for the EBP register... which is
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexBegin())+4;
int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexBegin())+SlotSize;

// Save EBP into the appropriate stack slot...
MI = addRegOffset(BuildMI(X86::MOV32mr, 5), // mov [ESP-<offset>], EBP
X86::ESP, EBPOffset+NumBytes).addReg(X86::EBP);
// mov [ESP-<offset>], EBP
MI = addRegOffset(BuildMI(Is64Bit ? X86::MOV64mr : X86::MOV32mr, 5),
StackPtr, EBPOffset+NumBytes).addReg(FramePtr);
MBB.insert(MBBI, MI);

// Update EBP with the new base value...
if (NumBytes == 4) // mov EBP, ESP
MI = BuildMI(X86::MOV32rr, 2, X86::EBP).addReg(X86::ESP);
if (NumBytes == SlotSize) // mov EBP, ESP
MI = BuildMI(Is64Bit ? X86::MOV64rr : X86::MOV32rr, 2, FramePtr).
addReg(StackPtr);
else // lea EBP, [ESP+StackSize]
MI = addRegOffset(BuildMI(X86::LEA32r, 5, X86::EBP), X86::ESP,NumBytes-4);
MI = addRegOffset(BuildMI(Is64Bit ? X86::LEA64r : X86::LEA32r,
5, FramePtr), StackPtr, NumBytes-SlotSize);

MBB.insert(MBBI, MI);
}
@@ -916,13 +1060,14 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
if (hasFP(MF)) {
// Get the offset of the stack slot for the EBP register... which is
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexEnd()-1)+4;
int EBPOffset = MFI->getObjectOffset(MFI->getObjectIndexEnd()-1)+SlotSize;

// mov ESP, EBP
BuildMI(MBB, MBBI, X86::MOV32rr, 1, X86::ESP).addReg(X86::EBP);
BuildMI(MBB, MBBI, Is64Bit ? X86::MOV64rr : X86::MOV32rr, 1, StackPtr).
addReg(FramePtr);

// pop EBP
BuildMI(MBB, MBBI, X86::POP32r, 0, X86::EBP);
BuildMI(MBB, MBBI, Is64Bit ? X86::POP64r : X86::POP32r, 0, FramePtr);
} else {
// Get the number of bytes allocated from the FrameInfo...
unsigned NumBytes = MFI->getStackSize();
@@ -932,14 +1077,15 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
// instruction, merge the two instructions.
if (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PI = prior(MBBI);
if ((PI->getOpcode() == X86::ADD32ri ||
PI->getOpcode() == X86::ADD32ri8) &&
PI->getOperand(0).getReg() == X86::ESP) {
unsigned Opc = PI->getOpcode();
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
PI->getOperand(0).getReg() == StackPtr) {
NumBytes += PI->getOperand(2).getImmedValue();
MBB.erase(PI);
} else if ((PI->getOpcode() == X86::SUB32ri ||
PI->getOpcode() == X86::SUB32ri8) &&
PI->getOperand(0).getReg() == X86::ESP) {
} else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
PI->getOperand(0).getReg() == StackPtr) {
NumBytes -= PI->getOperand(2).getImmedValue();
MBB.erase(PI);
} else if (PI->getOpcode() == X86::ADJSTACKPTRri) {

@@ -949,11 +1095,15 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
}

if (NumBytes > 0) {
unsigned Opc = NumBytes < 128 ? X86::ADD32ri8 : X86::ADD32ri;
BuildMI(MBB, MBBI, Opc, 2, X86::ESP).addReg(X86::ESP).addImm(NumBytes);
unsigned Opc = (NumBytes < 128) ?
(Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
(Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
BuildMI(MBB, MBBI, Opc, 2, StackPtr).addReg(StackPtr).addImm(NumBytes);
} else if ((int)NumBytes < 0) {
unsigned Opc = -NumBytes < 128 ? X86::SUB32ri8 : X86::SUB32ri;
BuildMI(MBB, MBBI, Opc, 2, X86::ESP).addReg(X86::ESP).addImm(-NumBytes);
unsigned Opc = (-NumBytes < 128) ?
(Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
BuildMI(MBB, MBBI, Opc, 2, StackPtr).addReg(StackPtr).addImm(-NumBytes);
}
}
}
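This peephole folds an adjacent explicit stack adjustment into the epilogue's
own: a prior ADD on the stack pointer means more bytes to release, a prior SUB
means fewer, and the now-redundant instruction is erased. Over plain integers
the merge is simply (illustrative, not the MachineInstr API):

  // Fold a preceding "sp += Imm" or "sp -= Imm" into the pending adjustment.
  long mergeStackAdjust(long Pending, bool PrevWasAdd, long PrevImm) {
    return PrevWasAdd ? Pending + PrevImm : Pending - PrevImm;
  }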
@@ -964,7 +1114,7 @@ unsigned X86RegisterInfo::getRARegister() const {
}

unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const {
return hasFP(MF) ? X86::EBP : X86::ESP;
return hasFP(MF) ? FramePtr : StackPtr;
}

namespace llvm {
@@ -974,68 +1124,160 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::ValueType VT, bool High) {
case MVT::i8:
if (High) {
switch (Reg) {
default: return Reg;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX:
default: return 0;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AH;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX:
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
return X86::DH;
case X86::CH: case X86::CL: case X86::CX: case X86::ECX:
case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
return X86::CH;
case X86::BH: case X86::BL: case X86::BX: case X86::EBX:
case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
return X86::BH;
}
} else {
switch (Reg) {
default: return Reg;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX:
default: return 0;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AL;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX:
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
return X86::DL;
case X86::CH: case X86::CL: case X86::CX: case X86::ECX:
case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
return X86::CL;
case X86::BH: case X86::BL: case X86::BX: case X86::EBX:
case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
return X86::BL;
case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
return X86::SIL;
case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
return X86::DIL;
case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
return X86::BPL;
case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
return X86::SPL;
case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
return X86::R8B;
case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
return X86::R9B;
case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
return X86::R10B;
case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
return X86::R11B;
case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
return X86::R12B;
case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
return X86::R13B;
case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
return X86::R14B;
case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
return X86::R15B;
}
}
case MVT::i16:
switch (Reg) {
default: return Reg;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX:
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX:
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
return X86::DX;
case X86::CH: case X86::CL: case X86::CX: case X86::ECX:
case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
return X86::CX;
case X86::BH: case X86::BL: case X86::BX: case X86::EBX:
case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
return X86::BX;
case X86::ESI:
case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
return X86::SI;
case X86::EDI:
case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
return X86::DI;
case X86::EBP:
case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
return X86::BP;
case X86::ESP:
case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
return X86::SP;
case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
return X86::R8W;
case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
return X86::R9W;
case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
return X86::R10W;
case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
return X86::R11W;
case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
return X86::R12W;
case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
return X86::R13W;
case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
return X86::R14W;
case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
return X86::R15W;
}
case MVT::i32:
switch (Reg) {
default: return true;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX:
default: return Reg;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::EAX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX:
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
return X86::EDX;
case X86::CH: case X86::CL: case X86::CX: case X86::ECX:
case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
return X86::ECX;
case X86::BH: case X86::BL: case X86::BX: case X86::EBX:
case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
return X86::EBX;
case X86::SI:
case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
return X86::ESI;
case X86::DI:
case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
return X86::EDI;
case X86::BP:
case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
return X86::EBP;
case X86::SP:
case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
return X86::ESP;
case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
return X86::R8D;
case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
return X86::R9D;
case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
return X86::R10D;
case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
return X86::R11D;
case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
return X86::R12D;
case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
return X86::R13D;
case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
return X86::R14D;
case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
return X86::R15D;
}
case MVT::i64:
switch (Reg) {
default: return Reg;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::RAX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
return X86::RDX;
case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
return X86::RCX;
case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
return X86::RBX;
case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
return X86::RSI;
case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
return X86::RDI;
case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
return X86::RBP;
case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
return X86::RSP;
case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
return X86::R8;
case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
return X86::R9;
case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
return X86::R10;
case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
return X86::R11;
case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
return X86::R12;
case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
return X86::R13;
case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
return X86::R14;
case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
return X86::R15;
}
}
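getX86SubSuperRegister maps any alias of a register to the name of the
requested width; for i8 the default case now returns 0, since 64-bit-only
registers gained low-byte aliases but not every query has an answer. Some
illustrative results, readable directly off the switches above:

  #include <cassert>
  void checkRegisterAliases() {
    assert(getX86SubSuperRegister(X86::RAX,  MVT::i8,  false) == X86::AL);
    assert(getX86SubSuperRegister(X86::RAX,  MVT::i8,  true)  == X86::AH);
    assert(getX86SubSuperRegister(X86::BL,   MVT::i64, false) == X86::RBX);
    assert(getX86SubSuperRegister(X86::R10D, MVT::i16, false) == X86::R10W);
  }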
@@ -20,10 +20,26 @@
namespace llvm {
class Type;
class TargetInstrInfo;
class X86TargetMachine;

struct X86RegisterInfo : public X86GenRegisterInfo {
X86TargetMachine &TM;
const TargetInstrInfo &TII;
X86RegisterInfo(const TargetInstrInfo &tii);
private:
/// Is64Bit - Is the target 64-bits.
bool Is64Bit;

/// SlotSize - Stack slot size in bytes.
unsigned SlotSize;

/// StackPtr - X86 physical register used as stack ptr.
unsigned StackPtr;

/// FramePtr - X86 physical register used as frame ptr.
unsigned FramePtr;

public:
X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);

/// Code Generation virtual methods...
void storeRegToStackSlot(MachineBasicBlock &MBB,
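These four members are what let the rest of the register-info code stay
mode-agnostic. The diff does not show the constructor body, but a plausible
initialization, inferred from how the fields are used above (hedged sketch
only, not the actual commit's code):

  // Hypothetical constructor setup:
  // Is64Bit  = TM.getSubtarget<X86Subtarget>().is64Bit();
  // SlotSize = Is64Bit ? 8 : 4;
  // StackPtr = Is64Bit ? X86::RSP : X86::ESP;
  // FramePtr = Is64Bit ? X86::RBP : X86::EBP;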
@@ -23,35 +23,92 @@ let Namespace = "X86" in {
// because the register file generator is smart enough to figure out that
// AL aliases AX if we tell it that AX aliased AL (for example).

// FIXME: X86-64 have different Dwarf numbers.
// 64-bit registers, X86-64 only
def RAX : Register<"RAX">, DwarfRegNum<0>;
def RDX : Register<"RDX">, DwarfRegNum<1>;
def RCX : Register<"RCX">, DwarfRegNum<2>;
def RBX : Register<"RBX">, DwarfRegNum<3>;
def RSI : Register<"RSI">, DwarfRegNum<4>;
def RDI : Register<"RDI">, DwarfRegNum<5>;
def RBP : Register<"RBP">, DwarfRegNum<6>;
def RSP : Register<"RSP">, DwarfRegNum<7>;

def R8 : Register<"R8">, DwarfRegNum<8>;
def R9 : Register<"R9">, DwarfRegNum<9>;
def R10 : Register<"R10">, DwarfRegNum<10>;
def R11 : Register<"R11">, DwarfRegNum<11>;
def R12 : Register<"R12">, DwarfRegNum<12>;
def R13 : Register<"R13">, DwarfRegNum<13>;
def R14 : Register<"R14">, DwarfRegNum<14>;
def R15 : Register<"R15">, DwarfRegNum<15>;

// 32-bit registers
def EAX : Register<"EAX">, DwarfRegNum<0>;
def ECX : Register<"ECX">, DwarfRegNum<1>;
def EDX : Register<"EDX">, DwarfRegNum<2>;
def EBX : Register<"EBX">, DwarfRegNum<3>;
def ESP : Register<"ESP">, DwarfRegNum<4>;
def EBP : Register<"EBP">, DwarfRegNum<5>;
def ESI : Register<"ESI">, DwarfRegNum<6>;
def EDI : Register<"EDI">, DwarfRegNum<7>;
def EAX : RegisterGroup<"EAX", [RAX]>, DwarfRegNum<0>;
def ECX : RegisterGroup<"ECX", [RCX]>, DwarfRegNum<1>;
def EDX : RegisterGroup<"EDX", [RDX]>, DwarfRegNum<2>;
def EBX : RegisterGroup<"EBX", [RBX]>, DwarfRegNum<3>;
def ESP : RegisterGroup<"ESP", [RSP]>, DwarfRegNum<4>;
def EBP : RegisterGroup<"EBP", [RBP]>, DwarfRegNum<5>;
def ESI : RegisterGroup<"ESI", [RSI]>, DwarfRegNum<6>;
def EDI : RegisterGroup<"EDI", [RDI]>, DwarfRegNum<7>;

// X86-64 only
def R8D : RegisterGroup<"R8D", [R8]>, DwarfRegNum<8>;
def R9D : RegisterGroup<"R9D", [R9]>, DwarfRegNum<9>;
def R10D : RegisterGroup<"R10D", [R10]>, DwarfRegNum<10>;
def R11D : RegisterGroup<"R11D", [R11]>, DwarfRegNum<11>;
def R12D : RegisterGroup<"R12D", [R12]>, DwarfRegNum<12>;
def R13D : RegisterGroup<"R13D", [R13]>, DwarfRegNum<13>;
def R14D : RegisterGroup<"R14D", [R14]>, DwarfRegNum<14>;
def R15D : RegisterGroup<"R15D", [R15]>, DwarfRegNum<15>;

// 16-bit registers
def AX : RegisterGroup<"AX", [EAX]>, DwarfRegNum<0>;
def CX : RegisterGroup<"CX", [ECX]>, DwarfRegNum<1>;
def DX : RegisterGroup<"DX", [EDX]>, DwarfRegNum<2>;
def BX : RegisterGroup<"BX", [EBX]>, DwarfRegNum<3>;
def SP : RegisterGroup<"SP", [ESP]>, DwarfRegNum<4>;
def BP : RegisterGroup<"BP", [EBP]>, DwarfRegNum<5>;
def SI : RegisterGroup<"SI", [ESI]>, DwarfRegNum<6>;
def DI : RegisterGroup<"DI", [EDI]>, DwarfRegNum<7>;
def AX : RegisterGroup<"AX", [EAX,RAX]>, DwarfRegNum<0>;
def CX : RegisterGroup<"CX", [ECX,RCX]>, DwarfRegNum<1>;
def DX : RegisterGroup<"DX", [EDX,RDX]>, DwarfRegNum<2>;
def BX : RegisterGroup<"BX", [EBX,RBX]>, DwarfRegNum<3>;
def SP : RegisterGroup<"SP", [ESP,RSP]>, DwarfRegNum<4>;
def BP : RegisterGroup<"BP", [EBP,RBP]>, DwarfRegNum<5>;
def SI : RegisterGroup<"SI", [ESI,RSI]>, DwarfRegNum<6>;
def DI : RegisterGroup<"DI", [EDI,RDI]>, DwarfRegNum<7>;

// X86-64 only
def R8W : RegisterGroup<"R8W", [R8D,R8]>, DwarfRegNum<8>;
def R9W : RegisterGroup<"R9W", [R9D,R9]>, DwarfRegNum<9>;
def R10W : RegisterGroup<"R10W", [R10D,R10]>, DwarfRegNum<10>;
def R11W : RegisterGroup<"R11W", [R11D,R11]>, DwarfRegNum<11>;
def R12W : RegisterGroup<"R12W", [R12D,R12]>, DwarfRegNum<12>;
def R13W : RegisterGroup<"R13W", [R13D,R13]>, DwarfRegNum<13>;
def R14W : RegisterGroup<"R14W", [R14D,R14]>, DwarfRegNum<14>;
def R15W : RegisterGroup<"R15W", [R15D,R15]>, DwarfRegNum<15>;

// 8-bit registers
def AL : RegisterGroup<"AL", [AX,EAX]>, DwarfRegNum<0>;
def CL : RegisterGroup<"CL", [CX,ECX]>, DwarfRegNum<1>;
def DL : RegisterGroup<"DL", [DX,EDX]>, DwarfRegNum<2>;
def BL : RegisterGroup<"BL", [BX,EBX]>, DwarfRegNum<3>;
def AH : RegisterGroup<"AH", [AX,EAX]>, DwarfRegNum<0>;
def CH : RegisterGroup<"CH", [CX,ECX]>, DwarfRegNum<1>;
def DH : RegisterGroup<"DH", [DX,EDX]>, DwarfRegNum<2>;
def BH : RegisterGroup<"BH", [BX,EBX]>, DwarfRegNum<3>;
// Low registers
def AL : RegisterGroup<"AL", [AX,EAX,RAX]>, DwarfRegNum<0>;
def CL : RegisterGroup<"CL", [CX,ECX,RCX]>, DwarfRegNum<1>;
def DL : RegisterGroup<"DL", [DX,EDX,RDX]>, DwarfRegNum<2>;
def BL : RegisterGroup<"BL", [BX,EBX,RBX]>, DwarfRegNum<3>;

// X86-64 only
def SIL : RegisterGroup<"SIL", [SI,ESI,RSI]>, DwarfRegNum<4>;
def DIL : RegisterGroup<"DIL", [DI,EDI,RDI]>, DwarfRegNum<5>;
def BPL : RegisterGroup<"BPL", [BP,EBP,RBP]>, DwarfRegNum<6>;
def SPL : RegisterGroup<"SPL", [SP,ESP,RSP]>, DwarfRegNum<7>;
def R8B : RegisterGroup<"R8B", [R8W,R8D,R8]>, DwarfRegNum<8>;
def R9B : RegisterGroup<"R9B", [R9W,R9D,R9]>, DwarfRegNum<9>;
def R10B : RegisterGroup<"R10B", [R10W,R10D,R10]>, DwarfRegNum<10>;
def R11B : RegisterGroup<"R11B", [R11W,R11D,R11]>, DwarfRegNum<11>;
def R12B : RegisterGroup<"R12B", [R12W,R12D,R12]>, DwarfRegNum<12>;
def R13B : RegisterGroup<"R13B", [R13W,R13D,R13]>, DwarfRegNum<13>;
def R14B : RegisterGroup<"R14B", [R14W,R14D,R14]>, DwarfRegNum<14>;
def R15B : RegisterGroup<"R15B", [R15W,R15D,R15]>, DwarfRegNum<15>;

// High registers X86-32 only
def AH : RegisterGroup<"AH", [AX,EAX,RAX]>, DwarfRegNum<0>;
def CH : RegisterGroup<"CH", [CX,ECX,RCX]>, DwarfRegNum<1>;
def DH : RegisterGroup<"DH", [DX,EDX,RDX]>, DwarfRegNum<2>;
def BH : RegisterGroup<"BH", [BX,EBX,RBX]>, DwarfRegNum<3>;

// MMX Registers. These are actually aliased to ST0 .. ST7
def MM0 : Register<"MM0">, DwarfRegNum<29>;
@@ -73,14 +130,24 @@ let Namespace = "X86" in {
def FP6 : Register<"FP6">, DwarfRegNum<-1>;

// XMM Registers, used by the various SSE instruction set extensions
def XMM0: Register<"XMM0">, DwarfRegNum<21>;
def XMM1: Register<"XMM1">, DwarfRegNum<22>;
def XMM2: Register<"XMM2">, DwarfRegNum<23>;
def XMM3: Register<"XMM3">, DwarfRegNum<24>;
def XMM4: Register<"XMM4">, DwarfRegNum<25>;
def XMM5: Register<"XMM5">, DwarfRegNum<26>;
def XMM6: Register<"XMM6">, DwarfRegNum<27>;
def XMM7: Register<"XMM7">, DwarfRegNum<28>;
def XMM0: Register<"XMM0">, DwarfRegNum<17>;
def XMM1: Register<"XMM1">, DwarfRegNum<18>;
def XMM2: Register<"XMM2">, DwarfRegNum<19>;
def XMM3: Register<"XMM3">, DwarfRegNum<20>;
def XMM4: Register<"XMM4">, DwarfRegNum<21>;
def XMM5: Register<"XMM5">, DwarfRegNum<22>;
def XMM6: Register<"XMM6">, DwarfRegNum<23>;
def XMM7: Register<"XMM7">, DwarfRegNum<24>;

// X86-64 only
def XMM8: Register<"XMM8">, DwarfRegNum<25>;
def XMM9: Register<"XMM9">, DwarfRegNum<26>;
def XMM10: Register<"XMM10">, DwarfRegNum<27>;
def XMM11: Register<"XMM11">, DwarfRegNum<28>;
def XMM12: Register<"XMM12">, DwarfRegNum<29>;
def XMM13: Register<"XMM13">, DwarfRegNum<30>;
def XMM14: Register<"XMM14">, DwarfRegNum<31>;
def XMM15: Register<"XMM15">, DwarfRegNum<32>;

// Floating point stack registers
def ST0 : Register<"ST(0)">, DwarfRegNum<11>;
@@ -99,52 +166,247 @@ let Namespace = "X86" in {
// implicitly defined to be the register allocation order.
//

// List AL,CL,DL before AH,CH,DH, as X86 processors often suffer from false
// dependences between upper and lower parts of the register. BL and BH are
// last because they are call clobbered. Both Athlon and P4 chips suffer this
// issue.
def GR8 : RegisterClass<"X86", [i8], 8, [AL, CL, DL, AH, CH, DH, BL, BH]>;

def GR16 : RegisterClass<"X86", [i16], 16, [AX, CX, DX, SI, DI, BX, BP, SP]> {
// List call-clobbered registers before callee-save registers. RBX, RBP, (and
// R12, R13, R14, and R15 for X86-64) are callee-save registers.
// In 64-mode, there are 12 additional i8 registers, SIL, DIL, BPL, SPL, and
// R8B, ... R15B.
// FIXME: Allow AH, CH, DH, BH in 64-mode for non-REX instructions,
def GR8 : RegisterClass<"X86", [i8], 8,
[AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL, BPL, SPL,
R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
// Does the function dedicate RBP / EBP to being a frame ptr?
// If so, don't allocate SPL or BPL.
static const unsigned X86_GR8_AO_64_fp[] =
{X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
X86::R8B, X86::R9B, X86::R10B, X86::R11B,
X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B};
// If not, just don't allocate SPL.
static const unsigned X86_GR8_AO_64[] =
{X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
X86::R8B, X86::R9B, X86::R10B, X86::R11B,
X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL};
// In 32-mode, none of the 8-bit registers aliases EBP or ESP.
static const unsigned X86_GR8_AO_32[] =
{X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH};

GR8Class::iterator
GR8Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (!Subtarget.is64Bit())
return X86_GR8_AO_32;
else if (hasFP(MF))
return X86_GR8_AO_64_fp;
else
return X86_GR8_AO_64;
}

GR8Class::iterator
GR8Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (!Subtarget.is64Bit())
return X86_GR8_AO_32 + (sizeof(X86_GR8_AO_32) / sizeof(unsigned));
else if (hasFP(MF))
return X86_GR8_AO_64_fp + (sizeof(X86_GR8_AO_64_fp) / sizeof(unsigned));
else
return X86_GR8_AO_64 + (sizeof(X86_GR8_AO_64) / sizeof(unsigned));
}
}];
}

def GR16 : RegisterClass<"X86", [i16], 16,
[AX, CX, DX, SI, DI, BX, BP, SP,
R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
// Does the function dedicate RBP / EBP to being a frame ptr?
// If so, don't allocate SP or BP.
static const unsigned X86_GR16_AO_64_fp[] =
{X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
X86::R8W, X86::R9W, X86::R10W, X86::R11W,
X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W};
static const unsigned X86_GR16_AO_32_fp[] =
{X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX};
// If not, just don't allocate SPL.
static const unsigned X86_GR16_AO_64[] =
{X86::AX, X86::CX, X86::DX, X86::SI, X86::DI,
X86::R8W, X86::R9W, X86::R10W, X86::R11W,
X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W, X86::BP};
static const unsigned X86_GR16_AO_32[] =
{X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP};

GR16Class::iterator
GR16Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (Subtarget.is64Bit()) {
if (hasFP(MF))
return X86_GR16_AO_64_fp;
else
return X86_GR16_AO_64;
} else {
if (hasFP(MF))
return X86_GR16_AO_32_fp;
else
return X86_GR16_AO_32;
}
}

GR16Class::iterator
GR16Class::allocation_order_end(const MachineFunction &MF) const {
if (hasFP(MF)) // Does the function dedicate EBP to being a frame ptr?
return end()-2; // If so, don't allocate SP or BP
else
return end()-1; // If not, just don't allocate SP
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (Subtarget.is64Bit()) {
if (hasFP(MF))
return X86_GR16_AO_64_fp+(sizeof(X86_GR16_AO_64_fp)/sizeof(unsigned));
else
return X86_GR16_AO_64 + (sizeof(X86_GR16_AO_64) / sizeof(unsigned));
} else {
if (hasFP(MF))
return X86_GR16_AO_32_fp+(sizeof(X86_GR16_AO_32_fp)/sizeof(unsigned));
else
return X86_GR16_AO_32 + (sizeof(X86_GR16_AO_32) / sizeof(unsigned));
}
}
}];
}

def GR32 : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
// Does the function dedicate RBP / EBP to being a frame ptr?
// If so, don't allocate ESP or EBP.
static const unsigned X86_GR32_AO_64_fp[] =
{X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
X86::R8D, X86::R9D, X86::R10D, X86::R11D,
X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D};
static const unsigned X86_GR32_AO_32_fp[] =
{X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX};
// If not, just don't allocate SPL.
static const unsigned X86_GR32_AO_64[] =
{X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI,
X86::R8D, X86::R9D, X86::R10D, X86::R11D,
X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP};
static const unsigned X86_GR32_AO_32[] =
{X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP};

GR32Class::iterator
GR32Class::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (Subtarget.is64Bit()) {
if (hasFP(MF))
return X86_GR32_AO_64_fp;
else
return X86_GR32_AO_64;
} else {
if (hasFP(MF))
return X86_GR32_AO_32_fp;
else
return X86_GR32_AO_32;
}
}

GR32Class::iterator
GR32Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (Subtarget.is64Bit()) {
if (hasFP(MF))
return X86_GR32_AO_64_fp+(sizeof(X86_GR32_AO_64_fp)/sizeof(unsigned));
else
return X86_GR32_AO_64 + (sizeof(X86_GR32_AO_64) / sizeof(unsigned));
} else {
if (hasFP(MF))
return X86_GR32_AO_32_fp+(sizeof(X86_GR32_AO_32_fp)/sizeof(unsigned));
else
return X86_GR32_AO_32 + (sizeof(X86_GR32_AO_32) / sizeof(unsigned));
}
}
}];
}

def GR64 : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP]> {
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
GR32Class::iterator
GR32Class::allocation_order_end(const MachineFunction &MF) const {
if (hasFP(MF)) // Does the function dedicate EBP to being a frame ptr?
return end()-2; // If so, don't allocate ESP or EBP
GR64Class::iterator
GR64Class::allocation_order_end(const MachineFunction &MF) const {
if (hasFP(MF)) // Does the function dedicate RBP to being a frame ptr?
return end()-2; // If so, don't allocate RSP or RBP
else
return end()-1; // If not, just don't allocate ESP
return end()-1; // If not, just don't allocate RSP
}
}];
}

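All four GR classes follow the same allocation-order policy: call-clobbered
registers come first so they are preferred, the callee-saved RBX/R12-R15/RBP
come last, and the end iterator is trimmed so the stack pointer, and the frame
pointer when one is reserved, are never handed out. The
sizeof(arr)/sizeof(unsigned) expression is simply the element count, which
makes arr + count the one-past-the-end iterator:

  // The end-iterator idiom used by the MethodBodies above.
  static const unsigned Order[] = { 1, 2, 3, 4, 5 };
  static const unsigned *OrderEnd = Order + sizeof(Order) / sizeof(unsigned);
  // GR64 instead keeps RBP and RSP as the last two list entries and returns
  // end()-2 or end()-1 to exclude them.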
// GR16, GR32 subclasses which contain registers that have R8 sub-registers.
// These should only be used for 32-bit mode.
def GR16_ : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]>;
def GR32_ : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]>;

// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>;
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]> {
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
FR32Class::iterator
FR32Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (!Subtarget.is64Bit())
return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
else
return end();
}
}];
}

def FR64 : RegisterClass<"X86", [f64], 64,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>;
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]> {
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
FR64Class::iterator
FR64Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (!Subtarget.is64Bit())
return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
else
return end();
}
}];
}

// FIXME: This sets up the floating point register files as though they are f64
// values, though they really are f80 values. This will cause us to spill

@@ -174,4 +436,21 @@ def RST : RegisterClass<"X86", [f64], 32,
def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32], 64,
[MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>;
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]> {
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
VR128Class::iterator
VR128Class::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
if (!Subtarget.is64Bit())
return end()-8; // Only XMM0 to XMM7 are available in 32-bit mode.
else
return end();
}
}];
}
@@ -12,9 +12,10 @@
//===----------------------------------------------------------------------===//

#include "X86Subtarget.h"
#include "X86GenSubtarget.inc"
#include "llvm/Module.h"
#include "llvm/Support/CommandLine.h"
#include "X86GenSubtarget.inc"
#include <iostream>
using namespace llvm;

cl::opt<X86Subtarget::AsmWriterFlavorTy>

@@ -29,7 +30,18 @@ AsmWriterFlavor("x86-asm-syntax", cl::init(X86Subtarget::unset),
/// specified arguments. If we can't run cpuid on the host, return true.
static bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
unsigned *rECX, unsigned *rEDX) {
#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
#if defined(__x86_64__)
asm ("pushq\t%%rbx\n\t"
"cpuid\n\t"
"movl\t%%ebx, %%esi\n\t"
"popq\t%%rbx"
: "=a" (*rEAX),
"=S" (*rEBX),
"=c" (*rECX),
"=d" (*rEDX)
: "a" (value));
return false;
#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
#if defined(__GNUC__)
asm ("pushl\t%%ebx\n\t"
"cpuid\n\t"
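The x86-64 path saves and restores RBX around CPUID because RBX may be
reserved (for example as the PIC base register), so the EBX result is routed
out through ESI via the "=S" constraint instead of clobbering RBX directly.
Hypothetical use of the helper to test a feature bit:

  // Query leaf 1 and test CPUID.1:EDX bit 26 (SSE2).
  unsigned EAX, EBX, ECX, EDX;
  if (!GetCpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX)) {
    bool HasSSE2 = (EDX >> 26) & 1;
    (void)HasSSE2;
  }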
@@ -99,8 +111,8 @@ static const char *GetCurrentX86CPU() {
case 9:
case 13: return "pentium-m";
case 14: return "yonah";
default:
return (Model > 14) ? "yonah" : "i686";
case 15: return "core2";
default: return "i686";
}
case 15: {
switch (Model) {
@@ -154,14 +166,16 @@ static const char *GetCurrentX86CPU() {
}
}

X86Subtarget::X86Subtarget(const Module &M, const std::string &FS) {
stackAlignment = 8;
// FIXME: this is a known good value for Yonah. Not sure about others.
MinRepStrSizeThreshold = 128;
X86SSELevel = NoMMXSSE;
X863DNowLevel = NoThreeDNow;
AsmFlavor = AsmWriterFlavor;
Is64Bit = false;
X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
: AsmFlavor(AsmWriterFlavor)
, X86SSELevel(NoMMXSSE)
, X863DNowLevel(NoThreeDNow)
, HasX86_64(false)
, stackAlignment(8)
// FIXME: this is a known good value for Yonah. How about others?
, MinRepStrSizeThreshold(128)
, Is64Bit(is64Bit)
, TargetType(isELF) { // Default to ELF unless otherwise specified.

// Determine default and user specified characteristics
std::string CPU = GetCurrentX86CPU();

@@ -169,9 +183,12 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS) {
// Parse features string.
ParseSubtargetFeatures(FS, CPU);

// Default to ELF unless otherwise specified.
TargetType = isELF;

if (Is64Bit && !HasX86_64) {
std::cerr << "Warning: Generation of 64-bit code for a 32-bit processor "
"requested.\n";
HasX86_64 = true;
}

// Set the boolean corresponding to the current target triple, or the default
// if one cannot be determined, to true.
const std::string& TT = M.getTargetTriple();
@@ -44,9 +44,9 @@ protected:

/// X863DNowLevel - 3DNow or 3DNow Athlon, or none supported.
X863DNowEnum X863DNowLevel;

/// Is64Bit - True if the processor supports Em64T.
bool Is64Bit;

/// HasX86_64 - True if the processor supports X86-64 instructions.
bool HasX86_64;

/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.

@@ -55,6 +55,11 @@ protected:
/// Min. memset / memcpy size that is turned into rep/movs, rep/stos ops.
unsigned MinRepStrSizeThreshold;

private:
/// Is64Bit - True if the processor supports 64-bit instructions and module
/// pointer size is 64 bit.
bool Is64Bit;

public:
enum {
isELF, isCygwin, isDarwin, isWindows

@@ -63,7 +68,7 @@ public:
/// This constructor initializes the data members to match that
/// of the specified module.
///
X86Subtarget(const Module &M, const std::string &FS);
X86Subtarget(const Module &M, const std::string &FS, bool is64Bit);

/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
@@ -26,13 +26,16 @@ X86TargetAsmInfo::X86TargetAsmInfo(const X86TargetMachine &TM) {
case X86Subtarget::isDarwin:
AlignmentIsInBytes = false;
GlobalPrefix = "_";
Data64bitsDirective = 0; // we can't emit a 64-bit unit
if (!Subtarget->is64Bit())
Data64bitsDirective = 0; // we can't emit a 64-bit unit
ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
PrivateGlobalPrefix = "L"; // Marker for constant pool idxs
ConstantPoolSection = "\t.const\n";
JumpTableDataSection = "\t.const\n"; // FIXME: depends on PIC mode
FourByteConstantSection = "\t.literal4\n";
EightByteConstantSection = "\t.literal8\n";
if (Subtarget->is64Bit())
SixteenByteConstantSection = "\t.literal16\n";
LCOMMDirective = "\t.lcomm\t";
COMMDirectiveTakesAlignment = false;
HasDotTypeDotSizeDirective = false;
@@ -33,22 +33,31 @@ int X86TargetMachineModule = 0;

namespace {
// Register the target.
RegisterTarget<X86TargetMachine> X("x86", " IA-32 (Pentium and above)");
RegisterTarget<X86_32TargetMachine>
X("x86", " 32-bit X86: Pentium-Pro and above");
RegisterTarget<X86_64TargetMachine>
Y("x86-64", " 64-bit X86: EM64T and AMD64");
}

const TargetAsmInfo *X86TargetMachine::createTargetAsmInfo() const {
return new X86TargetAsmInfo(*this);
}

unsigned X86TargetMachine::getJITMatchQuality() {
unsigned X86_32TargetMachine::getJITMatchQuality() {
#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
return 10;
#else
return 0;
#endif
return 0;
}

unsigned X86TargetMachine::getModuleMatchQuality(const Module &M) {
unsigned X86_64TargetMachine::getJITMatchQuality() {
#if defined(__x86_64__)
return 10;
#endif
return 0;
}

unsigned X86_32TargetMachine::getModuleMatchQuality(const Module &M) {
// We strongly match "i[3-9]86-*".
std::string TT = M.getTargetTriple();
if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&

@@ -65,18 +74,55 @@ unsigned X86TargetMachine::getModuleMatchQuality(const Module &M) {
return getJITMatchQuality()/2;
}

unsigned X86_64TargetMachine::getModuleMatchQuality(const Module &M) {
// We strongly match "x86_64-*".
std::string TT = M.getTargetTriple();
if (TT.size() >= 7 && TT[0] == 'x' && TT[1] == '8' && TT[2] == '6' &&
TT[3] == '_' && TT[4] == '6' && TT[5] == '4' && TT[6] == '-')
return 20;

if (M.getEndianness() == Module::LittleEndian &&
M.getPointerSize() == Module::Pointer64)
return 10; // Weak match
else if (M.getEndianness() != Module::AnyEndianness ||
M.getPointerSize() != Module::AnyPointerSize)
return 0; // Match for some other target

return getJITMatchQuality()/2;
}

X86_32TargetMachine::X86_32TargetMachine(const Module &M, const std::string &FS)
: X86TargetMachine(M, FS, false) {
}

X86_64TargetMachine::X86_64TargetMachine(const Module &M, const std::string &FS)
: X86TargetMachine(M, FS, true) {
}

/// X86TargetMachine ctor - Create an ILP32 architecture model
///
X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS)
: Subtarget(M, FS), DataLayout("e-p:32:32-d:32-l:32"),
X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS, bool is64Bit)
: Subtarget(M, FS, is64Bit),
DataLayout(Subtarget.is64Bit() ?
std::string("e-p:64:64-d:32-l:32") :
std::string("e-p:32:32-d:32-l:32")),
FrameInfo(TargetFrameInfo::StackGrowsDown,
Subtarget.getStackAlignment(), -4),
Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
InstrInfo(*this), JITInfo(*this), TLInfo(*this) {
if (getRelocationModel() == Reloc::Default)
if (Subtarget.isTargetDarwin())
setRelocationModel(Reloc::DynamicNoPIC);
else
setRelocationModel(Reloc::PIC_);
if (Subtarget.is64Bit()) {
// No DynamicNoPIC support under X86-64.
if (getRelocationModel() == Reloc::DynamicNoPIC)
setRelocationModel(Reloc::PIC_);
// Default X86-64 code model is small.
if (getCodeModel() == CodeModel::Default)
setCodeModel(CodeModel::Small);
}
}

//===----------------------------------------------------------------------===//
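Two mode-dependent constants here are worth spelling out. The DataLayout
string "e-p:64:64-d:32-l:32" differs from the 32-bit one only in the pointer
spec: little-endian, 64-bit pointers with 64-bit alignment. And the
FrameInfo's local-area offset of -8 versus -4 is just the size of the return
address sitting between the incoming stack pointer and the first local.
Illustrative:

  // The layout strings differ only in the pointer width/alignment.
  const char *layoutFor(bool Is64Bit) {
    return Is64Bit ? "e-p:64:64-d:32-l:32"
                   : "e-p:32:32-d:32-l:32";
  }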
@@ -37,7 +37,7 @@ protected:
virtual const TargetAsmInfo *createTargetAsmInfo() const;

public:
X86TargetMachine(const Module &M, const std::string &FS);
X86TargetMachine(const Module &M, const std::string &FS, bool is64Bit);

virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }

@@ -54,6 +54,7 @@ public:
static unsigned getModuleMatchQuality(const Module &M);
static unsigned getJITMatchQuality();

// Set up the pass pipeline.
virtual bool addInstSelector(FunctionPassManager &PM, bool Fast);
virtual bool addPostRegAlloc(FunctionPassManager &PM, bool Fast);

@@ -64,6 +65,27 @@ public:
virtual bool addCodeEmitter(FunctionPassManager &PM, bool Fast,
MachineCodeEmitter &MCE);
};

/// X86_32TargetMachine - X86 32-bit target machine.
///
class X86_32TargetMachine : public X86TargetMachine {
public:
X86_32TargetMachine(const Module &M, const std::string &FS);

static unsigned getJITMatchQuality();
static unsigned getModuleMatchQuality(const Module &M);
};

/// X86_64TargetMachine - X86 64-bit target machine.
///
class X86_64TargetMachine : public X86TargetMachine {
public:
X86_64TargetMachine(const Module &M, const std::string &FS);

static unsigned getJITMatchQuality();
static unsigned getModuleMatchQuality(const Module &M);
};

} // End llvm namespace

#endif