//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the X86 specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
|
2017-07-01 08:45:50 +08:00
|
|
|
#include "X86.h"
|
|
|
|
|
|
|
|
#include "X86CallLowering.h"
|
|
|
|
#include "X86LegalizerInfo.h"
|
|
|
|
#include "X86RegisterBankInfo.h"
|
2005-07-12 09:41:54 +08:00
|
|
|
#include "X86Subtarget.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "MCTargetDesc/X86BaseInfo.h"
|
2014-08-09 09:07:25 +08:00
|
|
|
#include "X86TargetMachine.h"
|
2017-02-03 06:55:55 +08:00
|
|
|
#include "llvm/ADT/Triple.h"
|
2017-07-01 08:45:50 +08:00
|
|
|
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
|
|
|
|
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
|
2013-02-16 06:31:27 +08:00
|
|
|
#include "llvm/IR/Attributes.h"
|
2017-02-02 08:32:03 +08:00
|
|
|
#include "llvm/IR/ConstantRange.h"
|
2013-02-16 06:31:27 +08:00
|
|
|
#include "llvm/IR/Function.h"
|
2013-01-02 19:36:10 +08:00
|
|
|
#include "llvm/IR/GlobalValue.h"
|
2017-02-03 06:55:55 +08:00
|
|
|
#include "llvm/Support/Casting.h"
|
|
|
|
#include "llvm/Support/CodeGen.h"
|
2014-05-22 07:51:57 +08:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2009-01-03 12:04:46 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
2011-09-08 00:10:57 +08:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2006-12-23 06:29:05 +08:00
|
|
|
#include "llvm/Target/TargetMachine.h"
|
2011-07-02 04:45:01 +08:00
|
|
|
|
2009-04-26 02:27:23 +08:00
|
|
|
#if defined(_MSC_VER)
|
2009-08-03 08:11:34 +08:00
|
|
|
#include <intrin.h>
|
2009-04-26 02:27:23 +08:00
|
|
|
#endif
|
|
|
|
|
2014-04-22 10:41:26 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
#define DEBUG_TYPE "subtarget"
|
|
|
|
|
2014-04-22 10:03:14 +08:00
|
|
|
#define GET_SUBTARGETINFO_TARGET_DESC
|
|
|
|
#define GET_SUBTARGETINFO_CTOR
|
|
|
|
#include "X86GenSubtargetInfo.inc"
|
|
|
|
|
2014-05-22 07:40:26 +08:00
|
|
|
// Temporary option to control early if-conversion for x86 while adding machine
|
|
|
|
// models.
|
|
|
|
static cl::opt<bool>
|
|
|
|
X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
|
|
|
|
cl::desc("Enable early if-conversion on X86"));
|
|
|
|
|
|
|
|
|
2015-08-14 23:11:42 +08:00
|
|
|
/// Classify a blockaddress reference for the current subtarget according to how
|
|
|
|
/// we should reference it in a non-pcrel context.
|
2016-05-20 02:34:20 +08:00
|
|
|
unsigned char X86Subtarget::classifyBlockAddressReference() const {
|
2016-05-20 20:20:10 +08:00
|
|
|
return classifyLocalReference(nullptr);
|
|
|
|
}
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2016-05-20 20:20:10 +08:00
|
|
|
/// Classify a global variable reference for the current subtarget according to
|
|
|
|
/// how we should reference it in a non-pcrel context.
|
|
|
|
unsigned char
|
|
|
|
X86Subtarget::classifyGlobalReference(const GlobalValue *GV) const {
|
|
|
|
return classifyGlobalReference(GV, *GV->getParent());
|
|
|
|
}
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2016-05-20 20:20:10 +08:00
|
|
|
unsigned char
|
|
|
|
X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
|
2018-07-24 05:14:35 +08:00
|
|
|
// If we're not PIC, it's not very interesting.
|
|
|
|
if (!isPositionIndependent())
|
2018-06-22 06:19:05 +08:00
|
|
|
return X86II::MO_NO_FLAG;
|
[X86] Implement more of x86-64 large and medium PIC code models
Summary:
The large code model allows code and data segments to exceed 2GB, which
means that some symbol references may require a displacement that cannot
be encoded as a displacement from RIP. The large PIC model even relaxes
the assumption that the GOT itself is within 2GB of all code. Therefore,
we need a special code sequence to materialize it:
.LtmpN:
leaq .LtmpN(%rip), %rbx
movabsq $_GLOBAL_OFFSET_TABLE_-.LtmpN, %rax # Scratch
addq %rax, %rbx # GOT base reg
From that, non-local references go through the GOT base register instead
of being PC-relative loads. Local references typically use GOTOFF
symbols, like this:
movq extern_gv@GOT(%rbx), %rax
movq local_gv@GOTOFF(%rbx), %rax
All calls end up being indirect:
movabsq $local_fn@GOTOFF, %rax
addq %rbx, %rax
callq *%rax
The medium code model retains the assumption that the code segment is
less than 2GB, so calls are once again direct, and the RIP-relative
loads can be used to access the GOT. Materializing the GOT is easy:
leaq _GLOBAL_OFFSET_TABLE_(%rip), %rbx # GOT base reg
DSO local data accesses will use it:
movq local_gv@GOTOFF(%rbx), %rax
Non-local data accesses will use RIP-relative addressing, which means we
may not always need to materialize the GOT base:
movq extern_gv@GOTPCREL(%rip), %rax
Direct calls are basically the same as they are in the small code model:
They use direct, PC-relative addressing, and the PLT is used for calls
to non-local functions.
This patch adds reasonably comprehensive testing of LEA, but there are
lots of interesting folding opportunities that are unimplemented.
Reviewers: chandlerc, echristo
Subscribers: hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D47211
llvm-svn: 335297
2018-06-22 05:55:08 +08:00
|
|
|
|
2018-07-24 05:14:35 +08:00
|
|
|
if (is64Bit()) {
|
|
|
|
// 64-bit ELF PIC local references may use GOTOFF relocations.
|
|
|
|
if (isTargetELF()) {
|
|
|
|
switch (TM.getCodeModel()) {
|
|
|
|
// 64-bit small code model is simple: All rip-relative.
|
2018-08-22 19:31:39 +08:00
|
|
|
case CodeModel::Tiny:
|
|
|
|
llvm_unreachable("Tiny codesize model not supported on X86");
|
2018-07-24 05:14:35 +08:00
|
|
|
case CodeModel::Small:
|
|
|
|
case CodeModel::Kernel:
|
|
|
|
return X86II::MO_NO_FLAG;
|
|
|
|
|
|
|
|
// The large PIC code model uses GOTOFF.
|
|
|
|
case CodeModel::Large:
|
|
|
|
return X86II::MO_GOTOFF;
|
|
|
|
|
|
|
|
// Medium is a hybrid: RIP-rel for code, GOTOFF for DSO local data.
|
|
|
|
case CodeModel::Medium:
|
|
|
|
if (isa<Function>(GV))
|
|
|
|
return X86II::MO_NO_FLAG; // All code is RIP-relative
|
|
|
|
return X86II::MO_GOTOFF; // Local symbols use GOTOFF.
|
|
|
|
}
|
|
|
|
llvm_unreachable("invalid code model");
|
|
|
|
}
|
|
|
|
|
|
|
|
// Otherwise, this is either a RIP-relative reference or a 64-bit movabsq,
|
|
|
|
// both of which use MO_NO_FLAG.
|
2018-06-29 01:56:43 +08:00
|
|
|
return X86II::MO_NO_FLAG;
|
2018-07-24 05:14:35 +08:00
|
|
|
}
|
Re-land r335297 "[X86] Implement more of x86-64 large and medium PIC code models"
The large code model allows code and data segments to exceed 2GB, which
means that some symbol references may require a displacement that cannot
be encoded as a displacement from RIP. The large PIC model even relaxes
the assumption that the GOT itself is within 2GB of all code. Therefore,
we need a special code sequence to materialize it:
.LtmpN:
leaq .LtmpN(%rip), %rbx
movabsq $_GLOBAL_OFFSET_TABLE_-.LtmpN, %rax # Scratch
addq %rax, %rbx # GOT base reg
From that, non-local references go through the GOT base register instead
of being PC-relative loads. Local references typically use GOTOFF
symbols, like this:
movq extern_gv@GOT(%rbx), %rax
movq local_gv@GOTOFF(%rbx), %rax
All calls end up being indirect:
movabsq $local_fn@GOTOFF, %rax
addq %rbx, %rax
callq *%rax
The medium code model retains the assumption that the code segment is
less than 2GB, so calls are once again direct, and the RIP-relative
loads can be used to access the GOT. Materializing the GOT is easy:
leaq _GLOBAL_OFFSET_TABLE_(%rip), %rbx # GOT base reg
DSO local data accesses will use it:
movq local_gv@GOTOFF(%rbx), %rax
Non-local data accesses will use RIP-relative addressing, which means we
may not always need to materialize the GOT base:
movq extern_gv@GOTPCREL(%rip), %rax
Direct calls are basically the same as they are in the small code model:
They use direct, PC-relative addressing, and the PLT is used for calls
to non-local functions.
This patch adds reasonably comprehensive testing of LEA, but there are
lots of interesting folding opportunities that are unimplemented.
I restricted the MCJIT/eh-lg-pic.ll test to Linux, since the large PIC
code model is not implemented for MachO yet.
Differential Revision: https://reviews.llvm.org/D47211
llvm-svn: 335508
2018-06-26 02:16:27 +08:00
|
|
|
|
2016-05-20 20:20:10 +08:00
|
|
|
// The COFF dynamic linker just patches the executable sections.
|
|
|
|
if (isTargetCOFF())
|
|
|
|
return X86II::MO_NO_FLAG;
|
|
|
|
|
|
|
|
if (isTargetDarwin()) {
|
|
|
|
// 32 bit macho has no relocation for a-b if a is undefined, even if
|
|
|
|
// b is in the section that is being relocated.
|
|
|
|
// This means we have to use o load even for GVs that are known to be
|
|
|
|
// local to the dso.
|
|
|
|
if (GV && (GV->isDeclarationForLinker() || GV->hasCommonLinkage()))
|
2016-05-18 00:01:32 +08:00
|
|
|
return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2009-07-11 04:53:38 +08:00
|
|
|
return X86II::MO_PIC_BASE_OFFSET;
|
|
|
|
}
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2016-05-20 20:20:10 +08:00
|
|
|
return X86II::MO_GOTOFF;
|
|
|
|
}
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2016-05-20 20:20:10 +08:00
|
|
|
unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
|
|
|
|
const Module &M) const {
|
2018-07-24 05:14:35 +08:00
|
|
|
// The static large model never uses stubs.
|
|
|
|
if (TM.getCodeModel() == CodeModel::Large && !isPositionIndependent())
|
2016-05-20 20:20:10 +08:00
|
|
|
return X86II::MO_NO_FLAG;
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2016-12-09 03:01:00 +08:00
|
|
|
// Absolute symbols can be referenced directly.
|
2017-02-02 08:32:03 +08:00
|
|
|
if (GV) {
|
|
|
|
if (Optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) {
|
|
|
|
// See if we can use the 8-bit immediate form. Note that some instructions
|
|
|
|
// will sign extend the immediate operand, so to be conservative we only
|
|
|
|
// accept the range [0,128).
|
|
|
|
if (CR->getUnsignedMax().ult(128))
|
|
|
|
return X86II::MO_ABS8;
|
|
|
|
else
|
|
|
|
return X86II::MO_NO_FLAG;
|
|
|
|
}
|
|
|
|
}
|
2016-12-09 03:01:00 +08:00
|
|
|
|
2017-10-28 05:18:48 +08:00
|
|
|
if (TM.shouldAssumeDSOLocal(M, GV))
|
2016-05-20 20:20:10 +08:00
|
|
|
return classifyLocalReference(GV);
|
2009-09-03 15:04:02 +08:00
|
|
|
|
2018-09-05 04:56:28 +08:00
|
|
|
if (isTargetCOFF()) {
|
|
|
|
if (GV->hasDLLImportStorageClass())
|
|
|
|
return X86II::MO_DLLIMPORT;
|
|
|
|
return X86II::MO_COFFSTUB;
|
|
|
|
}
|
2016-05-20 20:20:10 +08:00
|
|
|
|
2018-07-24 05:14:35 +08:00
|
|
|
if (is64Bit()) {
|
|
|
|
// ELF supports a large, truly PIC code model with non-PC relative GOT
|
|
|
|
// references. Other object file formats do not. Use the no-flag, 64-bit
|
|
|
|
// reference for them.
|
|
|
|
if (TM.getCodeModel() == CodeModel::Large)
|
|
|
|
return isTargetELF() ? X86II::MO_GOT : X86II::MO_NO_FLAG;
|
2016-05-20 20:20:10 +08:00
|
|
|
return X86II::MO_GOTPCREL;
|
2018-07-24 05:14:35 +08:00
|
|
|
}
|
2016-05-20 20:20:10 +08:00
|
|
|
|
|
|
|
if (isTargetDarwin()) {
|
2016-06-18 08:03:20 +08:00
|
|
|
if (!isPositionIndependent())
|
2016-05-20 20:20:10 +08:00
|
|
|
return X86II::MO_DARWIN_NONLAZY;
|
|
|
|
return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
|
2009-07-10 15:20:05 +08:00
|
|
|
}
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2016-05-20 20:20:10 +08:00
|
|
|
return X86II::MO_GOT;
|
2009-07-10 15:20:05 +08:00
|
|
|
}
|
|
|
|
|
2016-05-20 02:49:29 +08:00
|
|
|
unsigned char
|
|
|
|
X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV) const {
|
2016-05-20 20:20:10 +08:00
|
|
|
return classifyGlobalFunctionReference(GV, *GV->getParent());
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned char
|
|
|
|
X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
|
|
|
|
const Module &M) const {
|
2016-06-28 07:15:57 +08:00
|
|
|
if (TM.shouldAssumeDSOLocal(M, GV))
|
2016-05-20 20:20:10 +08:00
|
|
|
return X86II::MO_NO_FLAG;
|
|
|
|
|
2017-08-05 08:10:43 +08:00
|
|
|
if (isTargetCOFF()) {
|
|
|
|
assert(GV->hasDLLImportStorageClass() &&
|
|
|
|
"shouldAssumeDSOLocal gave inconsistent answer");
|
|
|
|
return X86II::MO_DLLIMPORT;
|
|
|
|
}
|
|
|
|
|
2017-11-08 08:01:05 +08:00
|
|
|
const Function *F = dyn_cast_or_null<Function>(GV);
|
|
|
|
|
2017-05-04 15:22:49 +08:00
|
|
|
if (isTargetELF()) {
|
|
|
|
if (is64Bit() && F && (CallingConv::X86_RegCall == F->getCallingConv()))
|
|
|
|
// According to psABI, PLT stub clobbers XMM8-XMM15.
|
|
|
|
// In Regcall calling convention those registers are used for passing
|
|
|
|
// parameters. Thus we need to prevent lazy binding in Regcall.
|
|
|
|
return X86II::MO_GOTPCREL;
|
2018-02-24 05:32:06 +08:00
|
|
|
// If PLT must be avoided then the call should be via GOTPCREL.
|
|
|
|
if (((F && F->hasFnAttribute(Attribute::NonLazyBind)) ||
|
|
|
|
(!F && M.getRtLibUseGOT())) &&
|
|
|
|
is64Bit())
|
|
|
|
return X86II::MO_GOTPCREL;
|
2016-04-20 16:32:57 +08:00
|
|
|
return X86II::MO_PLT;
|
2017-05-04 15:22:49 +08:00
|
|
|
}
|
2016-05-20 20:20:10 +08:00
|
|
|
|
|
|
|
if (is64Bit()) {
|
|
|
|
if (F && F->hasFnAttribute(Attribute::NonLazyBind))
|
|
|
|
// If the function is marked as non-lazy, generate an indirect call
|
|
|
|
// which loads from the GOT directly. This avoids runtime overhead
|
|
|
|
// at the cost of eager binding (and one extra byte of encoding).
|
|
|
|
return X86II::MO_GOTPCREL;
|
|
|
|
return X86II::MO_NO_FLAG;
|
2016-04-20 16:32:57 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return X86II::MO_NO_FLAG;
|
|
|
|
}
|
2006-12-01 06:42:55 +08:00
|
|
|
|
2015-08-14 23:11:42 +08:00
|
|
|
/// Return true if the subtarget allows calls to immediate address.
|
2016-05-20 02:49:29 +08:00
|
|
|
bool X86Subtarget::isLegalToCallImmediateAddr() const {
|
2014-03-29 05:40:47 +08:00
|
|
|
// FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32
|
|
|
|
// but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does,
|
|
|
|
// the following check for Win32 should be removed.
|
|
|
|
if (In64BitMode || isTargetWin32())
|
2009-05-20 12:53:57 +08:00
|
|
|
return false;
|
2016-05-20 06:07:57 +08:00
|
|
|
return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
|
2009-05-20 12:53:57 +08:00
|
|
|
}
|
|
|
|
|
2014-09-04 04:36:31 +08:00
|
|
|
// Resolve the CPU name and feature string, parse them, and derive all
// feature-dependent subtarget state (unaligned-memory speed, mode feature
// bits, stack alignment, gather/scatter cost, preferred vector width).
void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
  std::string CPUName = CPU;
  if (CPUName.empty())
    CPUName = "generic";

  std::string FullFS = FS;

  // Prepend a default feature to the feature string. The user-supplied
  // features stay to the right, so they can still override the default.
  // (Factored out: this empty/non-empty dance was repeated three times.)
  auto PrependFeature = [&FullFS](const char *Feature) {
    if (FullFS.empty())
      FullFS = Feature;
    else
      FullFS = std::string(Feature) + "," + FullFS;
  };

  if (In64BitMode) {
    // SSE2 should default to enabled in 64-bit mode, but can be turned off
    // explicitly.
    PrependFeature("+sse2");

    // If no CPU was specified, enable the 64bit feature to satisfy the
    // 64-bit capability check below.
    if (CPUName == "generic")
      PrependFeature("+64bit");
  } else {
    // LAHF/SAHF are always supported in non-64-bit mode.
    PrependFeature("+sahf");
  }

  // Parse features string and set the CPU.
  ParseSubtargetFeatures(CPUName, FullFS);

  // All CPUs that implement SSE4.2 or SSE4A support unaligned accesses of
  // 16-bytes and under that are reasonably fast. These features were
  // introduced with Intel's Nehalem/Silvermont and AMD's Family10h
  // micro-architectures respectively.
  if (hasSSE42() || hasSSE4A())
    IsUAMem16Slow = false;

  // It's important to keep the MCSubtargetInfo feature bits in sync with
  // target data structure which is shared with MC code emitter, etc.
  if (In64BitMode)
    ToggleFeature(X86::Mode64Bit);
  else if (In32BitMode)
    ToggleFeature(X86::Mode32Bit);
  else if (In16BitMode)
    ToggleFeature(X86::Mode16Bit);
  else
    llvm_unreachable("Not 16-bit, 32-bit or 64-bit mode!");

  LLVM_DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
                    << ", 3DNowLevel " << X863DNowLevel << ", 64bit "
                    << HasX86_64 << "\n");
  if (In64BitMode && !HasX86_64)
    report_fatal_error("64-bit code requested on a subtarget that doesn't "
                       "support it!");

  // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both
  // 32 and 64 bit) and for all 64-bit targets.
  if (StackAlignOverride)
    stackAlignment = StackAlignOverride;
  else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
           isTargetKFreeBSD() || In64BitMode)
    stackAlignment = 16;

  // Some CPUs have more overhead for gather. The specified overhead is
  // relative to the Load operation. "2" is the number provided by Intel
  // architects. This parameter is used for cost estimation of Gather Op and
  // comparison with other alternatives.
  // TODO: Remove the explicit hasAVX512()?, That would mean we would only
  // enable gather with a -march.
  if (hasAVX512() || (hasAVX2() && hasFastGather()))
    GatherOverhead = 2;
  if (hasAVX512())
    ScatterOverhead = 2;

  // Consume the vector width attribute or apply any target specific limit.
  if (PreferVectorWidthOverride)
    PreferVectorWidth = PreferVectorWidthOverride;
  else if (Prefer256Bit)
    PreferVectorWidth = 256;
}
|
2012-02-02 07:20:51 +08:00
|
|
|
|
2014-06-11 08:25:19 +08:00
|
|
|
X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
                                                            StringRef FS) {
  // Resolve CPU/feature-dependent state before the members that depend on it
  // (InstrInfo, TLInfo, FrameLowering) are constructed in the initializer
  // list; returning *this lets it be chained from there.
  initSubtargetFeatures(CPU, FS);
  return *this;
}
|
|
|
|
|
2016-05-21 02:16:06 +08:00
|
|
|
X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
                           const X86TargetMachine &TM,
                           unsigned StackAlignOverride,
                           unsigned PreferVectorWidthOverride,
                           unsigned RequiredVectorWidth)
    // NOTE: the initializer order below is load-bearing — InstrInfo's
    // initializeSubtargetDependencies() call must run before TLInfo and
    // FrameLowering are constructed.
    : X86GenSubtargetInfo(TT, CPU, FS),
      PICStyle(PICStyles::None), TM(TM), TargetTriple(TT),
      StackAlignOverride(StackAlignOverride),
      PreferVectorWidthOverride(PreferVectorWidthOverride),
      RequiredVectorWidth(RequiredVectorWidth),
      In64BitMode(TargetTriple.getArch() == Triple::x86_64),
      In32BitMode(TargetTriple.getArch() == Triple::x86 &&
                  TargetTriple.getEnvironment() != Triple::CODE16),
      In16BitMode(TargetTriple.getArch() == Triple::x86 &&
                  TargetTriple.getEnvironment() == Triple::CODE16),
      InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
      FrameLowering(*this, getStackAlignment()) {
  // Determine the PICStyle based on the target selected: non-PIC code has no
  // style; 64-bit PIC is RIP-relative; COFF needs no PIC register; Darwin
  // uses the stub/pic-base scheme; ELF goes through the GOT.
  if (!isPositionIndependent())
    setPICStyle(PICStyles::None);
  else if (is64Bit())
    setPICStyle(PICStyles::RIPRel);
  else if (isTargetCOFF())
    setPICStyle(PICStyles::None);
  else if (isTargetDarwin())
    setPICStyle(PICStyles::StubPIC);
  else if (isTargetELF())
    setPICStyle(PICStyles::GOT);

  // Set up the GlobalISel components: call lowering, legalizer, register
  // bank info, and the instruction selector (which borrows RBI).
  CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering()));
  Legalizer.reset(new X86LegalizerInfo(*this, TM));

  auto *RBI = new X86RegisterBankInfo(*getRegisterInfo());
  RegBankInfo.reset(RBI);
  InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI));
}
|
2013-02-16 06:31:27 +08:00
|
|
|
|
2016-11-15 14:34:33 +08:00
|
|
|
const CallLowering *X86Subtarget::getCallLowering() const {
  // Non-owning view of the GlobalISel call-lowering object created in the
  // constructor.
  return CallLoweringInfo.get();
}
|
|
|
|
|
|
|
|
const InstructionSelector *X86Subtarget::getInstructionSelector() const {
  // Non-owning view of the GlobalISel instruction selector created in the
  // constructor.
  return InstSelector.get();
}
|
|
|
|
|
|
|
|
const LegalizerInfo *X86Subtarget::getLegalizerInfo() const {
  // Non-owning view of the GlobalISel legalizer created in the constructor.
  return Legalizer.get();
}
|
|
|
|
|
|
|
|
const RegisterBankInfo *X86Subtarget::getRegBankInfo() const {
  // Non-owning view of the GlobalISel register bank info created in the
  // constructor.
  return RegBankInfo.get();
}
|
|
|
|
|
2014-07-16 06:39:58 +08:00
|
|
|
bool X86Subtarget::enableEarlyIfConversion() const {
|
2014-05-22 07:51:57 +08:00
|
|
|
return hasCMov() && X86EarlyIfConv;
|
2014-05-22 07:40:26 +08:00
|
|
|
}
|