2010-12-05 07:57:24 +08:00
|
|
|
//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===//
|
2005-07-12 09:41:54 +08:00
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
2007-12-30 04:36:04 +08:00
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
2005-07-12 09:41:54 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2011-07-02 05:01:15 +08:00
|
|
|
// This file implements the X86 specific subclass of TargetSubtargetInfo.
|
2005-07-12 09:41:54 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "X86Subtarget.h"
|
2009-07-10 15:20:05 +08:00
|
|
|
#include "X86InstrInfo.h"
|
2014-08-09 09:07:25 +08:00
|
|
|
#include "X86TargetMachine.h"
|
2013-02-16 06:31:27 +08:00
|
|
|
#include "llvm/IR/Attributes.h"
|
|
|
|
#include "llvm/IR/Function.h"
|
2013-01-02 19:36:10 +08:00
|
|
|
#include "llvm/IR/GlobalValue.h"
|
2014-05-22 07:51:57 +08:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
2009-01-03 12:04:46 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
2011-09-08 00:10:57 +08:00
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
2010-11-30 02:16:10 +08:00
|
|
|
#include "llvm/Support/Host.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2006-12-23 06:29:05 +08:00
|
|
|
#include "llvm/Target/TargetMachine.h"
|
2011-09-08 00:10:57 +08:00
|
|
|
#include "llvm/Target/TargetOptions.h"
|
2011-07-02 04:45:01 +08:00
|
|
|
|
2009-04-26 02:27:23 +08:00
|
|
|
#if defined(_MSC_VER)
|
2009-08-03 08:11:34 +08:00
|
|
|
#include <intrin.h>
|
2009-04-26 02:27:23 +08:00
|
|
|
#endif
|
|
|
|
|
2014-04-22 10:41:26 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
#define DEBUG_TYPE "subtarget"
|
|
|
|
|
2014-04-22 10:03:14 +08:00
|
|
|
#define GET_SUBTARGETINFO_TARGET_DESC
|
|
|
|
#define GET_SUBTARGETINFO_CTOR
|
|
|
|
#include "X86GenSubtargetInfo.inc"
|
|
|
|
|
2014-05-22 07:40:26 +08:00
|
|
|
// Temporary option to control early if-conversion for x86 while adding machine
|
|
|
|
// models.
|
|
|
|
static cl::opt<bool>
|
|
|
|
X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
|
|
|
|
cl::desc("Enable early if-conversion on X86"));
|
|
|
|
|
|
|
|
|
2015-08-14 23:11:42 +08:00
|
|
|
/// Classify a blockaddress reference for the current subtarget according to how
|
|
|
|
/// we should reference it in a non-pcrel context.
|
2013-04-03 07:06:40 +08:00
|
|
|
unsigned char X86Subtarget::ClassifyBlockAddressReference() const {
|
2009-11-21 07:18:13 +08:00
|
|
|
if (isPICStyleGOT()) // 32-bit ELF targets.
|
|
|
|
return X86II::MO_GOTOFF;
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2009-11-21 07:18:13 +08:00
|
|
|
if (isPICStyleStubPIC()) // Darwin/32 in PIC mode.
|
|
|
|
return X86II::MO_PIC_BASE_OFFSET;
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2009-11-21 07:18:13 +08:00
|
|
|
// Direct static reference to label.
|
|
|
|
return X86II::MO_NO_FLAG;
|
|
|
|
}
|
|
|
|
|
2015-08-14 23:11:42 +08:00
|
|
|
/// Classify a global variable reference for the current subtarget according to
|
|
|
|
/// how we should reference it in a non-pcrel context.
|
2009-07-10 15:20:05 +08:00
|
|
|
unsigned char X86Subtarget::
|
|
|
|
ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
|
|
|
|
// DLLImport only exists on windows, it is implemented as a load from a
|
|
|
|
// DLLIMPORT stub.
|
2014-01-14 23:22:47 +08:00
|
|
|
if (GV->hasDLLImportStorageClass())
|
2009-07-10 15:20:05 +08:00
|
|
|
return X86II::MO_DLLIMPORT;
|
|
|
|
|
2015-07-06 04:52:35 +08:00
|
|
|
bool isDef = GV->isStrongDefinitionForLinker();
|
2009-07-17 06:53:10 +08:00
|
|
|
|
2009-07-10 15:20:05 +08:00
|
|
|
// X86-64 in PIC mode.
|
|
|
|
if (isPICStyleRIPRel()) {
|
|
|
|
// Large model never uses stubs.
|
|
|
|
if (TM.getCodeModel() == CodeModel::Large)
|
|
|
|
return X86II::MO_NO_FLAG;
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2009-07-11 05:01:59 +08:00
|
|
|
if (isTargetDarwin()) {
|
|
|
|
// If symbol visibility is hidden, the extra load is not needed if
|
|
|
|
// target is x86-64 or the symbol is definitely defined in the current
|
|
|
|
// translation unit.
|
2015-07-06 04:52:35 +08:00
|
|
|
if (GV->hasDefaultVisibility() && !isDef)
|
2009-07-11 05:01:59 +08:00
|
|
|
return X86II::MO_GOTPCREL;
|
2010-08-22 01:21:11 +08:00
|
|
|
} else if (!isTargetWin64()) {
|
2009-07-11 05:01:59 +08:00
|
|
|
assert(isTargetELF() && "Unknown rip-relative target");
|
2009-07-10 15:20:05 +08:00
|
|
|
|
2016-04-23 05:41:58 +08:00
|
|
|
// Extra load is needed for all externally visible globals except with
|
|
|
|
// PIE as the definition of the global in an executable is not
|
|
|
|
// overridden.
|
|
|
|
|
|
|
|
if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility() &&
|
|
|
|
!isGlobalDefinedInPIE(GV, TM))
|
2009-07-11 05:01:59 +08:00
|
|
|
return X86II::MO_GOTPCREL;
|
|
|
|
}
|
2009-07-10 15:20:05 +08:00
|
|
|
|
|
|
|
return X86II::MO_NO_FLAG;
|
|
|
|
}
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2009-07-10 15:20:05 +08:00
|
|
|
if (isPICStyleGOT()) { // 32-bit ELF targets.
|
2016-04-23 05:41:58 +08:00
|
|
|
// Extra load is needed for all externally visible globals except with
|
|
|
|
// PIE as the definition of the global in an executable is not overridden.
|
|
|
|
|
|
|
|
if (GV->hasLocalLinkage() || GV->hasHiddenVisibility() ||
|
|
|
|
isGlobalDefinedInPIE(GV, TM))
|
2009-07-10 15:20:05 +08:00
|
|
|
return X86II::MO_GOTOFF;
|
|
|
|
return X86II::MO_GOT;
|
|
|
|
}
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2009-07-11 05:00:45 +08:00
|
|
|
if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode.
|
2009-07-11 04:53:38 +08:00
|
|
|
// Determine whether we have a stub reference and/or whether the reference
|
|
|
|
// is relative to the PIC base or not.
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2009-07-10 15:20:05 +08:00
|
|
|
// If this is a strong reference to a definition, it is definitely not
|
|
|
|
// through a stub.
|
2015-07-06 04:52:35 +08:00
|
|
|
if (isDef)
|
2009-07-11 04:53:38 +08:00
|
|
|
return X86II::MO_PIC_BASE_OFFSET;
|
2009-07-10 15:20:05 +08:00
|
|
|
|
|
|
|
// Unless we have a symbol with hidden visibility, we have to go through a
|
|
|
|
// normal $non_lazy_ptr stub because this symbol might be resolved late.
|
2016-05-18 00:01:32 +08:00
|
|
|
if (!GV->hasHiddenVisibility()) // $non_lazy_ptr reference.
|
2009-07-11 04:53:38 +08:00
|
|
|
return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2009-07-11 04:53:38 +08:00
|
|
|
// If symbol visibility is hidden, we have a stub for common symbol
|
|
|
|
// references and external declarations.
|
2015-07-06 04:52:35 +08:00
|
|
|
if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) {
|
2016-05-18 00:01:32 +08:00
|
|
|
// $non_lazy_ptr reference.
|
|
|
|
return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
|
2009-07-10 15:20:05 +08:00
|
|
|
}
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2009-07-11 04:53:38 +08:00
|
|
|
// Otherwise, no stub.
|
|
|
|
return X86II::MO_PIC_BASE_OFFSET;
|
|
|
|
}
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2009-07-11 05:00:45 +08:00
|
|
|
if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode.
|
2009-07-11 04:53:38 +08:00
|
|
|
// Determine whether we have a stub reference.
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2009-07-11 04:53:38 +08:00
|
|
|
// If this is a strong reference to a definition, it is definitely not
|
|
|
|
// through a stub.
|
2015-07-06 04:52:35 +08:00
|
|
|
if (isDef)
|
2009-07-11 04:53:38 +08:00
|
|
|
return X86II::MO_NO_FLAG;
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2009-07-11 04:53:38 +08:00
|
|
|
// Unless we have a symbol with hidden visibility, we have to go through a
|
|
|
|
// normal $non_lazy_ptr stub because this symbol might be resolved late.
|
|
|
|
if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference.
|
|
|
|
return X86II::MO_DARWIN_NONLAZY;
|
2009-09-03 15:04:02 +08:00
|
|
|
|
2009-07-10 15:20:05 +08:00
|
|
|
// Otherwise, no stub.
|
2009-07-11 04:53:38 +08:00
|
|
|
return X86II::MO_NO_FLAG;
|
2009-07-10 15:20:05 +08:00
|
|
|
}
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2009-07-10 15:20:05 +08:00
|
|
|
// Direct static reference to global.
|
|
|
|
return X86II::MO_NO_FLAG;
|
|
|
|
}
|
|
|
|
|
2016-04-20 16:32:57 +08:00
|
|
|
unsigned char X86Subtarget::classifyGlobalFunctionReference(
|
|
|
|
const GlobalValue *GV, const TargetMachine &TM) const {
|
|
|
|
// On ELF targets, in both X86-64 and X86-32 mode, direct calls to
|
|
|
|
// external symbols most go through the PLT in PIC mode. If the symbol
|
|
|
|
// has hidden or protected visibility, or if it is static or local, then
|
|
|
|
// we don't need to use the PLT - we can directly call it.
|
|
|
|
// In PIE mode, calls to global functions don't need to go through PLT
|
|
|
|
if (isTargetELF() && TM.getRelocationModel() == Reloc::PIC_ &&
|
2016-04-30 05:19:16 +08:00
|
|
|
!isGlobalDefinedInPIE(GV, TM) &&
|
2016-04-20 16:32:57 +08:00
|
|
|
GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
|
|
|
|
return X86II::MO_PLT;
|
|
|
|
} else if (isPICStyleStubAny() && !GV->isStrongDefinitionForLinker() &&
|
|
|
|
(!getTargetTriple().isMacOSX() ||
|
|
|
|
getTargetTriple().isMacOSXVersionLT(10, 5))) {
|
|
|
|
// PC-relative references to external symbols should go through $stub,
|
|
|
|
// unless we're building with the leopard linker or later, which
|
|
|
|
// automatically synthesizes these stubs.
|
|
|
|
return X86II::MO_DARWIN_STUB;
|
|
|
|
} else if (isPICStyleRIPRel() && isa<Function>(GV) &&
|
|
|
|
cast<Function>(GV)->hasFnAttribute(Attribute::NonLazyBind)) {
|
|
|
|
// If the function is marked as non-lazy, generate an indirect call
|
|
|
|
// which loads from the GOT directly. This avoids runtime overhead
|
|
|
|
// at the cost of eager binding (and one extra byte of encoding).
|
|
|
|
return X86II::MO_GOTPCREL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return X86II::MO_NO_FLAG;
|
|
|
|
}
|
2006-12-01 06:42:55 +08:00
|
|
|
|
2015-08-14 23:11:42 +08:00
|
|
|
/// This function returns the name of a function which has an interface like
|
|
|
|
/// the non-standard bzero function, if such a function exists on the
|
|
|
|
/// current subtarget and it is considered preferable over memset with zero
|
2008-10-01 05:22:07 +08:00
|
|
|
/// passed as the second argument. Otherwise it returns null.
|
2008-10-01 06:05:33 +08:00
|
|
|
const char *X86Subtarget::getBZeroEntry() const {
|
2008-04-02 04:38:36 +08:00
|
|
|
// Darwin 10 has a __bzero entry point for this purpose.
|
2011-04-20 08:14:25 +08:00
|
|
|
if (getTargetTriple().isMacOSX() &&
|
|
|
|
!getTargetTriple().isMacOSXVersionLT(10, 6))
|
2008-10-01 06:05:33 +08:00
|
|
|
return "__bzero";
|
2008-04-02 04:38:36 +08:00
|
|
|
|
2014-04-25 13:30:21 +08:00
|
|
|
return nullptr;
|
2008-04-02 04:38:36 +08:00
|
|
|
}
|
|
|
|
|
2013-01-29 10:32:37 +08:00
|
|
|
bool X86Subtarget::hasSinCos() const {
|
|
|
|
return getTargetTriple().isMacOSX() &&
|
2013-01-31 06:56:35 +08:00
|
|
|
!getTargetTriple().isMacOSXVersionLT(10, 9) &&
|
|
|
|
is64Bit();
|
2013-01-29 10:32:37 +08:00
|
|
|
}
|
|
|
|
|
2015-08-14 23:11:42 +08:00
|
|
|
/// Return true if the subtarget allows calls to immediate address.
|
2009-05-20 12:53:57 +08:00
|
|
|
bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
|
2014-03-29 05:40:47 +08:00
|
|
|
// FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32
|
|
|
|
// but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does,
|
|
|
|
// the following check for Win32 should be removed.
|
|
|
|
if (In64BitMode || isTargetWin32())
|
2009-05-20 12:53:57 +08:00
|
|
|
return false;
|
|
|
|
return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
|
|
|
|
}
|
|
|
|
|
2014-09-04 04:36:31 +08:00
|
|
|
/// Turn the CPU name \p CPU and feature string \p FS into concrete subtarget
/// state: parse the features, pick the instruction itineraries, sync the
/// mode feature bit and settle on the stack alignment.
void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
  // An unspecified CPU falls back to "generic".
  std::string CPUName = CPU;
  if (CPUName.empty())
    CPUName = "generic";

  // Features implied by the execution mode:
  //  - 64-bit mode needs the 64-bit features to be available;
  //  - LAHF/SAHF are always supported outside of 64-bit mode.
  // They are placed in front of the caller's string so that an explicit
  // setting (for instance turning SSE2 off) still takes effect.
  const char *ImpliedFS = In64BitMode ? "+64bit,+sse2" : "+sahf";
  std::string FullFS = FS;
  if (FullFS.empty())
    FullFS = ImpliedFS;
  else
    FullFS = std::string(ImpliedFS) + "," + FullFS;

  // Parse the features string and set the CPU.
  ParseSubtargetFeatures(CPUName, FullFS);

  // Every CPU implementing SSE4.2 or SSE4A handles unaligned accesses of 16
  // bytes and under reasonably fast. Those features arrived with Intel's
  // Nehalem/Silvermont and AMD's Family10h micro-architectures respectively.
  if (hasSSE42() || hasSSE4A())
    IsUAMem16Slow = false;

  InstrItins = getInstrItineraryForCPU(CPUName);

  // The MCSubtargetInfo feature bits must stay in sync with the target data
  // structure, which is shared with the MC code emitter, etc.
  if (In64BitMode) {
    ToggleFeature(X86::Mode64Bit);
  } else if (In32BitMode) {
    ToggleFeature(X86::Mode32Bit);
  } else if (In16BitMode) {
    ToggleFeature(X86::Mode16Bit);
  } else {
    llvm_unreachable("Not 16-bit, 32-bit or 64-bit mode!");
  }

  DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
               << ", 3DNowLevel " << X863DNowLevel
               << ", 64bit " << HasX86_64 << "\n");
  assert((!In64BitMode || HasX86_64) &&
         "64-bit code requested on a subtarget that doesn't support it!");

  // An explicit override always wins.
  if (StackAlignOverride) {
    stackAlignment = StackAlignOverride;
    return;
  }

  // Stack alignment is 16 bytes on Darwin, Linux, kFreeBSD and Solaris (both
  // 32 and 64 bit) and for all 64-bit targets.
  if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
      isTargetKFreeBSD() || In64BitMode)
    stackAlignment = 16;
}
|
2012-02-02 07:20:51 +08:00
|
|
|
|
2013-02-16 09:36:26 +08:00
|
|
|
void X86Subtarget::initializeEnvironment() {
|
Move the MMX subtarget feature out of the SSE set of features and into
its own variable.
This is needed so that we can explicitly turn off MMX without turning
off SSE and also so that we can diagnose feature set incompatibilities
that involve MMX without SSE.
Rationale:
// sse3
__m128d test_mm_addsub_pd(__m128d A, __m128d B) {
return _mm_addsub_pd(A, B);
}
// mmx
void shift(__m64 a, __m64 b, int c) {
_mm_slli_pi16(a, c);
_mm_slli_pi32(a, c);
_mm_slli_si64(a, c);
_mm_srli_pi16(a, c);
_mm_srli_pi32(a, c);
_mm_srli_si64(a, c);
_mm_srai_pi16(a, c);
_mm_srai_pi32(a, c);
}
clang -msse3 -mno-mmx file.c -c
For this code we should be able to explicitly turn off MMX
without affecting the compilation of the SSE3 function and then
diagnose and error on compiling the MMX function.
This matches the existing gcc behavior and follows the spirit of
the SSE/MMX separation in llvm where we can (and do) turn off
MMX code generation except in the presence of intrinsics.
Updated a couple of tests, but primarily tested with a couple of tests
for turning on only mmx and only sse.
This is paired with a patch to clang to take advantage of this behavior.
llvm-svn: 249731
2015-10-09 04:10:06 +08:00
|
|
|
X86SSELevel = NoSSE;
|
2013-02-16 09:36:26 +08:00
|
|
|
X863DNowLevel = NoThreeDNow;
|
2016-03-23 19:13:54 +08:00
|
|
|
HasX87 = false;
|
2013-02-16 09:36:26 +08:00
|
|
|
HasCMov = false;
|
|
|
|
HasX86_64 = false;
|
|
|
|
HasPOPCNT = false;
|
|
|
|
HasSSE4A = false;
|
|
|
|
HasAES = false;
|
2015-10-16 14:03:09 +08:00
|
|
|
HasFXSR = false;
|
2015-10-12 19:47:46 +08:00
|
|
|
HasXSAVE = false;
|
|
|
|
HasXSAVEOPT = false;
|
|
|
|
HasXSAVEC = false;
|
|
|
|
HasXSAVES = false;
|
2013-02-16 09:36:26 +08:00
|
|
|
HasPCLMUL = false;
|
|
|
|
HasFMA = false;
|
|
|
|
HasFMA4 = false;
|
|
|
|
HasXOP = false;
|
2013-09-25 02:21:52 +08:00
|
|
|
HasTBM = false;
|
2013-02-16 09:36:26 +08:00
|
|
|
HasMOVBE = false;
|
|
|
|
HasRDRAND = false;
|
|
|
|
HasF16C = false;
|
|
|
|
HasFSGSBase = false;
|
|
|
|
HasLZCNT = false;
|
|
|
|
HasBMI = false;
|
|
|
|
HasBMI2 = false;
|
2016-01-17 21:42:12 +08:00
|
|
|
HasVBMI = false;
|
2016-01-24 18:41:28 +08:00
|
|
|
HasIFMA = false;
|
2013-02-16 09:36:26 +08:00
|
|
|
HasRTM = false;
|
2013-03-27 06:46:02 +08:00
|
|
|
HasHLE = false;
|
2013-07-28 16:28:38 +08:00
|
|
|
HasERI = false;
|
|
|
|
HasCDI = false;
|
2013-08-20 13:23:59 +08:00
|
|
|
HasPFI = false;
|
2014-07-21 22:54:21 +08:00
|
|
|
HasDQI = false;
|
|
|
|
HasBWI = false;
|
|
|
|
HasVLX = false;
|
2013-02-16 09:36:26 +08:00
|
|
|
HasADX = false;
|
2015-12-15 21:35:29 +08:00
|
|
|
HasPKU = false;
|
2013-09-12 23:51:31 +08:00
|
|
|
HasSHA = false;
|
2013-03-27 01:47:11 +08:00
|
|
|
HasPRFCHW = false;
|
2013-03-29 07:41:26 +08:00
|
|
|
HasRDSEED = false;
|
2015-12-05 07:00:33 +08:00
|
|
|
HasLAHFSAHF = false;
|
2015-06-03 18:30:57 +08:00
|
|
|
HasMPX = false;
|
2013-02-16 09:36:26 +08:00
|
|
|
IsBTMemSlow = false;
|
SHLD/SHRD are VectorPath (microcode) instructions known to have poor latency on certain architectures. While generating SHLD/SHRD instructions is acceptable when optimizing for size, optimizing for speed on these platforms should be implemented using alternative sequences of instructions composed of add, adc, shr, shl, or and lea which are directPath instructions. These alternative instructions not only have a lower latency but they also increase the decode bandwidth by allowing simultaneous decoding of a third directPath instruction.
AMD's processors family K7, K8, K10, K12, K15 and K16 are known to have SHLD/SHRD instructions with very poor latency. Optimization guides for these processors recommend using an alternative sequence of instructions. For these AMD's processors, I disabled folding (or (x << c) | (y >> (64 - c))) when we are not optimizing for size.
It might be beneficial to disable this folding for some of the Intel's processors. However, since I couldn't find specific recommendations regarding using SHLD/SHRD instructions on Intel's processors, I haven't disabled this peephole for Intel.
llvm-svn: 195383
2013-11-22 07:21:26 +08:00
|
|
|
IsSHLDSlow = false;
|
2015-09-02 04:51:51 +08:00
|
|
|
IsUAMem16Slow = false;
|
2014-11-22 01:40:04 +08:00
|
|
|
IsUAMem32Slow = false;
|
2015-02-04 01:13:04 +08:00
|
|
|
HasSSEUnalignedMem = false;
|
2013-02-16 09:36:26 +08:00
|
|
|
HasCmpxchg16b = false;
|
|
|
|
UseLeaForSP = false;
|
2016-02-13 07:37:57 +08:00
|
|
|
HasFastPartialYMMWrite = false;
|
2014-11-21 19:19:34 +08:00
|
|
|
HasSlowDivide32 = false;
|
|
|
|
HasSlowDivide64 = false;
|
2013-02-16 09:36:26 +08:00
|
|
|
PadShortFunctions = false;
|
2013-03-28 03:14:02 +08:00
|
|
|
CallRegIndirect = false;
|
2013-04-26 04:29:37 +08:00
|
|
|
LEAUsesAG = false;
|
2014-05-20 16:55:50 +08:00
|
|
|
SlowLEA = false;
|
2014-06-09 19:40:41 +08:00
|
|
|
SlowIncDec = false;
|
2013-02-16 09:36:26 +08:00
|
|
|
stackAlignment = 4;
|
|
|
|
// FIXME: this is a known good value for Yonah. How about others?
|
|
|
|
MaxInlineSizeThreshold = 128;
|
2015-05-12 09:26:05 +08:00
|
|
|
UseSoftFloat = false;
|
2013-02-16 09:36:26 +08:00
|
|
|
}
|
|
|
|
|
2014-06-11 08:25:19 +08:00
|
|
|
/// Bring the subtarget into a fully initialized state: first reset all
/// settings to their baseline, then apply \p CPU and the feature string
/// \p FS. Returns this subtarget so the call can be used inside a
/// constructor's member initializer list.
X86Subtarget &
X86Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
  // Order matters: the baseline reset must precede feature parsing.
  initializeEnvironment();
  initSubtargetFeatures(CPU, FS);

  return *this;
}
|
|
|
|
|
2015-06-10 20:11:26 +08:00
|
|
|
/// Build the subtarget for triple \p TT, CPU \p CPU and feature string
/// \p FS. The execution mode (16/32/64-bit) is derived from the triple, the
/// feature state is set up while InstrInfo is being initialized, and the
/// PIC style is then chosen from the relocation model of \p TM and the
/// target's object format / OS.
X86Subtarget::X86Subtarget(const Triple &TT, const std::string &CPU,
                           const std::string &FS, const X86TargetMachine &TM,
                           unsigned StackAlignOverride)
    : X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
      PICStyle(PICStyles::None), TargetTriple(TT),
      StackAlignOverride(StackAlignOverride),
      In64BitMode(TargetTriple.getArch() == Triple::x86_64),
      In32BitMode(TargetTriple.getArch() == Triple::x86 &&
                  TargetTriple.getEnvironment() != Triple::CODE16),
      In16BitMode(TargetTriple.getArch() == Triple::x86 &&
                  TargetTriple.getEnvironment() == Triple::CODE16),
      TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
      TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) {
  // Determine the PICStyle based on the target selected.
  const auto RelocModel = TM.getRelocationModel();

  if (RelocModel == Reloc::Static) {
    // Unless we're in PIC or DynamicNoPIC mode, the PIC style is None.
    setPICStyle(PICStyles::None);
  } else if (is64Bit()) {
    // PIC in 64-bit mode is always rip-relative.
    setPICStyle(PICStyles::RIPRel);
  } else if (isTargetCOFF()) {
    setPICStyle(PICStyles::None);
  } else if (isTargetDarwin()) {
    if (RelocModel == Reloc::PIC_) {
      setPICStyle(PICStyles::StubPIC);
    } else {
      assert(TM.getRelocationModel() == Reloc::DynamicNoPIC);
      setPICStyle(PICStyles::StubDynamicNoPIC);
    }
  } else if (isTargetELF()) {
    setPICStyle(PICStyles::GOT);
  }
}
|
2013-02-16 06:31:27 +08:00
|
|
|
|
2014-07-16 06:39:58 +08:00
|
|
|
bool X86Subtarget::enableEarlyIfConversion() const {
|
2014-05-22 07:51:57 +08:00
|
|
|
return hasCMov() && X86EarlyIfConv;
|
2014-05-22 07:40:26 +08:00
|
|
|
}
|
2014-07-16 06:39:58 +08:00
|
|
|
|