forked from OSchip/llvm-project
rename "slow-unaligned-mem-under-32" to "slow-unaligned-mem-16" (NFCI)
This is a follow-on suggested by http://reviews.llvm.org/D12154 (http://reviews.llvm.org/rL245729) and http://reviews.llvm.org/D10662 (http://reviews.llvm.org/rL245075). This makes the attribute name match most of the existing lowering logic and regression test expectations. However, the current use of this attribute is inconsistent; see the FIXME comment for "allowsMisalignedMemoryAccesses()". Fixing that inconsistency will result in functional changes and should be coming soon. llvm-svn: 246585
This commit is contained in:
parent
87202a4aac
commit
30145677a8
|
@ -79,9 +79,10 @@ def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
|
|||
"Bit testing of memory is slow">;
|
||||
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
|
||||
"SHLD instruction is slow">;
|
||||
def FeatureSlowUAMem : SubtargetFeature<"slow-unaligned-mem-under-32",
|
||||
"IsUAMemUnder32Slow", "true",
|
||||
"Slow unaligned 16-byte-or-less memory access">;
|
||||
// FIXME: This should not apply to CPUs that do not have SSE.
|
||||
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
|
||||
"IsUAMem16Slow", "true",
|
||||
"Slow unaligned 16-byte memory access">;
|
||||
def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
|
||||
"IsUAMem32Slow", "true",
|
||||
"Slow unaligned 32-byte memory access">;
|
||||
|
@ -209,42 +210,45 @@ def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
|
|||
class Proc<string Name, list<SubtargetFeature> Features>
|
||||
: ProcessorModel<Name, GenericModel, Features>;
|
||||
|
||||
def : Proc<"generic", [FeatureSlowUAMem]>;
|
||||
def : Proc<"i386", [FeatureSlowUAMem]>;
|
||||
def : Proc<"i486", [FeatureSlowUAMem]>;
|
||||
def : Proc<"i586", [FeatureSlowUAMem]>;
|
||||
def : Proc<"pentium", [FeatureSlowUAMem]>;
|
||||
def : Proc<"pentium-mmx", [FeatureSlowUAMem, FeatureMMX]>;
|
||||
def : Proc<"i686", [FeatureSlowUAMem]>;
|
||||
def : Proc<"pentiumpro", [FeatureSlowUAMem, FeatureCMOV]>;
|
||||
def : Proc<"pentium2", [FeatureSlowUAMem, FeatureMMX, FeatureCMOV]>;
|
||||
def : Proc<"pentium3", [FeatureSlowUAMem, FeatureSSE1]>;
|
||||
def : Proc<"pentium3m", [FeatureSlowUAMem, FeatureSSE1, FeatureSlowBTMem]>;
|
||||
def : Proc<"pentium-m", [FeatureSlowUAMem, FeatureSSE2, FeatureSlowBTMem]>;
|
||||
def : Proc<"pentium4", [FeatureSlowUAMem, FeatureSSE2]>;
|
||||
def : Proc<"pentium4m", [FeatureSlowUAMem, FeatureSSE2, FeatureSlowBTMem]>;
|
||||
def : Proc<"generic", [FeatureSlowUAMem16]>;
|
||||
def : Proc<"i386", [FeatureSlowUAMem16]>;
|
||||
def : Proc<"i486", [FeatureSlowUAMem16]>;
|
||||
def : Proc<"i586", [FeatureSlowUAMem16]>;
|
||||
def : Proc<"pentium", [FeatureSlowUAMem16]>;
|
||||
def : Proc<"pentium-mmx", [FeatureSlowUAMem16, FeatureMMX]>;
|
||||
def : Proc<"i686", [FeatureSlowUAMem16]>;
|
||||
def : Proc<"pentiumpro", [FeatureSlowUAMem16, FeatureCMOV]>;
|
||||
def : Proc<"pentium2", [FeatureSlowUAMem16, FeatureMMX, FeatureCMOV]>;
|
||||
def : Proc<"pentium3", [FeatureSlowUAMem16, FeatureSSE1]>;
|
||||
def : Proc<"pentium3m", [FeatureSlowUAMem16, FeatureSSE1,
|
||||
FeatureSlowBTMem]>;
|
||||
def : Proc<"pentium-m", [FeatureSlowUAMem16, FeatureSSE2,
|
||||
FeatureSlowBTMem]>;
|
||||
def : Proc<"pentium4", [FeatureSlowUAMem16, FeatureSSE2]>;
|
||||
def : Proc<"pentium4m", [FeatureSlowUAMem16, FeatureSSE2,
|
||||
FeatureSlowBTMem]>;
|
||||
|
||||
// Intel Core Duo.
|
||||
def : ProcessorModel<"yonah", SandyBridgeModel,
|
||||
[FeatureSlowUAMem, FeatureSSE3, FeatureSlowBTMem]>;
|
||||
[FeatureSlowUAMem16, FeatureSSE3, FeatureSlowBTMem]>;
|
||||
|
||||
// NetBurst.
|
||||
def : Proc<"prescott", [FeatureSlowUAMem, FeatureSSE3, FeatureSlowBTMem]>;
|
||||
def : Proc<"nocona", [FeatureSlowUAMem, FeatureSSE3, FeatureCMPXCHG16B,
|
||||
def : Proc<"prescott", [FeatureSlowUAMem16, FeatureSSE3, FeatureSlowBTMem]>;
|
||||
def : Proc<"nocona", [FeatureSlowUAMem16, FeatureSSE3, FeatureCMPXCHG16B,
|
||||
FeatureSlowBTMem]>;
|
||||
|
||||
// Intel Core 2 Solo/Duo.
|
||||
def : ProcessorModel<"core2", SandyBridgeModel,
|
||||
[FeatureSlowUAMem, FeatureSSSE3, FeatureCMPXCHG16B,
|
||||
[FeatureSlowUAMem16, FeatureSSSE3, FeatureCMPXCHG16B,
|
||||
FeatureSlowBTMem]>;
|
||||
def : ProcessorModel<"penryn", SandyBridgeModel,
|
||||
[FeatureSlowUAMem, FeatureSSE41, FeatureCMPXCHG16B,
|
||||
[FeatureSlowUAMem16, FeatureSSE41, FeatureCMPXCHG16B,
|
||||
FeatureSlowBTMem]>;
|
||||
|
||||
// Atom CPUs.
|
||||
class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
|
||||
ProcIntelAtom,
|
||||
FeatureSlowUAMem,
|
||||
FeatureSlowUAMem16,
|
||||
FeatureSSSE3,
|
||||
FeatureCMPXCHG16B,
|
||||
FeatureMOVBE,
|
||||
|
@ -399,38 +403,38 @@ def : SkylakeProc<"skx">; // Legacy alias.
|
|||
|
||||
// AMD CPUs.
|
||||
|
||||
def : Proc<"k6", [FeatureSlowUAMem, FeatureMMX]>;
|
||||
def : Proc<"k6-2", [FeatureSlowUAMem, Feature3DNow]>;
|
||||
def : Proc<"k6-3", [FeatureSlowUAMem, Feature3DNow]>;
|
||||
def : Proc<"athlon", [FeatureSlowUAMem, Feature3DNowA,
|
||||
def : Proc<"k6", [FeatureSlowUAMem16, FeatureMMX]>;
|
||||
def : Proc<"k6-2", [FeatureSlowUAMem16, Feature3DNow]>;
|
||||
def : Proc<"k6-3", [FeatureSlowUAMem16, Feature3DNow]>;
|
||||
def : Proc<"athlon", [FeatureSlowUAMem16, Feature3DNowA,
|
||||
FeatureSlowBTMem, FeatureSlowSHLD]>;
|
||||
def : Proc<"athlon-tbird", [FeatureSlowUAMem, Feature3DNowA,
|
||||
def : Proc<"athlon-tbird", [FeatureSlowUAMem16, Feature3DNowA,
|
||||
FeatureSlowBTMem, FeatureSlowSHLD]>;
|
||||
def : Proc<"athlon-4", [FeatureSlowUAMem, FeatureSSE1, Feature3DNowA,
|
||||
def : Proc<"athlon-4", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
|
||||
FeatureSlowBTMem, FeatureSlowSHLD]>;
|
||||
def : Proc<"athlon-xp", [FeatureSlowUAMem, FeatureSSE1, Feature3DNowA,
|
||||
def : Proc<"athlon-xp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
|
||||
FeatureSlowBTMem, FeatureSlowSHLD]>;
|
||||
def : Proc<"athlon-mp", [FeatureSlowUAMem, FeatureSSE1, Feature3DNowA,
|
||||
def : Proc<"athlon-mp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
|
||||
FeatureSlowBTMem, FeatureSlowSHLD]>;
|
||||
def : Proc<"k8", [FeatureSlowUAMem, FeatureSSE2, Feature3DNowA,
|
||||
def : Proc<"k8", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
|
||||
Feature64Bit, FeatureSlowBTMem,
|
||||
FeatureSlowSHLD]>;
|
||||
def : Proc<"opteron", [FeatureSlowUAMem, FeatureSSE2, Feature3DNowA,
|
||||
def : Proc<"opteron", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
|
||||
Feature64Bit, FeatureSlowBTMem,
|
||||
FeatureSlowSHLD]>;
|
||||
def : Proc<"athlon64", [FeatureSlowUAMem, FeatureSSE2, Feature3DNowA,
|
||||
def : Proc<"athlon64", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
|
||||
Feature64Bit, FeatureSlowBTMem,
|
||||
FeatureSlowSHLD]>;
|
||||
def : Proc<"athlon-fx", [FeatureSlowUAMem, FeatureSSE2, Feature3DNowA,
|
||||
def : Proc<"athlon-fx", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
|
||||
Feature64Bit, FeatureSlowBTMem,
|
||||
FeatureSlowSHLD]>;
|
||||
def : Proc<"k8-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
|
||||
def : Proc<"k8-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
|
||||
FeatureCMPXCHG16B, FeatureSlowBTMem,
|
||||
FeatureSlowSHLD]>;
|
||||
def : Proc<"opteron-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
|
||||
def : Proc<"opteron-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
|
||||
FeatureCMPXCHG16B, FeatureSlowBTMem,
|
||||
FeatureSlowSHLD]>;
|
||||
def : Proc<"athlon64-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
|
||||
def : Proc<"athlon64-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
|
||||
FeatureCMPXCHG16B, FeatureSlowBTMem,
|
||||
FeatureSlowSHLD]>;
|
||||
def : Proc<"amdfam10", [FeatureSSE4A,
|
||||
|
@ -483,12 +487,12 @@ def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
|
|||
FeatureTBM, FeatureFMA, FeatureSSE4A,
|
||||
FeatureFSGSBase]>;
|
||||
|
||||
def : Proc<"geode", [FeatureSlowUAMem, Feature3DNowA]>;
|
||||
def : Proc<"geode", [FeatureSlowUAMem16, Feature3DNowA]>;
|
||||
|
||||
def : Proc<"winchip-c6", [FeatureSlowUAMem, FeatureMMX]>;
|
||||
def : Proc<"winchip2", [FeatureSlowUAMem, Feature3DNow]>;
|
||||
def : Proc<"c3", [FeatureSlowUAMem, Feature3DNow]>;
|
||||
def : Proc<"c3-2", [FeatureSlowUAMem, FeatureSSE1]>;
|
||||
def : Proc<"winchip-c6", [FeatureSlowUAMem16, FeatureMMX]>;
|
||||
def : Proc<"winchip2", [FeatureSlowUAMem16, Feature3DNow]>;
|
||||
def : Proc<"c3", [FeatureSlowUAMem16, Feature3DNow]>;
|
||||
def : Proc<"c3-2", [FeatureSlowUAMem16, FeatureSSE1]>;
|
||||
|
||||
// We also provide a generic 64-bit specific x86 processor model which tries to
|
||||
// be good for modern chips without enabling instruction set encodings past the
|
||||
|
|
|
@ -1869,7 +1869,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
|
|||
if ((!IsMemset || ZeroMemset) &&
|
||||
!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
|
||||
if (Size >= 16 &&
|
||||
(!Subtarget->isUnalignedMemUnder32Slow() ||
|
||||
(!Subtarget->isUnalignedMem16Slow() ||
|
||||
((DstAlign == 0 || DstAlign >= 16) &&
|
||||
(SrcAlign == 0 || SrcAlign >= 16)))) {
|
||||
if (Size >= 32) {
|
||||
|
@ -1916,7 +1916,9 @@ X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
|
|||
if (VT.getSizeInBits() == 256)
|
||||
*Fast = !Subtarget->isUnalignedMem32Slow();
|
||||
else
|
||||
*Fast = !Subtarget->isUnalignedMemUnder32Slow();
|
||||
// FIXME: We should always return that 8-byte and under accesses are fast.
|
||||
// That is what other x86 lowering code assumes.
|
||||
*Fast = !Subtarget->isUnalignedMem16Slow();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -5511,7 +5511,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
|
|||
// TODO: Check if 32-byte or greater accesses are slow too?
|
||||
if (!MI->hasOneMemOperand() &&
|
||||
RC == &X86::VR128RegClass &&
|
||||
Subtarget.isUnalignedMemUnder32Slow())
|
||||
Subtarget.isUnalignedMem16Slow())
|
||||
// Without memoperands, loadRegFromAddr and storeRegToStackSlot will
|
||||
// conservatively assume the address is unaligned. That's bad for
|
||||
// performance.
|
||||
|
@ -5659,7 +5659,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
|
|||
cast<MachineSDNode>(N)->memoperands_end());
|
||||
if (!(*MMOs.first) &&
|
||||
RC == &X86::VR128RegClass &&
|
||||
Subtarget.isUnalignedMemUnder32Slow())
|
||||
Subtarget.isUnalignedMem16Slow())
|
||||
// Do not introduce a slow unaligned load.
|
||||
return false;
|
||||
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
|
||||
|
@ -5704,7 +5704,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
|
|||
cast<MachineSDNode>(N)->memoperands_end());
|
||||
if (!(*MMOs.first) &&
|
||||
RC == &X86::VR128RegClass &&
|
||||
Subtarget.isUnalignedMemUnder32Slow())
|
||||
Subtarget.isUnalignedMem16Slow())
|
||||
// Do not introduce a slow unaligned store.
|
||||
return false;
|
||||
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
|
||||
|
|
|
@ -197,7 +197,7 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
|
|||
// introduced with Intel's Nehalem/Silvermont and AMD's Family10h
|
||||
// micro-architectures respectively.
|
||||
if (hasSSE42() || hasSSE4A())
|
||||
IsUAMemUnder32Slow = false;
|
||||
IsUAMem16Slow = false;
|
||||
|
||||
InstrItins = getInstrItineraryForCPU(CPUName);
|
||||
|
||||
|
@ -262,7 +262,7 @@ void X86Subtarget::initializeEnvironment() {
|
|||
HasMPX = false;
|
||||
IsBTMemSlow = false;
|
||||
IsSHLDSlow = false;
|
||||
IsUAMemUnder32Slow = false;
|
||||
IsUAMem16Slow = false;
|
||||
IsUAMem32Slow = false;
|
||||
HasSSEUnalignedMem = false;
|
||||
HasCmpxchg16b = false;
|
||||
|
|
|
@ -146,8 +146,8 @@ protected:
|
|||
/// True if SHLD instructions are slow.
|
||||
bool IsSHLDSlow;
|
||||
|
||||
/// True if unaligned memory accesses of 16-bytes or smaller are slow.
|
||||
bool IsUAMemUnder32Slow;
|
||||
/// True if unaligned memory accesses of 16-bytes are slow.
|
||||
bool IsUAMem16Slow;
|
||||
|
||||
/// True if unaligned memory accesses of 32-bytes are slow.
|
||||
bool IsUAMem32Slow;
|
||||
|
@ -357,7 +357,7 @@ public:
|
|||
bool hasRDSEED() const { return HasRDSEED; }
|
||||
bool isBTMemSlow() const { return IsBTMemSlow; }
|
||||
bool isSHLDSlow() const { return IsSHLDSlow; }
|
||||
bool isUnalignedMemUnder32Slow() const { return IsUAMemUnder32Slow; }
|
||||
bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
|
||||
bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
|
||||
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
|
||||
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
|
||||
|
|
Loading…
Reference in New Issue