rename "slow-unaligned-mem-under-32" to "slow-unaligned-mem-16" (NFCI)

This is a follow-on suggested by:
http://reviews.llvm.org/D12154 ( http://reviews.llvm.org/rL245729 )
http://reviews.llvm.org/D10662 ( http://reviews.llvm.org/rL245075 )

This makes the attribute name match most of the existing lowering logic
and regression test expectations.

But the current use of this attribute is inconsistent; see the FIXME
comment for "allowsMisalignedMemoryAccesses()". Resolving that FIXME
will result in functional changes; that fix should be coming soon.

llvm-svn: 246585
This commit is contained in:
Sanjay Patel 2015-09-01 20:51:51 +00:00
parent 87202a4aac
commit 30145677a8
5 changed files with 59 additions and 53 deletions

View File

@ -79,9 +79,10 @@ def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
"Bit testing of memory is slow">;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
def FeatureSlowUAMem : SubtargetFeature<"slow-unaligned-mem-under-32",
"IsUAMemUnder32Slow", "true",
"Slow unaligned 16-byte-or-less memory access">;
// FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
"IsUAMem16Slow", "true",
"Slow unaligned 16-byte memory access">;
def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
"IsUAMem32Slow", "true",
"Slow unaligned 32-byte memory access">;
@ -209,42 +210,45 @@ def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
def : Proc<"generic", [FeatureSlowUAMem]>;
def : Proc<"i386", [FeatureSlowUAMem]>;
def : Proc<"i486", [FeatureSlowUAMem]>;
def : Proc<"i586", [FeatureSlowUAMem]>;
def : Proc<"pentium", [FeatureSlowUAMem]>;
def : Proc<"pentium-mmx", [FeatureSlowUAMem, FeatureMMX]>;
def : Proc<"i686", [FeatureSlowUAMem]>;
def : Proc<"pentiumpro", [FeatureSlowUAMem, FeatureCMOV]>;
def : Proc<"pentium2", [FeatureSlowUAMem, FeatureMMX, FeatureCMOV]>;
def : Proc<"pentium3", [FeatureSlowUAMem, FeatureSSE1]>;
def : Proc<"pentium3m", [FeatureSlowUAMem, FeatureSSE1, FeatureSlowBTMem]>;
def : Proc<"pentium-m", [FeatureSlowUAMem, FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSlowUAMem, FeatureSSE2]>;
def : Proc<"pentium4m", [FeatureSlowUAMem, FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"generic", [FeatureSlowUAMem16]>;
def : Proc<"i386", [FeatureSlowUAMem16]>;
def : Proc<"i486", [FeatureSlowUAMem16]>;
def : Proc<"i586", [FeatureSlowUAMem16]>;
def : Proc<"pentium", [FeatureSlowUAMem16]>;
def : Proc<"pentium-mmx", [FeatureSlowUAMem16, FeatureMMX]>;
def : Proc<"i686", [FeatureSlowUAMem16]>;
def : Proc<"pentiumpro", [FeatureSlowUAMem16, FeatureCMOV]>;
def : Proc<"pentium2", [FeatureSlowUAMem16, FeatureMMX, FeatureCMOV]>;
def : Proc<"pentium3", [FeatureSlowUAMem16, FeatureSSE1]>;
def : Proc<"pentium3m", [FeatureSlowUAMem16, FeatureSSE1,
FeatureSlowBTMem]>;
def : Proc<"pentium-m", [FeatureSlowUAMem16, FeatureSSE2,
FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSlowUAMem16, FeatureSSE2]>;
def : Proc<"pentium4m", [FeatureSlowUAMem16, FeatureSSE2,
FeatureSlowBTMem]>;
// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel,
[FeatureSlowUAMem, FeatureSSE3, FeatureSlowBTMem]>;
[FeatureSlowUAMem16, FeatureSSE3, FeatureSlowBTMem]>;
// NetBurst.
def : Proc<"prescott", [FeatureSlowUAMem, FeatureSSE3, FeatureSlowBTMem]>;
def : Proc<"nocona", [FeatureSlowUAMem, FeatureSSE3, FeatureCMPXCHG16B,
def : Proc<"prescott", [FeatureSlowUAMem16, FeatureSSE3, FeatureSlowBTMem]>;
def : Proc<"nocona", [FeatureSlowUAMem16, FeatureSSE3, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
// Intel Core 2 Solo/Duo.
def : ProcessorModel<"core2", SandyBridgeModel,
[FeatureSlowUAMem, FeatureSSSE3, FeatureCMPXCHG16B,
[FeatureSlowUAMem16, FeatureSSSE3, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : ProcessorModel<"penryn", SandyBridgeModel,
[FeatureSlowUAMem, FeatureSSE41, FeatureCMPXCHG16B,
[FeatureSlowUAMem16, FeatureSSE41, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
// Atom CPUs.
class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
ProcIntelAtom,
FeatureSlowUAMem,
FeatureSlowUAMem16,
FeatureSSSE3,
FeatureCMPXCHG16B,
FeatureMOVBE,
@ -399,38 +403,38 @@ def : SkylakeProc<"skx">; // Legacy alias.
// AMD CPUs.
def : Proc<"k6", [FeatureSlowUAMem, FeatureMMX]>;
def : Proc<"k6-2", [FeatureSlowUAMem, Feature3DNow]>;
def : Proc<"k6-3", [FeatureSlowUAMem, Feature3DNow]>;
def : Proc<"athlon", [FeatureSlowUAMem, Feature3DNowA,
def : Proc<"k6", [FeatureSlowUAMem16, FeatureMMX]>;
def : Proc<"k6-2", [FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"k6-3", [FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"athlon", [FeatureSlowUAMem16, Feature3DNowA,
FeatureSlowBTMem, FeatureSlowSHLD]>;
def : Proc<"athlon-tbird", [FeatureSlowUAMem, Feature3DNowA,
def : Proc<"athlon-tbird", [FeatureSlowUAMem16, Feature3DNowA,
FeatureSlowBTMem, FeatureSlowSHLD]>;
def : Proc<"athlon-4", [FeatureSlowUAMem, FeatureSSE1, Feature3DNowA,
def : Proc<"athlon-4", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
FeatureSlowBTMem, FeatureSlowSHLD]>;
def : Proc<"athlon-xp", [FeatureSlowUAMem, FeatureSSE1, Feature3DNowA,
def : Proc<"athlon-xp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
FeatureSlowBTMem, FeatureSlowSHLD]>;
def : Proc<"athlon-mp", [FeatureSlowUAMem, FeatureSSE1, Feature3DNowA,
def : Proc<"athlon-mp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
FeatureSlowBTMem, FeatureSlowSHLD]>;
def : Proc<"k8", [FeatureSlowUAMem, FeatureSSE2, Feature3DNowA,
def : Proc<"k8", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>;
def : Proc<"opteron", [FeatureSlowUAMem, FeatureSSE2, Feature3DNowA,
def : Proc<"opteron", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>;
def : Proc<"athlon64", [FeatureSlowUAMem, FeatureSSE2, Feature3DNowA,
def : Proc<"athlon64", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>;
def : Proc<"athlon-fx", [FeatureSlowUAMem, FeatureSSE2, Feature3DNowA,
def : Proc<"athlon-fx", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>;
def : Proc<"k8-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
def : Proc<"k8-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowSHLD]>;
def : Proc<"opteron-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
def : Proc<"opteron-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowSHLD]>;
def : Proc<"athlon64-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
def : Proc<"athlon64-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowSHLD]>;
def : Proc<"amdfam10", [FeatureSSE4A,
@ -483,12 +487,12 @@ def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
FeatureTBM, FeatureFMA, FeatureSSE4A,
FeatureFSGSBase]>;
def : Proc<"geode", [FeatureSlowUAMem, Feature3DNowA]>;
def : Proc<"geode", [FeatureSlowUAMem16, Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureSlowUAMem, FeatureMMX]>;
def : Proc<"winchip2", [FeatureSlowUAMem, Feature3DNow]>;
def : Proc<"c3", [FeatureSlowUAMem, Feature3DNow]>;
def : Proc<"c3-2", [FeatureSlowUAMem, FeatureSSE1]>;
def : Proc<"winchip-c6", [FeatureSlowUAMem16, FeatureMMX]>;
def : Proc<"winchip2", [FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"c3", [FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"c3-2", [FeatureSlowUAMem16, FeatureSSE1]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the

View File

@ -1869,7 +1869,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
if ((!IsMemset || ZeroMemset) &&
!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
(!Subtarget->isUnalignedMemUnder32Slow() ||
(!Subtarget->isUnalignedMem16Slow() ||
((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16)))) {
if (Size >= 32) {
@ -1916,7 +1916,9 @@ X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (VT.getSizeInBits() == 256)
*Fast = !Subtarget->isUnalignedMem32Slow();
else
*Fast = !Subtarget->isUnalignedMemUnder32Slow();
// FIXME: We should always return that 8-byte and under accesses are fast.
// That is what other x86 lowering code assumes.
*Fast = !Subtarget->isUnalignedMem16Slow();
}
return true;
}

View File

@ -5511,7 +5511,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
// TODO: Check if 32-byte or greater accesses are slow too?
if (!MI->hasOneMemOperand() &&
RC == &X86::VR128RegClass &&
Subtarget.isUnalignedMemUnder32Slow())
Subtarget.isUnalignedMem16Slow())
// Without memoperands, loadRegFromAddr and storeRegToStackSlot will
// conservatively assume the address is unaligned. That's bad for
// performance.
@ -5659,7 +5659,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
cast<MachineSDNode>(N)->memoperands_end());
if (!(*MMOs.first) &&
RC == &X86::VR128RegClass &&
Subtarget.isUnalignedMemUnder32Slow())
Subtarget.isUnalignedMem16Slow())
// Do not introduce a slow unaligned load.
return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
@ -5704,7 +5704,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
cast<MachineSDNode>(N)->memoperands_end());
if (!(*MMOs.first) &&
RC == &X86::VR128RegClass &&
Subtarget.isUnalignedMemUnder32Slow())
Subtarget.isUnalignedMem16Slow())
// Do not introduce a slow unaligned store.
return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte

View File

@ -197,7 +197,7 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// introduced with Intel's Nehalem/Silvermont and AMD's Family10h
// micro-architectures respectively.
if (hasSSE42() || hasSSE4A())
IsUAMemUnder32Slow = false;
IsUAMem16Slow = false;
InstrItins = getInstrItineraryForCPU(CPUName);
@ -262,7 +262,7 @@ void X86Subtarget::initializeEnvironment() {
HasMPX = false;
IsBTMemSlow = false;
IsSHLDSlow = false;
IsUAMemUnder32Slow = false;
IsUAMem16Slow = false;
IsUAMem32Slow = false;
HasSSEUnalignedMem = false;
HasCmpxchg16b = false;

View File

@ -146,8 +146,8 @@ protected:
/// True if SHLD instructions are slow.
bool IsSHLDSlow;
/// True if unaligned memory accesses of 16-bytes or smaller are slow.
bool IsUAMemUnder32Slow;
/// True if unaligned memory accesses of 16-bytes are slow.
bool IsUAMem16Slow;
/// True if unaligned memory accesses of 32-bytes are slow.
bool IsUAMem32Slow;
@ -357,7 +357,7 @@ public:
bool hasRDSEED() const { return HasRDSEED; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isSHLDSlow() const { return IsSHLDSlow; }
bool isUnalignedMemUnder32Slow() const { return IsUAMemUnder32Slow; }
bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }