diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index bbb1dbdc9a4a..50d54b436e07 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -43,6 +43,15 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2", def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", "Enable half-precision floating point">; +// Some processors have multiply-accumulate instructions that don't +// play nicely with other VFP instructions, and it's generally better +// to just not use them. +// FIXME: Currently, this is only flagged for Cortex-A8. It may be true for +// others as well. We should do more benchmarking and confirm one way or +// the other. +def HasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true", + "Disable VFP MAC instructions">; + //===----------------------------------------------------------------------===// // ARM Processors supported. // @@ -106,7 +115,7 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, // V7 Processors. def : Processor<"cortex-a8", CortexA8Itineraries, - [ArchV7A, FeatureThumb2, FeatureNEON]>; + [ArchV7A, FeatureThumb2, FeatureNEON, HasSlowVMLx]>; def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 8df59e4789e3..76ed66fe7df4 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -26,10 +26,6 @@ static cl::opt UseNEONFP("arm-use-neon-fp", cl::desc("Use NEON for single-precision FP"), cl::init(false), cl::Hidden); -static cl::opt -UseVMLxInstructions("arm-use-vmlx", - cl::desc("Use VFP vmla and vmls instructions"), - cl::init(true), cl::Hidden); static cl::opt UseMOVT("arm-use-movt", @@ -40,7 +36,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, : ARMArchVersion(V4) , ARMFPUType(None) , UseNEONForSinglePrecisionFP(UseNEONFP) - , UseVMLx(UseVMLxInstructions) + , SlowVMLx(false) , IsThumb(isT) , ThumbMode(Thumb1) , PostRAScheduler(false) @@ -127,12 +123,6 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, // operations with NEON instructions. if (UseNEONFP.getPosition() == 0) UseNEONForSinglePrecisionFP = true; - // The VFP vlma and vlms instructions don't play nicely with others; - // disable them. - // FIXME: This may be true for other variants as well. Get benchmark - // numbers and add them if determined that's the case. - if (UseVMLxInstructions.getPosition() == 0) - UseVMLx = false; } } diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 686684cab811..fa56a9195bc2 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -50,9 +50,9 @@ protected: /// determine if NEON should actually be used. bool UseNEONForSinglePrecisionFP; - /// UseVMLx - If the VFP2 instructions are available, indicates whether - /// the VML[AS] instructions should be used. - bool UseVMLx; + /// SlowVMLx - If the VFP2 instructions are available, indicates whether + /// the VML[AS] instructions are slow (if so, don't use them). + bool SlowVMLx; /// IsThumb - True if we are in thumb mode, false if in ARM mode. bool IsThumb; @@ -123,7 +123,7 @@ protected: bool hasNEON() const { return ARMFPUType >= NEON; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } - bool useVMLx() const {return hasVFP2() && UseVMLx; } + bool useVMLx() const {return hasVFP2() && !SlowVMLx; } bool hasFP16() const { return HasFP16; }