diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 50d54b436e07..931c81c5b632 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -49,8 +49,14 @@ def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", // FIXME: Currently, this is only flagged for Cortex-A8. It may be true for // others as well. We should do more benchmarking and confirm one way or // the other. -def HasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true", - "Disable VFP MAC instructions">; +def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true", + "Disable VFP MAC instructions">; +// Some processors benefit from using NEON instructions for scalar +// single-precision FP operations. +def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", + "true", + "Use NEON for single precision FP">; + //===----------------------------------------------------------------------===// // ARM Processors supported. @@ -115,7 +121,8 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, // V7 Processors. def : Processor<"cortex-a8", CortexA8Itineraries, - [ArchV7A, FeatureThumb2, FeatureNEON, HasSlowVMLx]>; + [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx, + FeatureNEONForFP]>; def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 76ed66fe7df4..9e55cd870031 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -22,10 +22,6 @@ using namespace llvm; static cl::opt ReserveR9("arm-reserve-r9", cl::Hidden, cl::desc("Reserve R9, making it unavailable as GPR")); -static cl::opt -UseNEONFP("arm-use-neon-fp", - cl::desc("Use NEON for single-precision FP"), - cl::init(false), cl::Hidden); static cl::opt UseMOVT("arm-use-movt", @@ -35,7 +31,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, bool isT) : ARMArchVersion(V4) , ARMFPUType(None) - , UseNEONForSinglePrecisionFP(UseNEONFP) + , UseNEONForSinglePrecisionFP(false) , SlowVMLx(false) , IsThumb(isT) , ThumbMode(Thumb1) @@ -116,14 +112,6 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, if (!isThumb() || hasThumb2()) PostRAScheduler = true; - - // Set CPU specific features. - if (CPUString == "cortex-a8") { - // On Cortex-a8, it's faster to perform some single-precision FP - // operations with NEON instructions. - if (UseNEONFP.getPosition() == 0) - UseNEONForSinglePrecisionFP = true; - } } /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. diff --git a/llvm/test/CodeGen/ARM/fabss.ll b/llvm/test/CodeGen/ARM/fabss.ll index e5b5791b3cda..f03282bdab7f 100644 --- a/llvm/test/CodeGen/ARM/fabss.ll +++ b/llvm/test/CodeGen/ARM/fabss.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 diff --git a/llvm/test/CodeGen/ARM/fadds.ll b/llvm/test/CodeGen/ARM/fadds.ll index db18a86eccd8..749690e98d0f 100644 --- a/llvm/test/CodeGen/ARM/fadds.ll +++ b/llvm/test/CodeGen/ARM/fadds.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 diff --git a/llvm/test/CodeGen/ARM/fdivs.ll b/llvm/test/CodeGen/ARM/fdivs.ll index a5c86bf26343..0c3149579297 100644 --- a/llvm/test/CodeGen/ARM/fdivs.ll +++ b/llvm/test/CodeGen/ARM/fdivs.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 diff --git a/llvm/test/CodeGen/ARM/fmacs.ll b/llvm/test/CodeGen/ARM/fmacs.ll index 904a58739370..f8b47b5bac0d 100644 --- a/llvm/test/CodeGen/ARM/fmacs.ll +++ b/llvm/test/CodeGen/ARM/fmacs.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 diff --git a/llvm/test/CodeGen/ARM/fmscs.ll b/llvm/test/CodeGen/ARM/fmscs.ll index 7b9e029b676e..7a70543dee6c 100644 --- a/llvm/test/CodeGen/ARM/fmscs.ll +++ b/llvm/test/CodeGen/ARM/fmscs.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 diff --git a/llvm/test/CodeGen/ARM/fmuls.ll b/llvm/test/CodeGen/ARM/fmuls.ll index d3c9c82e9745..ef4e3e52818e 100644 --- a/llvm/test/CodeGen/ARM/fmuls.ll +++ b/llvm/test/CodeGen/ARM/fmuls.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 diff --git a/llvm/test/CodeGen/ARM/fnegs.ll b/llvm/test/CodeGen/ARM/fnegs.ll index d6c22f14a4ca..c15005e6e8ab 100644 --- a/llvm/test/CodeGen/ARM/fnegs.ll +++ b/llvm/test/CodeGen/ARM/fnegs.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 diff --git a/llvm/test/CodeGen/ARM/fnmacs.ll b/llvm/test/CodeGen/ARM/fnmacs.ll index 724947ea04d3..1d1d06a70ea6 100644 --- a/llvm/test/CodeGen/ARM/fnmacs.ll +++ b/llvm/test/CodeGen/ARM/fnmacs.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NEON -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NEONFP +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEONFP define float @test(float %acc, float %a, float %b) { entry: diff --git a/llvm/test/CodeGen/ARM/fnmscs.ll b/llvm/test/CodeGen/ARM/fnmscs.ll index ad2188218e4d..6b7cefa6414d 100644 --- a/llvm/test/CodeGen/ARM/fnmscs.ll +++ b/llvm/test/CodeGen/ARM/fnmscs.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s diff --git a/llvm/test/CodeGen/ARM/fp_convert.ll b/llvm/test/CodeGen/ARM/fp_convert.ll index 2adac78cf806..1ef9f7f32164 100644 --- a/llvm/test/CodeGen/ARM/fp_convert.ll +++ b/llvm/test/CodeGen/ARM/fp_convert.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NEON -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=VFP2 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2 diff --git a/llvm/test/CodeGen/ARM/fsubs.ll b/llvm/test/CodeGen/ARM/fsubs.ll index ae98be307892..bea8d5f4f30b 100644 --- a/llvm/test/CodeGen/ARM/fsubs.ll +++ b/llvm/test/CodeGen/ARM/fsubs.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 define float @test(float %a, float %b) { entry: diff --git a/llvm/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll b/llvm/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll index 319d29b790e8..9c1fdb32e836 100644 --- a/llvm/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll +++ b/llvm/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+neon -arm-use-neon-fp -relocation-model=pic -disable-fp-elim +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim type { %struct.GAP } ; type %0 type { i16, i8, i8 } ; type %1 diff --git a/llvm/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll b/llvm/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll index a62b61290a5a..317db64ae45c 100644 --- a/llvm/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll +++ b/llvm/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+neon -arm-use-neon-fp -relocation-model=pic -disable-fp-elim -O3 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -O3 type { i16, i8, i8 } ; type %0 type { [2 x i32], [2 x i32] } ; type %1 diff --git a/llvm/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll b/llvm/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll index 76474746ee48..2bbc231f9622 100644 --- a/llvm/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll +++ b/llvm/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp -; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp | not grep fcpys +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | not grep fcpys ; rdar://7117307 %struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List } diff --git a/llvm/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll b/llvm/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll index acf562c74a2a..82944847663a 100644 --- a/llvm/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll +++ b/llvm/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 ; rdar://7117307 %struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List } diff --git a/llvm/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll b/llvm/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll index 3ada02676bfc..b18c972aeddc 100644 --- a/llvm/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll +++ b/llvm/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+neon -arm-use-neon-fp +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 ; rdar://7117307 %struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List } diff --git a/llvm/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll b/llvm/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll index 090ed2d81f60..96bcbad77146 100644 --- a/llvm/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll +++ b/llvm/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 -arm-use-neon-fp +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } %struct.JHUFF_TBL = type { [17 x i8], [256 x i8], i32 }