ARM: allow both vfma and vfms intrinsics on v7.

The main purpose here is that vfma/vfms should be symmetric, and both are supported on most v7 cores. The new ArchGuard is suggested by ACLE but is only prophylactic for us: almost all CPUs with NEON *will* have vfma, and the few exceptions I know of (e.g. Cortex-A8) are incorrectly modelled by Clang, so they can't trigger a test. Fortunately, those cores are getting rarer. But if we ever do support them properly, arm_neon.h should now do the right thing.

llvm-svn: 259537
2016-02-02 18:02:10 +00:00 · 2016-02-02 18:02:10 +00:00 · bbac6d7c1b
parent 1fcd610c94
commit bbac6d7c1b
2 changed files with 17 additions and 2 deletions
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -824,7 +824,10 @@ def VREINTERPRET
 ////////////////////////////////////////////////////////////////////////////////
 // Vector fused multiply-add operations

-def VFMA : SInst<"vfma", "dddd", "fQf">;
+let ArchGuard = "defined(__ARM_FEATURE_FMA)" in {
+  def VFMA : SInst<"vfma", "dddd", "fQf">;
+  def VFMS : SInst<"vfms", "dddd", "fQf">;
+}

 ////////////////////////////////////////////////////////////////////////////////
 // fp16 vector operations
@@ -908,7 +911,7 @@ def FDIV : IOpInst<"vdiv", "ddd",  "fdQfQd", OP_DIV>;
 ////////////////////////////////////////////////////////////////////////////////
 // Vector fused multiply-add operations
 def FMLA : SInst<"vfma", "dddd", "dQd">;
-def FMLS : SInst<"vfms", "dddd", "fdQfQd">;
+def FMLS : SInst<"vfms", "dddd", "dQd">;

 ////////////////////////////////////////////////////////////////////////////////
 // MUL, MLA, MLS, FMA, FMS definitions with scalar argument
--- a/clang/test/Sema/arm_vfma.c
+++ b/clang/test/Sema/arm_vfma.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple thumbv7s-apple-ios7.0 -target-feature +neon -fsyntax-only -verify %s
+#include <arm_neon.h>
+
+// expected-no-diagnostics
+
+void func(float32x2_t v2f32, float32x4_t v4f32) {
+  vfma_f32(v2f32, v2f32, v2f32);
+  vfmaq_f32(v4f32, v4f32, v4f32);
+
+  vfms_f32(v2f32, v2f32, v2f32);
+  vfmsq_f32(v4f32, v4f32, v4f32);
+}