diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 8962e7a41c8a..b77033a0b385 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -682,14 +682,10 @@ TARGET_BUILTIN(__builtin_ia32_sha256msg2, "V4iV4iV4i", "", "sha") // FMA TARGET_BUILTIN(__builtin_ia32_vfmaddps, "V4fV4fV4fV4f", "", "fma|fma4") TARGET_BUILTIN(__builtin_ia32_vfmaddpd, "V2dV2dV2dV2d", "", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmsubss3, "V4fV4fV4fV4f", "", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmsubsd3, "V2dV2dV2dV2d", "", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfnmaddss3, "V4fV4fV4fV4f", "", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfnmaddsd3, "V2dV2dV2dV2d", "", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfnmsubss3, "V4fV4fV4fV4f", "", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfnmsubsd3, "V2dV2dV2dV2d", "", "fma|fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "", "fma") +TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "", "fma") +TARGET_BUILTIN(__builtin_ia32_vfmaddss, "V4fV4fV4fV4f", "", "fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddsd, "V2dV2dV2dV2d", "", "fma4") TARGET_BUILTIN(__builtin_ia32_vfmaddsubps, "V4fV4fV4fV4f", "", "fma|fma4") TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd, "V2dV2dV2dV2d", "", "fma|fma4") TARGET_BUILTIN(__builtin_ia32_vfmaddps256, "V8fV8fV8fV8f", "", "fma|fma4") diff --git a/clang/lib/Headers/fma4intrin.h b/clang/lib/Headers/fma4intrin.h index cda20e7d8109..962b1a60a258 100644 --- a/clang/lib/Headers/fma4intrin.h +++ b/clang/lib/Headers/fma4intrin.h @@ -48,13 +48,13 @@ _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS @@ -72,13 +72,13 @@ _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS @@ -96,13 +96,13 @@ _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfnmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfnmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS @@ -120,13 +120,13 @@ _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfnmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfnmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/fmaintrin.h b/clang/lib/Headers/fmaintrin.h index 86a1198879b9..478a0ac81cf2 100644 --- a/clang/lib/Headers/fmaintrin.h +++ b/clang/lib/Headers/fmaintrin.h @@ -70,13 +70,13 @@ _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS @@ -94,13 +94,13 @@ _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfnmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfnmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS @@ -118,13 +118,13 @@ _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfnmsubss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); + return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfnmsubsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); + return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C); } static __inline__ __m128 __DEFAULT_FN_ATTRS diff --git a/clang/test/CodeGen/fma-builtins.c b/clang/test/CodeGen/fma-builtins.c index d5905d33a979..6f792a78d84c 100644 --- a/clang/test/CodeGen/fma-builtins.c +++ b/clang/test/CodeGen/fma-builtins.c @@ -43,13 +43,15 @@ __m128d test_mm_fmsub_pd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_fmsub_ss(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fmsub_ss - // CHECK: @llvm.x86.fma.vfmsub.ss + // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.+}} + // CHECK: @llvm.x86.fma.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]]) return _mm_fmsub_ss(a, b, c); } __m128d test_mm_fmsub_sd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_fmsub_sd - // CHECK: @llvm.x86.fma.vfmsub.sd + // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.+}} + // CHECK: @llvm.x86.fma.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]]) return _mm_fmsub_sd(a, b, c); } @@ -69,13 +71,15 @@ __m128d test_mm_fnmadd_pd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_fnmadd_ss(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fnmadd_ss - // CHECK: @llvm.x86.fma.vfnmadd.ss + // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.+}} + // CHECK: @llvm.x86.fma.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> [[NEG]], <4 x float> %{{.+}}) return _mm_fnmadd_ss(a, b, c); } __m128d test_mm_fnmadd_sd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_fnmadd_sd - // CHECK: @llvm.x86.fma.vfnmadd.sd + // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.+}} + // CHECK: @llvm.x86.fma.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> [[NEG]], <2 x double> %{{.+}}) return _mm_fnmadd_sd(a, b, c); } @@ -97,13 +101,17 @@ __m128d test_mm_fnmsub_pd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_fnmsub_ss(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_fnmsub_ss - // CHECK: @llvm.x86.fma.vfnmsub.ss + // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.+}} + // CHECK: [[NEG2:%.+]] = fsub <4 x float> , %{{.+}} + // CHECK: @llvm.x86.fma.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> [[NEG]], <4 x float> [[NEG2]]) return _mm_fnmsub_ss(a, b, c); } __m128d test_mm_fnmsub_sd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_fnmsub_sd - // CHECK: @llvm.x86.fma.vfnmsub.sd + // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.+}} + // CHECK: [[NEG2:%.+]] = fsub <2 x double> , %{{.+}} + // CHECK: @llvm.x86.fma.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> [[NEG]], <2 x double> [[NEG2]]) return _mm_fnmsub_sd(a, b, c); } diff --git a/clang/test/CodeGen/fma4-builtins.c b/clang/test/CodeGen/fma4-builtins.c index 68a2ec6e3a99..c848d4a751da 100644 --- a/clang/test/CodeGen/fma4-builtins.c +++ b/clang/test/CodeGen/fma4-builtins.c @@ -17,13 +17,13 @@ __m128d test_mm_macc_pd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_macc_ss(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_macc_ss - // CHECK: @llvm.x86.fma.vfmadd.ss + // CHECK: @llvm.x86.fma4.vfmadd.ss return _mm_macc_ss(a, b, c); } __m128d test_mm_macc_sd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_macc_sd - // CHECK: @llvm.x86.fma.vfmadd.sd + // CHECK: @llvm.x86.fma4.vfmadd.sd return _mm_macc_sd(a, b, c); } @@ -43,13 +43,15 @@ __m128d test_mm_msub_pd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_msub_ss(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_msub_ss - // CHECK: @llvm.x86.fma.vfmsub.ss + // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.+}} + // CHECK: @llvm.x86.fma4.vfmadd.ss(<4 x float> %{{.+}}, <4 x float> %{{.+}}, <4 x float> [[NEG]]) return _mm_msub_ss(a, b, c); } __m128d test_mm_msub_sd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_msub_sd - // CHECK: @llvm.x86.fma.vfmsub.sd + // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.+}} + // CHECK: @llvm.x86.fma4.vfmadd.sd(<2 x double> %{{.+}}, <2 x double> %{{.+}}, <2 x double> [[NEG]]) return _mm_msub_sd(a, b, c); } @@ -69,13 +71,15 @@ __m128d test_mm_nmacc_pd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_nmacc_ss(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_nmacc_ss - // CHECK: @llvm.x86.fma.vfnmadd.ss + // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.+}} + // CHECK: @llvm.x86.fma4.vfmadd.ss(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> %{{.+}}) return _mm_nmacc_ss(a, b, c); } __m128d test_mm_nmacc_sd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_nmacc_sd - // CHECK: @llvm.x86.fma.vfnmadd.sd + // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.+}} + // CHECK: @llvm.x86.fma4.vfmadd.sd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> %{{.+}}) return _mm_nmacc_sd(a, b, c); } @@ -97,13 +101,17 @@ __m128d test_mm_nmsub_pd(__m128d a, __m128d b, __m128d c) { __m128 test_mm_nmsub_ss(__m128 a, __m128 b, __m128 c) { // CHECK-LABEL: test_mm_nmsub_ss - // CHECK: @llvm.x86.fma.vfnmsub.ss + // CHECK: [[NEG:%.+]] = fsub <4 x float> , %{{.+}} + // CHECK: [[NEG2:%.+]] = fsub <4 x float> , %{{.+}} + // CHECK: @llvm.x86.fma4.vfmadd.ss(<4 x float> [[NEG]], <4 x float> %{{.+}}, <4 x float> [[NEG2]]) return _mm_nmsub_ss(a, b, c); } __m128d test_mm_nmsub_sd(__m128d a, __m128d b, __m128d c) { // CHECK-LABEL: test_mm_nmsub_sd - // CHECK: @llvm.x86.fma.vfnmsub.sd + // CHECK: [[NEG:%.+]] = fsub <2 x double> , %{{.+}} + // CHECK: [[NEG2:%.+]] = fsub <2 x double> , %{{.+}} + // CHECK: @llvm.x86.fma4.vfmadd.sd(<2 x double> [[NEG]], <2 x double> %{{.+}}, <2 x double> [[NEG2]]) return _mm_nmsub_sd(a, b, c); }