From a51e0c2243d72ed0c412dc53b6c731a3ff21241f Mon Sep 17 00:00:00 2001 From: Gabor Buella Date: Tue, 1 May 2018 10:05:42 +0000 Subject: [PATCH] [X86] directstore and movdir64b intrinsics Reviewers: spatel, craig.topper, RKSimon Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D45984 llvm-svn: 331249 --- clang/docs/ClangCommandLineReference.rst | 4 ++ clang/include/clang/Basic/BuiltinsX86.def | 4 ++ clang/include/clang/Basic/BuiltinsX86_64.def | 1 + clang/include/clang/Driver/Options.td | 4 ++ clang/lib/Basic/Targets/X86.cpp | 14 +++++ clang/lib/Basic/Targets/X86.h | 2 + clang/lib/Headers/CMakeLists.txt | 1 + clang/lib/Headers/cpuid.h | 2 + clang/lib/Headers/module.modulemap | 1 + clang/lib/Headers/movdirintrin.h | 58 +++++++++++++++++++ clang/lib/Headers/x86intrin.h | 5 ++ clang/test/CodeGen/builtin-movdir.c | 31 ++++++++++ clang/test/Driver/x86-target-features.c | 10 ++++ .../Preprocessor/predefined-arch-macros.c | 4 ++ 14 files changed, 141 insertions(+) create mode 100644 clang/lib/Headers/movdirintrin.h create mode 100644 clang/test/CodeGen/builtin-movdir.c diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst index cafd6f96b234..62a13578aa23 100644 --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -2496,6 +2496,10 @@ X86 .. option:: -mmovbe, -mno-movbe +.. option:: -mmovdiri, -mno-movdiri + +.. option:: -mmovdir64b, -mno-movdir64b + .. option:: -mmpx, -mno-mpx .. 
option:: -mmwaitx, -mno-mwaitx diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 4bf3f77ab6d2..526adb16a123 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1883,6 +1883,10 @@ TARGET_BUILTIN(__builtin_ia32_clzero, "vv*", "", "clzero") // CLDEMOTE TARGET_BUILTIN(__builtin_ia32_cldemote, "vvC*", "", "cldemote") +// Direct Move +TARGET_BUILTIN(__builtin_ia32_directstore_u32, "vUi*Ui", "", "movdiri") +TARGET_BUILTIN(__builtin_ia32_movdir64b, "vv*vC*", "", "movdir64b") + // MSVC TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def index 6b4af7eb0f53..b1333c4fd90d 100644 --- a/clang/include/clang/Basic/BuiltinsX86_64.def +++ b/clang/include/clang/Basic/BuiltinsX86_64.def @@ -94,6 +94,7 @@ TARGET_BUILTIN(__builtin_ia32_cvtsi2sd64, "V2dV2dLLiIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_cvtsi2ss64, "V4fV4fLLiIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dULLiIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fULLiIi","","avx512f") +TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "", "movdiri") #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 944a51ce1611..4a9e22b8460d 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2660,6 +2660,10 @@ def mlzcnt : Flag<["-"], "mlzcnt">, Group; def mno_lzcnt : Flag<["-"], "mno-lzcnt">, Group; def mmovbe : Flag<["-"], "mmovbe">, Group; def mno_movbe : Flag<["-"], "mno-movbe">, Group; +def mmovdiri : Flag<["-"], "mmovdiri">, Group; +def mno_movdiri : Flag<["-"], "mno-movdiri">, Group; +def mmovdir64b : Flag<["-"], 
"mmovdir64b">, Group; +def mno_movdir64b : Flag<["-"], "mno-movdir64b">, Group; def mmpx : Flag<["-"], "mmpx">, Group; def mno_mpx : Flag<["-"], "mno-mpx">, Group; def mmwaitx : Flag<["-"], "mmwaitx">, Group; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index f2d643b515ae..bb098cb50659 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -246,6 +246,8 @@ bool X86TargetInfo::initFeatureMap( case CK_Tremont: setFeatureEnabledImpl(Features, "cldemote", true); + setFeatureEnabledImpl(Features, "movdiri", true); + setFeatureEnabledImpl(Features, "movdir64b", true); setFeatureEnabledImpl(Features, "gfni", true); setFeatureEnabledImpl(Features, "waitpkg", true); LLVM_FALLTHROUGH; @@ -821,6 +823,10 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, HasLAHFSAHF = true; } else if (Feature == "+waitpkg") { HasWAITPKG = true; + } else if (Feature == "+movdiri") { + HasMOVDIRI = true; + } else if (Feature == "+movdir64b") { + HasMOVDIR64B = true; } X86SSEEnum Level = llvm::StringSwitch(Feature) @@ -1177,6 +1183,10 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__CLDEMOTE__"); if (HasWAITPKG) Builder.defineMacro("__WAITPKG__"); + if (HasMOVDIRI) + Builder.defineMacro("__MOVDIRI__"); + if (HasMOVDIR64B) + Builder.defineMacro("__MOVDIR64B__"); // Each case falls through to the previous one here. 
switch (SSELevel) { @@ -1301,6 +1311,8 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const { .Case("lzcnt", true) .Case("mmx", true) .Case("movbe", true) + .Case("movdiri", true) + .Case("movdir64b", true) .Case("mpx", true) .Case("mwaitx", true) .Case("pclmul", true) @@ -1377,6 +1389,8 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("mm3dnowa", MMX3DNowLevel >= AMD3DNowAthlon) .Case("mmx", MMX3DNowLevel >= MMX) .Case("movbe", HasMOVBE) + .Case("movdiri", HasMOVDIRI) + .Case("movdir64b", HasMOVDIR64B) .Case("mpx", HasMPX) .Case("mwaitx", HasMWAITX) .Case("pclmul", HasPCLMUL) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 7f395218a02d..51509f727c52 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -103,6 +103,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasLAHFSAHF = false; bool HasWBNOINVD = false; bool HasWAITPKG = false; + bool HasMOVDIRI = false; + bool HasMOVDIR64B = false; protected: /// \brief Enumeration of all of the X86 CPUs supported by Clang. 
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index 3b240713210f..f8e1e606e3ea 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -66,6 +66,7 @@ set(files mmintrin.h mm_malloc.h module.modulemap + movdirintrin.h msa.h mwaitxintrin.h nmmintrin.h diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h index c35228c8585d..d3f82408a051 100644 --- a/clang/lib/Headers/cpuid.h +++ b/clang/lib/Headers/cpuid.h @@ -188,6 +188,8 @@ #define bit_AVX512VPOPCNTDQ 0x00004000 #define bit_RDPID 0x00400000 #define bit_CLDEMOTE 0x02000000 +#define bit_MOVDIRI 0x08000000 +#define bit_MOVDIR64B 0x10000000 /* Features in %edx for leaf 7 sub-leaf 0 */ #define bit_AVX5124VNNIW 0x00000004 diff --git a/clang/lib/Headers/module.modulemap b/clang/lib/Headers/module.modulemap index ce2cd90ae7fb..50b64a9e3539 100644 --- a/clang/lib/Headers/module.modulemap +++ b/clang/lib/Headers/module.modulemap @@ -66,6 +66,7 @@ module _Builtin_intrinsics [system] [extern_c] { textual header "wbnoinvdintrin.h" textual header "cldemoteintrin.h" textual header "waitpkgintrin.h" + textual header "movdirintrin.h" explicit module mm_malloc { requires !freestanding diff --git a/clang/lib/Headers/movdirintrin.h b/clang/lib/Headers/movdirintrin.h new file mode 100644 index 000000000000..746433799aa7 --- /dev/null +++ b/clang/lib/Headers/movdirintrin.h @@ -0,0 +1,58 @@ +/*===------------------------- movdirintrin.h ------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice 
and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __X86INTRIN_H +#error "Never use <movdirintrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef _MOVDIRINTRIN_H +#define _MOVDIRINTRIN_H + +// Move doubleword as direct store +static __inline__ void +__attribute__((__always_inline__, __nodebug__, __target__("movdiri"))) +_directstoreu_u32 (void *__dst, unsigned int __value) +{ + __builtin_ia32_directstore_u32((unsigned int *)__dst, (unsigned int)__value); +} + +#ifdef __x86_64__ + +// Move quadword as direct store +static __inline__ void +__attribute__((__always_inline__, __nodebug__, __target__("movdiri"))) +_directstoreu_u64 (void *__dst, unsigned long __value) +{ + __builtin_ia32_directstore_u64((unsigned long *)__dst, __value); +} + +#endif /* __x86_64__ */ + +// Move 64 bytes as direct store +static __inline__ void +__attribute__((__always_inline__, __nodebug__, __target__("movdir64b"))) +_movdir64b (void *__dst, const void *__src) +{ + __builtin_ia32_movdir64b(__dst, __src); +} + +#endif /* _MOVDIRINTRIN_H */ diff --git a/clang/lib/Headers/x86intrin.h b/clang/lib/Headers/x86intrin.h index b1dadc5818e1..42023a9425eb 100644 --- a/clang/lib/Headers/x86intrin.h +++ b/clang/lib/Headers/x86intrin.h @@ -100,4 +100,9 @@ #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || \ + defined(__MOVDIRI__) || defined(__MOVDIR64B__) +#include <movdirintrin.h>
+#endif + #endif /* __X86INTRIN_H */ diff --git a/clang/test/CodeGen/builtin-movdir.c b/clang/test/CodeGen/builtin-movdir.c new file mode 100644 index 000000000000..39f3fad997b6 --- /dev/null +++ b/clang/test/CodeGen/builtin-movdir.c @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -ffreestanding -Wall -pedantic -triple x86_64-unknown-unknown -target-feature +movdiri -target-feature +movdir64b %s -emit-llvm -o - | FileCheck %s --check-prefix=X86_64 --check-prefix=CHECK +// RUN: %clang_cc1 -ffreestanding -Wall -pedantic -triple i386-unknown-unknown -target-feature +movdiri -target-feature +movdir64b %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK + +#include <x86intrin.h> +#include <stdint.h> + +void test_directstore32(void *dst, uint32_t value) { + // CHECK-LABEL: test_directstore32 + // CHECK: call void @llvm.x86.directstore32 + _directstoreu_u32(dst, value); +} + +#ifdef __x86_64__ + +void test_directstore64(void *dst, uint64_t value) { + // X86_64-LABEL: test_directstore64 + // X86_64: call void @llvm.x86.directstore64 + _directstoreu_u64(dst, value); +} + +#endif + +void test_dir64b(void *dst, const void *src) { + // CHECK-LABEL: test_dir64b + // CHECK: call void @llvm.x86.movdir64b + _movdir64b(dst, src); +} + +// CHECK: declare void @llvm.x86.directstore32(i8*, i32) +// X86_64: declare void @llvm.x86.directstore64(i8*, i64) +// CHECK: declare void @llvm.x86.movdir64b(i8*, i8*) diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c index b066babb72f0..8073acd95306 100644 --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -149,3 +149,13 @@ // RUN: %clang -target i386-linux-gnu -mno-waitpkg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-WAITPKG %s // WAITPKG: "-target-feature" "+waitpkg" // NO-WAITPKG: "-target-feature" "-waitpkg" + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mmovdiri %s -### -o %t.o 2>&1 | FileCheck -check-prefix=MOVDIRI %s +// RUN: %clang -target i386-unknown-linux-gnu
-march=i386 -mno-movdiri %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-MOVDIRI %s +// MOVDIRI: "-target-feature" "+movdiri" +// NO-MOVDIRI: "-target-feature" "-movdiri" + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mmovdir64b %s -### -o %t.o 2>&1 | FileCheck -check-prefix=MOVDIR64B %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-movdir64b %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-MOVDIR64B %s +// MOVDIR64B: "-target-feature" "+movdir64b" +// NO-MOVDIR64B: "-target-feature" "-movdir64b" diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index fad91793da37..a22f6f649779 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -1466,6 +1466,8 @@ // CHECK_TRM_M32: #define __FXSR__ 1 // CHECK_TRM_M32: #define __GFNI__ 1 // CHECK_TRM_M32: #define __MMX__ 1 +// CHECK_TRM_M32: #define __MOVDIR64B__ 1 +// CHECK_TRM_M32: #define __MOVDIRI__ 1 // CHECK_TRM_M32: #define __MPX__ 1 // CHECK_TRM_M32: #define __PCLMUL__ 1 // CHECK_TRM_M32: #define __POPCNT__ 1 @@ -1504,6 +1506,8 @@ // CHECK_TRM_M64: #define __FXSR__ 1 // CHECK_TRM_M64: #define __GFNI__ 1 // CHECK_TRM_M64: #define __MMX__ 1 +// CHECK_TRM_M64: #define __MOVDIR64B__ 1 +// CHECK_TRM_M64: #define __MOVDIRI__ 1 // CHECK_TRM_M64: #define __MPX__ 1 // CHECK_TRM_M64: #define __PCLMUL__ 1 // CHECK_TRM_M64: #define __POPCNT__ 1