forked from OSchip/llvm-project
[X86] Clzero flag addition and inclusion under znver1
1. Adds the command line flag for clzero. 2. Includes the clzero flag under znver1. 3. Defines the macro for clzero. 4. Adds a new file which has the intrinsic definition for clzero instruction. Patch by Ganesh Gopalasubramanian with some additional tests from me. Differential revision: https://reviews.llvm.org/D29386 llvm-svn: 294559
This commit is contained in:
parent
50f3d1452c
commit
4574226c3f
|
@ -1814,6 +1814,9 @@ TARGET_BUILTIN(__builtin_ia32_selectpd_512, "V8dUcV8dV8d", "", "")
|
|||
TARGET_BUILTIN(__builtin_ia32_monitorx, "vv*UiUi", "", "mwaitx")
|
||||
TARGET_BUILTIN(__builtin_ia32_mwaitx, "vUiUiUi", "", "mwaitx")
|
||||
|
||||
// CLZERO
|
||||
TARGET_BUILTIN(__builtin_ia32_clzero, "vv*", "", "clzero")
|
||||
|
||||
// MSVC
|
||||
TARGET_HEADER_BUILTIN(_BitScanForward, "UcULi*ULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
|
||||
TARGET_HEADER_BUILTIN(_BitScanReverse, "UcULi*ULi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
|
||||
|
|
|
@ -1714,6 +1714,7 @@ def mno_xsaveopt : Flag<["-"], "mno-xsaveopt">, Group<m_x86_Features_Group>;
|
|||
def mno_xsavec : Flag<["-"], "mno-xsavec">, Group<m_x86_Features_Group>;
|
||||
def mno_xsaves : Flag<["-"], "mno-xsaves">, Group<m_x86_Features_Group>;
|
||||
def mno_mwaitx : Flag<["-"], "mno-mwaitx">, Group<m_x86_Features_Group>;
|
||||
def mno_clzero : Flag<["-"], "mno-clzero">, Group<m_x86_Features_Group>;
|
||||
def mno_pku : Flag<["-"], "mno-pku">, Group<m_x86_Features_Group>;
|
||||
def mno_clflushopt : Flag<["-"], "mno-clflushopt">, Group<m_x86_Features_Group>;
|
||||
def mno_clwb : Flag<["-"], "mno-clwb">, Group<m_x86_Features_Group>;
|
||||
|
@ -1913,6 +1914,7 @@ def mxsaveopt : Flag<["-"], "mxsaveopt">, Group<m_x86_Features_Group>;
|
|||
def mxsavec : Flag<["-"], "mxsavec">, Group<m_x86_Features_Group>;
|
||||
def mxsaves : Flag<["-"], "mxsaves">, Group<m_x86_Features_Group>;
|
||||
def mmwaitx : Flag<["-"], "mmwaitx">, Group<m_x86_Features_Group>;
|
||||
def mclzero : Flag<["-"], "mclzero">, Group<m_x86_Features_Group>;
|
||||
def mclflushopt : Flag<["-"], "mclflushopt">, Group<m_x86_Features_Group>;
|
||||
def mclwb : Flag<["-"], "mclwb">, Group<m_x86_Features_Group>;
|
||||
def mmovbe : Flag<["-"], "mmovbe">, Group<m_x86_Features_Group>;
|
||||
|
|
|
@ -2489,6 +2489,7 @@ class X86TargetInfo : public TargetInfo {
|
|||
bool HasXSAVEC = false;
|
||||
bool HasXSAVES = false;
|
||||
bool HasMWAITX = false;
|
||||
bool HasCLZERO = false;
|
||||
bool HasPKU = false;
|
||||
bool HasCLFLUSHOPT = false;
|
||||
bool HasCLWB = false;
|
||||
|
@ -3201,6 +3202,7 @@ bool X86TargetInfo::initFeatureMap(
|
|||
setFeatureEnabledImpl(Features, "bmi", true);
|
||||
setFeatureEnabledImpl(Features, "bmi2", true);
|
||||
setFeatureEnabledImpl(Features, "clflushopt", true);
|
||||
setFeatureEnabledImpl(Features, "clzero", true);
|
||||
setFeatureEnabledImpl(Features, "cx16", true);
|
||||
setFeatureEnabledImpl(Features, "f16c", true);
|
||||
setFeatureEnabledImpl(Features, "fma", true);
|
||||
|
@ -3560,6 +3562,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
|
|||
HasCLWB = true;
|
||||
} else if (Feature == "+prefetchwt1") {
|
||||
HasPREFETCHWT1 = true;
|
||||
} else if (Feature == "+clzero") {
|
||||
HasCLZERO = true;
|
||||
}
|
||||
|
||||
X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
|
||||
|
@ -3887,6 +3891,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
|
|||
Builder.defineMacro("__SGX__");
|
||||
if (HasPREFETCHWT1)
|
||||
Builder.defineMacro("__PREFETCHWT1__");
|
||||
if (HasCLZERO)
|
||||
Builder.defineMacro("__CLZERO__");
|
||||
|
||||
// Each case falls through to the previous one here.
|
||||
switch (SSELevel) {
|
||||
|
@ -3973,6 +3979,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
|
|||
.Case("bmi2", HasBMI2)
|
||||
.Case("clflushopt", HasCLFLUSHOPT)
|
||||
.Case("clwb", HasCLWB)
|
||||
.Case("clzero", HasCLZERO)
|
||||
.Case("cx16", HasCX16)
|
||||
.Case("f16c", HasF16C)
|
||||
.Case("fma", HasFMA)
|
||||
|
|
|
@ -28,6 +28,7 @@ set(files
|
|||
__clang_cuda_intrinsics.h
|
||||
__clang_cuda_math_forward_declares.h
|
||||
__clang_cuda_runtime_wrapper.h
|
||||
clzerointrin.h
|
||||
cpuid.h
|
||||
clflushoptintrin.h
|
||||
emmintrin.h
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
/*===----------------------- clzerointrin.h - CLZERO ----------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __X86INTRIN_H
|
||||
#error "Never use <clzerointrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _CLZEROINTRIN_H
|
||||
#define _CLZEROINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("clzero")))
|
||||
|
||||
/// \brief Loads the cache line address and zero's out the cacheline
|
||||
///
|
||||
/// \headerfile <clzerointrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> CLZERO </c> instruction.
|
||||
///
|
||||
/// \param __line
|
||||
/// A pointer to a cacheline which needs to be zeroed out.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_clzero (void * __line)
|
||||
{
|
||||
__builtin_ia32_clzero ((void *)__line);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif /* _CLZEROINTRIN_H */
|
|
@ -61,6 +61,7 @@ module _Builtin_intrinsics [system] [extern_c] {
|
|||
textual header "xopintrin.h"
|
||||
textual header "fma4intrin.h"
|
||||
textual header "mwaitxintrin.h"
|
||||
textual header "clzerointrin.h"
|
||||
|
||||
explicit module mm_malloc {
|
||||
requires !freestanding
|
||||
|
|
|
@ -80,6 +80,10 @@
|
|||
#include <mwaitxintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__)
|
||||
#include <clzerointrin.h>
|
||||
#endif
|
||||
|
||||
/* FIXME: LWP */
|
||||
|
||||
#endif /* __X86INTRIN_H */
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include <immintrin.h>
|
||||
void test_mm_clflushopt(char * __m) {
|
||||
//CHECK-LABLE: @test_mm_clflushopt
|
||||
//CHECK-LABEL: @test_mm_clflushopt
|
||||
//CHECK: @llvm.x86.clflushopt
|
||||
_mm_clflushopt(__m);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +clzero -emit-llvm -o - -Wall -Werror | FileCheck %s
|
||||
#define __MM_MALLOC_H
|
||||
|
||||
#include <x86intrin.h>
|
||||
void test_mm_clzero(void * __m) {
|
||||
//CHECK-LABEL: @test_mm_clzero
|
||||
//CHECK: @llvm.x86.clflushopt
|
||||
_mm_clzero(__m);
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
// RUN: %clang_cc1 -DUSE_64 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -emit-llvm -o %t %s
|
||||
// RUN: %clang_cc1 -DUSE_ALL -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -fsyntax-only -o %t %s
|
||||
// RUN: %clang_cc1 -DUSE_64 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -emit-llvm -o %t %s
|
||||
// RUN: %clang_cc1 -DUSE_ALL -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -fsyntax-only -o %t %s
|
||||
|
||||
#ifdef USE_ALL
|
||||
#define USE_3DNOW
|
||||
|
@ -285,6 +285,7 @@ void f0() {
|
|||
|
||||
(void) __builtin_ia32_monitorx(tmp_vp, tmp_Ui, tmp_Ui);
|
||||
(void) __builtin_ia32_mwaitx(tmp_Ui, tmp_Ui, tmp_Ui);
|
||||
(void) __builtin_ia32_clzero(tmp_vp);
|
||||
|
||||
tmp_V4f = __builtin_ia32_cvtpi2ps(tmp_V4f, tmp_V2i);
|
||||
tmp_V2i = __builtin_ia32_cvtps2pi(tmp_V4f);
|
||||
|
|
|
@ -79,3 +79,8 @@
|
|||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-prefetchwt1 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-PREFETCHWT1 %s
|
||||
// PREFETCHWT1: "-target-feature" "+prefetchwt1"
|
||||
// NO-PREFETCHWT1: "-target-feature" "-prefetchwt1"
|
||||
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mclzero %s -### -o %t.o 2>&1 | FileCheck -check-prefix=CLZERO %s
|
||||
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-clzero %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CLZERO %s
|
||||
// CLZERO: "-target-feature" "+clzero"
|
||||
// NO-CLZERO: "-target-feature" "-clzero"
|
||||
|
|
|
@ -1883,6 +1883,7 @@
|
|||
// CHECK_ZNVER1_M32: #define __BMI2__ 1
|
||||
// CHECK_ZNVER1_M32: #define __BMI__ 1
|
||||
// CHECK_ZNVER1_M32: #define __CLFLUSHOPT__ 1
|
||||
// CHECK_ZNVER1_M32: #define __CLZERO__ 1
|
||||
// CHECK_ZNVER1_M32: #define __F16C__ 1
|
||||
// CHECK_ZNVER1_M32: #define __FMA__ 1
|
||||
// CHECK_ZNVER1_M32: #define __FSGSBASE__ 1
|
||||
|
@ -1924,6 +1925,7 @@
|
|||
// CHECK_ZNVER1_M64: #define __BMI2__ 1
|
||||
// CHECK_ZNVER1_M64: #define __BMI__ 1
|
||||
// CHECK_ZNVER1_M64: #define __CLFLUSHOPT__ 1
|
||||
// CHECK_ZNVER1_M64: #define __CLZERO__ 1
|
||||
// CHECK_ZNVER1_M64: #define __F16C__ 1
|
||||
// CHECK_ZNVER1_M64: #define __FMA__ 1
|
||||
// CHECK_ZNVER1_M64: #define __FSGSBASE__ 1
|
||||
|
|
Loading…
Reference in New Issue