forked from OSchip/llvm-project
[X86][LWP] Add clang support for LWP instructions.
This patch adds support for the LightWeight Profiling (LWP) instructions, which are available on all AMD Bulldozer class CPUs (bdver1 to bdver4). Differential Revision: https://reviews.llvm.org/D32770 llvm-svn: 302418
This commit is contained in:
parent
f5ca255d18
commit
3511348dbb
|
@ -668,6 +668,12 @@ TARGET_BUILTIN(__builtin_ia32_pext_si, "UiUiUi", "", "bmi2")
|
|||
// TBM
|
||||
TARGET_BUILTIN(__builtin_ia32_bextri_u32, "UiUiIUi", "", "tbm")
|
||||
|
||||
// LWP
|
||||
TARGET_BUILTIN(__builtin_ia32_llwpcb, "vv*", "", "lwp")
|
||||
TARGET_BUILTIN(__builtin_ia32_slwpcb, "v*", "", "lwp")
|
||||
// The last operand (flags) carries the 'I' modifier, i.e. it must be an
// integer constant expression: the intrinsic header documents it as a 32-bit
// immediate, and this mirrors the immediate operand of
// __builtin_ia32_bextri_u32 above. Without it a non-constant flags value is
// only caught (if at all) after Sema.
TARGET_BUILTIN(__builtin_ia32_lwpins32, "UcUiUiIUi", "", "lwp")
TARGET_BUILTIN(__builtin_ia32_lwpval32, "vUiUiIUi", "", "lwp")
|
||||
|
||||
// SHA
|
||||
TARGET_BUILTIN(__builtin_ia32_sha1rnds4, "V4iV4iV4iIc", "", "sha")
|
||||
TARGET_BUILTIN(__builtin_ia32_sha1nexte, "V4iV4iV4i", "", "sha")
|
||||
|
|
|
@ -69,6 +69,8 @@ TARGET_BUILTIN(__builtin_ia32_bzhi_di, "ULLiULLiULLi", "", "bmi2")
|
|||
TARGET_BUILTIN(__builtin_ia32_pdep_di, "ULLiULLiULLi", "", "bmi2")
|
||||
TARGET_BUILTIN(__builtin_ia32_pext_di, "ULLiULLiULLi", "", "bmi2")
|
||||
TARGET_BUILTIN(__builtin_ia32_bextri_u64, "ULLiULLiIULLi", "", "tbm")
|
||||
// 64-bit LWP builtins. The last operand (flags) carries the 'I' modifier so
// that it must be an integer constant expression, like the immediate operand
// of __builtin_ia32_bextri_u64 above; the intrinsic header documents it as a
// 32-bit immediate.
TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcULLiUiIUi", "", "lwp")
TARGET_BUILTIN(__builtin_ia32_lwpval64, "vULLiUiIUi", "", "lwp")
|
||||
TARGET_BUILTIN(__builtin_ia32_pbroadcastq512_gpr_mask, "V8LLiLLiV8LLiUc", "", "avx512f")
|
||||
TARGET_BUILTIN(__builtin_ia32_pbroadcastq128_gpr_mask, "V2LLiULLiV2LLiUc","","avx512vl")
|
||||
TARGET_BUILTIN(__builtin_ia32_pbroadcastq256_gpr_mask, "V4LLiULLiV4LLiUc","","avx512vl")
|
||||
|
|
|
@ -1752,6 +1752,7 @@ def mno_bmi : Flag<["-"], "mno-bmi">, Group<m_x86_Features_Group>;
|
|||
def mno_bmi2 : Flag<["-"], "mno-bmi2">, Group<m_x86_Features_Group>;
|
||||
def mno_popcnt : Flag<["-"], "mno-popcnt">, Group<m_x86_Features_Group>;
|
||||
def mno_tbm : Flag<["-"], "mno-tbm">, Group<m_x86_Features_Group>;
|
||||
def mno_lwp : Flag<["-"], "mno-lwp">, Group<m_x86_Features_Group>;
|
||||
def mno_fma4 : Flag<["-"], "mno-fma4">, Group<m_x86_Features_Group>;
|
||||
def mno_fma : Flag<["-"], "mno-fma">, Group<m_x86_Features_Group>;
|
||||
def mno_xop : Flag<["-"], "mno-xop">, Group<m_x86_Features_Group>;
|
||||
|
@ -1951,6 +1952,7 @@ def mbmi : Flag<["-"], "mbmi">, Group<m_x86_Features_Group>;
|
|||
def mbmi2 : Flag<["-"], "mbmi2">, Group<m_x86_Features_Group>;
|
||||
def mpopcnt : Flag<["-"], "mpopcnt">, Group<m_x86_Features_Group>;
|
||||
def mtbm : Flag<["-"], "mtbm">, Group<m_x86_Features_Group>;
|
||||
def mlwp : Flag<["-"], "mlwp">, Group<m_x86_Features_Group>;
|
||||
def mfma4 : Flag<["-"], "mfma4">, Group<m_x86_Features_Group>;
|
||||
def mfma : Flag<["-"], "mfma">, Group<m_x86_Features_Group>;
|
||||
def mxop : Flag<["-"], "mxop">, Group<m_x86_Features_Group>;
|
||||
|
|
|
@ -2591,6 +2591,7 @@ class X86TargetInfo : public TargetInfo {
|
|||
bool HasRDSEED = false;
|
||||
bool HasADX = false;
|
||||
bool HasTBM = false;
|
||||
bool HasLWP = false;
|
||||
bool HasFMA = false;
|
||||
bool HasF16C = false;
|
||||
bool HasAVX512CD = false;
|
||||
|
@ -3363,6 +3364,7 @@ bool X86TargetInfo::initFeatureMap(
|
|||
case CK_BDVER1:
|
||||
// xop implies avx, sse4a and fma4.
|
||||
setFeatureEnabledImpl(Features, "xop", true);
|
||||
setFeatureEnabledImpl(Features, "lwp", true);
|
||||
setFeatureEnabledImpl(Features, "lzcnt", true);
|
||||
setFeatureEnabledImpl(Features, "aes", true);
|
||||
setFeatureEnabledImpl(Features, "pclmul", true);
|
||||
|
@ -3634,6 +3636,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
|
|||
HasADX = true;
|
||||
} else if (Feature == "+tbm") {
|
||||
HasTBM = true;
|
||||
} else if (Feature == "+lwp") {
|
||||
HasLWP = true;
|
||||
} else if (Feature == "+fma") {
|
||||
HasFMA = true;
|
||||
} else if (Feature == "+f16c") {
|
||||
|
@ -3949,6 +3953,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
|
|||
if (HasTBM)
|
||||
Builder.defineMacro("__TBM__");
|
||||
|
||||
if (HasLWP)
|
||||
Builder.defineMacro("__LWP__");
|
||||
|
||||
if (HasMWAITX)
|
||||
Builder.defineMacro("__MWAITX__");
|
||||
|
||||
|
@ -4132,6 +4139,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
|
|||
.Case("sse4.2", SSELevel >= SSE42)
|
||||
.Case("sse4a", XOPLevel >= SSE4A)
|
||||
.Case("tbm", HasTBM)
|
||||
.Case("lwp", HasLWP)
|
||||
.Case("x86", true)
|
||||
.Case("x86_32", getTriple().getArch() == llvm::Triple::x86)
|
||||
.Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64)
|
||||
|
|
|
@ -45,6 +45,7 @@ set(files
|
|||
inttypes.h
|
||||
iso646.h
|
||||
limits.h
|
||||
lwpintrin.h
|
||||
lzcntintrin.h
|
||||
mm3dnow.h
|
||||
mmintrin.h
|
||||
|
|
|
@ -0,0 +1,150 @@
|
|||
/*===---- lwpintrin.h - LWP intrinsics -------------------------------------===
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __X86INTRIN_H
|
||||
#error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __LWPINTRIN_H
|
||||
#define __LWPINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lwp")))
|
||||
|
||||
/// \brief Parses the LWPCB at the specified address and enables
|
||||
/// profiling if valid.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> LLWPCB </c> instruction.
|
||||
///
|
||||
/// \param __addr
|
||||
/// Address to the new Lightweight Profiling Control Block (LWPCB). If the
|
||||
/// LWPCB is valid, writes the address into the LWP_CBADDR MSR and enables
|
||||
/// Lightweight Profiling.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
__llwpcb (void *__addr)
|
||||
{
|
||||
__builtin_ia32_llwpcb(__addr);
|
||||
}
|
||||
|
||||
/// \brief Flushes the LWP state to memory and returns the address of the LWPCB.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> SLWPCB </c> instruction.
|
||||
///
|
||||
/// \return
|
||||
/// Address to the current Lightweight Profiling Control Block (LWPCB).
|
||||
/// If LWP is not currently enabled, returns NULL.
|
||||
static __inline__ void* __DEFAULT_FN_ATTRS
|
||||
__slwpcb ()
|
||||
{
|
||||
return __builtin_ia32_slwpcb();
|
||||
}
|
||||
|
||||
/// \brief Writes a programmed event record into the LWP event ring buffer and
///    advances the ring buffer pointer.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> LWPINS </c> instruction.
///
/// \param DATA2
///    A 32-bit value that is zero-extended and stored in the 64-bit Data2
///    field.
/// \param DATA1
///    A 32-bit value that is stored in the 32-bit Data1 field.
/// \param FLAGS
///    A 32-bit immediate value that is stored in the 32-bit Flags field.
/// \returns 1 if the ring buffer is full and LWP is running in Synchronized
///    Mode; in that case the event record overwrites the last record in the
///    buffer, the MissedEvents counter in the LWPCB is incremented and the
///    head pointer is not advanced. Otherwise 0 is returned.
#define __lwpins32(DATA2, DATA1, FLAGS)                                        \
  (__builtin_ia32_lwpins32((unsigned int) (DATA2),                             \
                           (unsigned int) (DATA1),                             \
                           (unsigned int) (FLAGS)))
|
||||
|
||||
/// \brief Decrements the LWP programmed value sample event counter. When the
///    result is negative, an event record is written into the LWP event ring
///    buffer in memory and the ring buffer pointer is advanced.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> LWPVAL </c> instruction.
///
/// \param DATA2
///    A 32-bit value that is zero-extended and stored in the 64-bit Data2
///    field.
/// \param DATA1
///    A 32-bit value that is stored in the 32-bit Data1 field.
/// \param FLAGS
///    A 32-bit immediate value that is stored in the 32-bit Flags field.
#define __lwpval32(DATA2, DATA1, FLAGS)                                        \
  (__builtin_ia32_lwpval32((unsigned int) (DATA2),                             \
                           (unsigned int) (DATA1),                             \
                           (unsigned int) (FLAGS)))
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
/// \brief Writes a programmed event record into the LWP event ring buffer and
///    advances the ring buffer pointer.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> LWPINS </c> instruction.
///
/// \param DATA2
///    A 64-bit value that is stored in the 64-bit Data2 field.
/// \param DATA1
///    A 32-bit value that is stored in the 32-bit Data1 field.
/// \param FLAGS
///    A 32-bit immediate value that is stored in the 32-bit Flags field.
/// \returns 1 if the ring buffer is full and LWP is running in Synchronized
///    Mode; in that case the event record overwrites the last record in the
///    buffer, the MissedEvents counter in the LWPCB is incremented and the
///    head pointer is not advanced. Otherwise 0 is returned.
#define __lwpins64(DATA2, DATA1, FLAGS)                                        \
  (__builtin_ia32_lwpins64((unsigned long long) (DATA2),                       \
                           (unsigned int) (DATA1),                             \
                           (unsigned int) (FLAGS)))
|
||||
|
||||
/// \brief Decrements the LWP programmed value sample event counter. When the
///    result is negative, an event record is written into the LWP event ring
///    buffer in memory and the ring buffer pointer is advanced.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> LWPVAL </c> instruction.
///
/// \param DATA2
///    A 64-bit value that is stored in the 64-bit Data2 field.
/// \param DATA1
///    A 32-bit value that is stored in the 32-bit Data1 field.
/// \param FLAGS
///    A 32-bit immediate value that is stored in the 32-bit Flags field.
#define __lwpval64(DATA2, DATA1, FLAGS)                                        \
  (__builtin_ia32_lwpval64((unsigned long long) (DATA2),                       \
                           (unsigned int) (DATA1),                             \
                           (unsigned int) (FLAGS)))
|
||||
|
||||
#endif
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif /* __LWPINTRIN_H */
|
|
@ -72,6 +72,10 @@
|
|||
#include <tbmintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LWP__)
|
||||
#include <lwpintrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
|
||||
#include <f16cintrin.h>
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lwp -emit-llvm -o - -Wall -Werror | FileCheck %s
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
// Verifies that __llwpcb() is emitted as a call to the llvm.x86.llwpcb
// intrinsic, which takes the control-block pointer as an i8* and returns void.
void test_llwpcb(void *ptr) {
|
||||
// CHECK-LABEL: @test_llwpcb
|
||||
// CHECK: call void @llvm.x86.llwpcb(i8* %{{.*}})
|
||||
__llwpcb(ptr);
|
||||
}
|
||||
|
||||
// Verifies that __slwpcb() is emitted as a call to the argument-less
// llvm.x86.slwpcb intrinsic and that its i8* result is what gets returned.
void* test_slwpcb() {
|
||||
// CHECK-LABEL: @test_slwpcb
|
||||
// CHECK: call i8* @llvm.x86.slwpcb()
|
||||
return __slwpcb();
|
||||
}
|
||||
|
||||
// Verifies that __lwpins32() with a constant flags value is emitted as a call
// to the llvm.x86.lwpins32 intrinsic, whose first operand is i32 and whose
// result is i8.
unsigned char test_lwpins32(unsigned val2, unsigned val1) {
|
||||
// CHECK-LABEL: @test_lwpins32
|
||||
// CHECK: call i8 @llvm.x86.lwpins32(i32
|
||||
return __lwpins32(val2, val1, 0x01234);
|
||||
}
|
||||
|
||||
// Verifies that __lwpins64() with a constant flags value is emitted as a call
// to the llvm.x86.lwpins64 intrinsic, whose first operand is i64 and whose
// result is i8.
unsigned char test_lwpins64(unsigned long long val2, unsigned val1) {
|
||||
// CHECK-LABEL: @test_lwpins64
|
||||
// CHECK: call i8 @llvm.x86.lwpins64(i64
|
||||
return __lwpins64(val2, val1, 0x56789);
|
||||
}
|
||||
|
||||
// Verifies that __lwpval32() with a constant flags value is emitted as a call
// to the void llvm.x86.lwpval32 intrinsic, whose first operand is i32.
void test_lwpval32(unsigned val2, unsigned val1) {
|
||||
// CHECK-LABEL: @test_lwpval32
|
||||
// CHECK: call void @llvm.x86.lwpval32(i32
|
||||
__lwpval32(val2, val1, 0x01234);
|
||||
}
|
||||
|
||||
// Verifies that __lwpval64() with a constant flags value is emitted as a call
// to the void llvm.x86.lwpval64 intrinsic, whose first operand is i64.
void test_lwpval64(unsigned long long val2, unsigned val1) {
|
||||
// CHECK-LABEL: @test_lwpval64
|
||||
// CHECK: call void @llvm.x86.lwpval64(i64
|
||||
__lwpval64(val2, val1, 0xABCDEF);
|
||||
}
|
Loading…
Reference in New Issue