[X86] Add RDPRU instruction

Add support for the RDPRU instruction, available on AMD Zen2 and later processors.

User-facing features:

- Clang option -m[no-]rdpru to enable/disable the feature
- Support is implicit for znver2/znver3 processors
- Preprocessor symbol __RDPRU__ to indicate support
- Header rdpruintrin.h to define intrinsics
- "rdpru" mnemonic supported for assembler code

Internal features:

- Clang builtin __builtin_ia32_rdpru
- IR intrinsic @llvm.x86.rdpru

Differential Revision: https://reviews.llvm.org/D128934
This commit is contained in:
Paul Robinson 2022-06-30 13:23:41 -07:00
parent 10f6d61bf1
commit 08e4fe6c61
24 changed files with 267 additions and 3 deletions

View File

@ -527,6 +527,9 @@ X86 Support in Clang
- Support for the ``_Float16`` type has been added for all targets with SSE2.
When AVX512-FP16 is not available, arithmetic on ``_Float16`` is emulated
using ``float``.
- Added the ``-m[no-]rdpru`` flag to enable/disable the RDPRU instruction
provided by AMD Zen2 and later processors. Defined intrinsics for using
this instruction (see rdpruintrin.h).
DWARF Support in Clang
----------------------

View File

@ -825,6 +825,7 @@ BUILTIN(__rdtsc, "UOi", "")
BUILTIN(__builtin_ia32_rdtscp, "UOiUi*", "")
TARGET_BUILTIN(__builtin_ia32_rdpid, "Ui", "n", "rdpid")
TARGET_BUILTIN(__builtin_ia32_rdpru, "ULLii", "n", "rdpru")
// PKU
TARGET_BUILTIN(__builtin_ia32_rdpkru, "Ui", "n", "pku")

View File

@ -4570,6 +4570,8 @@ def mptwrite : Flag<["-"], "mptwrite">, Group<m_x86_Features_Group>;
def mno_ptwrite : Flag<["-"], "mno-ptwrite">, Group<m_x86_Features_Group>;
def mrdpid : Flag<["-"], "mrdpid">, Group<m_x86_Features_Group>;
def mno_rdpid : Flag<["-"], "mno-rdpid">, Group<m_x86_Features_Group>;
def mrdpru : Flag<["-"], "mrdpru">, Group<m_x86_Features_Group>;
def mno_rdpru : Flag<["-"], "mno-rdpru">, Group<m_x86_Features_Group>;
def mrdrnd : Flag<["-"], "mrdrnd">, Group<m_x86_Features_Group>;
def mno_rdrnd : Flag<["-"], "mno-rdrnd">, Group<m_x86_Features_Group>;
def mrtm : Flag<["-"], "mrtm">, Group<m_x86_Features_Group>;

View File

@ -297,6 +297,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasCLDEMOTE = true;
} else if (Feature == "+rdpid") {
HasRDPID = true;
} else if (Feature == "+rdpru") {
HasRDPRU = true;
} else if (Feature == "+kl") {
HasKL = true;
} else if (Feature == "+widekl") {
@ -743,6 +745,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__WIDEKL__");
if (HasRDPID)
Builder.defineMacro("__RDPID__");
if (HasRDPRU)
Builder.defineMacro("__RDPRU__");
if (HasCLDEMOTE)
Builder.defineMacro("__CLDEMOTE__");
if (HasWAITPKG)
@ -926,6 +930,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("prfchw", true)
.Case("ptwrite", true)
.Case("rdpid", true)
.Case("rdpru", true)
.Case("rdrnd", true)
.Case("rdseed", true)
.Case("rtm", true)
@ -1021,6 +1026,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("prfchw", HasPRFCHW)
.Case("ptwrite", HasPTWRITE)
.Case("rdpid", HasRDPID)
.Case("rdpru", HasRDPRU)
.Case("rdrnd", HasRDRND)
.Case("rdseed", HasRDSEED)
.Case("retpoline-external-thunk", HasRetpolineExternalThunk)

View File

@ -125,6 +125,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasMOVBE = false;
bool HasPREFETCHWT1 = false;
bool HasRDPID = false;
bool HasRDPRU = false;
bool HasRetpolineExternalThunk = false;
bool HasLAHFSAHF = false;
bool HasWBNOINVD = false;

View File

@ -170,6 +170,7 @@ set(x86_files
popcntintrin.h
prfchwintrin.h
ptwriteintrin.h
rdpruintrin.h
rdseedintrin.h
rtmintrin.h
serializeintrin.h

View File

@ -0,0 +1,57 @@
/*===---- rdpruintrin.h - RDPRU intrinsics ---------------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
*===-----------------------------------------------------------------------===
*/
#if !defined __X86INTRIN_H
#error "Never use <rdpruintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef __RDPRUINTRIN_H
#define __RDPRUINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("rdpru")))
/// Reads the content of a processor register.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> RDPRU </c> instruction.
///
/// \param reg_id
///    A processor register identifier, for example \c __RDPRU_MPERF or
///    \c __RDPRU_APERF.
/// \returns The content of the selected processor register.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__rdpru (int reg_id)
{
return __builtin_ia32_rdpru(reg_id);
}
#define __RDPRU_MPERF 0
#define __RDPRU_APERF 1
/// Reads the content of processor register MPERF.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic generates instruction <c> RDPRU </c> to read the value of
/// register MPERF.
///
/// \returns The value of register MPERF.
#define __mperf() __builtin_ia32_rdpru(__RDPRU_MPERF)
/// Reads the content of processor register APERF.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic generates instruction <c> RDPRU </c> to read the value of
/// register APERF.
///
/// \returns The value of register APERF.
#define __aperf() __builtin_ia32_rdpru(__RDPRU_APERF)
#undef __DEFAULT_FN_ATTRS
#endif /* __RDPRUINTRIN_H */

View File

@ -59,5 +59,9 @@
#include <clzerointrin.h>
#endif
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__RDPRU__)
#include <rdpruintrin.h>
#endif
#endif /* __X86INTRIN_H */

View File

@ -0,0 +1,37 @@
// RUN: %clang_cc1 -ffreestanding %s -triple=i686-- -target-feature +rdpru -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +rdpru -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-cpu znver2 -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <x86intrin.h>
// NOTE: This should correspond to the tests in llvm/test/CodeGen/X86/rdpru.ll
// Verifies that __rdpru() with a run-time register id becomes a single call to
// the rdpru IR intrinsic whose result is returned directly.
unsigned long long test_rdpru(int regid) {
// CHECK-LABEL: test_rdpru
// CHECK: [[RESULT:%.*]] = call i64 @llvm.x86.rdpru(i32 %{{.*}})
// CHECK-NEXT: ret i64 [[RESULT]]
return __rdpru(regid);
}
// Verifies that the __mperf() macro passes register id 0 to the rdpru IR
// intrinsic and returns its result directly.
unsigned long long test_mperf() {
// CHECK-LABEL: test_mperf
// CHECK: [[RESULT:%.*]] = call i64 @llvm.x86.rdpru(i32 0)
// CHECK-NEXT: ret i64 [[RESULT]]
return __mperf();
}
// Verifies that the __aperf() macro passes register id 1 to the rdpru IR
// intrinsic and returns its result directly.
unsigned long long test_aperf() {
// CHECK-LABEL: test_aperf
// CHECK: [[RESULT:%.*]] = call i64 @llvm.x86.rdpru(i32 1)
// CHECK-NEXT: ret i64 [[RESULT]]
return __aperf();
}
// Verifies that the builtin can be called directly, without going through the
// wrappers from the header, for both constant and run-time register ids.
// The label check anchors the three call checks to this function's IR so they
// cannot be satisfied by calls emitted for any other function.
void test_direct_calls_to_builtin_rdpru(int regid) {
// CHECK-LABEL: test_direct_calls_to_builtin_rdpru
// CHECK: call i64 @llvm.x86.rdpru(i32 0)
// CHECK: call i64 @llvm.x86.rdpru(i32 1)
// CHECK: call i64 @llvm.x86.rdpru(i32 %{{.*}})
(void) __builtin_ia32_rdpru(0);
(void) __builtin_ia32_rdpru(1);
(void) __builtin_ia32_rdpru(regid);
}

View File

@ -136,6 +136,11 @@
// RDPID: "-target-feature" "+rdpid"
// NO-RDPID: "-target-feature" "-rdpid"
// RUN: %clang --target=i386 -march=i386 -mrdpru %s -### 2>&1 | FileCheck -check-prefix=RDPRU %s
// RUN: %clang --target=i386 -march=i386 -mno-rdpru %s -### 2>&1 | FileCheck -check-prefix=NO-RDPRU %s
// RDPRU: "-target-feature" "+rdpru"
// NO-RDPRU: "-target-feature" "-rdpru"
// RUN: %clang -target i386-linux-gnu -mretpoline %s -### 2>&1 | FileCheck -check-prefix=RETPOLINE %s
// RUN: %clang -target i386-linux-gnu -mno-retpoline %s -### 2>&1 | FileCheck -check-prefix=NO-RETPOLINE %s
// RETPOLINE: "-target-feature" "+retpoline-indirect-calls" "-target-feature" "+retpoline-indirect-branches"

View File

@ -588,3 +588,11 @@
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-crc32 -x c -E -dM -o - %s | FileCheck -check-prefix=NOCRC32 %s
// NOCRC32-NOT: #define __CRC32__ 1
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mrdpru -x c -E -dM -o - %s | FileCheck -check-prefix=RDPRU %s
// RDPRU: #define __RDPRU__ 1
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-rdpru -x c -E -dM -o - %s | FileCheck -check-prefix=NORDPRU %s
// NORDPRU-NOT: #define __RDPRU__ 1

View File

@ -163,6 +163,7 @@ Changes to the X86 Backend
--------------------------
* Support ``half`` type on SSE2 and above targets.
* Support ``rdpru`` instruction on Zen2 and above targets.
Changes to the OCaml bindings
-----------------------------

View File

@ -72,6 +72,12 @@ let TargetPrefix = "x86" in {
[ImmArg<ArgIndex<1>>]>;
}
// Read Processor Register.
// Takes one i32 operand and produces an i64 result; exposed to Clang as
// __builtin_ia32_rdpru. No intrinsic attributes are attached.
let TargetPrefix = "x86" in {
def int_x86_rdpru : ClangBuiltin<"__builtin_ia32_rdpru">,
Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>;
}
//===----------------------------------------------------------------------===//
// CET SS
let TargetPrefix = "x86" in {

View File

@ -178,6 +178,7 @@ X86_FEATURE (PREFETCHWT1, "prefetchwt1")
X86_FEATURE (PRFCHW, "prfchw")
X86_FEATURE (PTWRITE, "ptwrite")
X86_FEATURE (RDPID, "rdpid")
X86_FEATURE (RDPRU, "rdpru")
X86_FEATURE (RDRND, "rdrnd")
X86_FEATURE (RDSEED, "rdseed")
X86_FEATURE (RTM, "rtm")

View File

@ -285,8 +285,9 @@ constexpr FeatureBitset FeaturesZNVER1 =
FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 |
FeatureSSE4_2 | FeatureSSE4_A | FeatureXSAVE | FeatureXSAVEC |
FeatureXSAVEOPT | FeatureXSAVES;
constexpr FeatureBitset FeaturesZNVER2 =
FeaturesZNVER1 | FeatureCLWB | FeatureRDPID | FeatureWBNOINVD;
constexpr FeatureBitset FeaturesZNVER2 = FeaturesZNVER1 | FeatureCLWB |
FeatureRDPID | FeatureRDPRU |
FeatureWBNOINVD;
static constexpr FeatureBitset FeaturesZNVER3 = FeaturesZNVER2 |
FeatureINVPCID | FeaturePKU |
FeatureVAES | FeatureVPCLMULQDQ;
@ -490,6 +491,7 @@ constexpr FeatureBitset ImpliedFeaturesPREFETCHWT1 = {};
constexpr FeatureBitset ImpliedFeaturesPRFCHW = {};
constexpr FeatureBitset ImpliedFeaturesPTWRITE = {};
constexpr FeatureBitset ImpliedFeaturesRDPID = {};
constexpr FeatureBitset ImpliedFeaturesRDPRU = {};
constexpr FeatureBitset ImpliedFeaturesRDRND = {};
constexpr FeatureBitset ImpliedFeaturesRDSEED = {};
constexpr FeatureBitset ImpliedFeaturesRTM = {};

View File

@ -266,6 +266,8 @@ def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
"Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
"Support RDPID instructions">;
def FeatureRDPRU : SubtargetFeature<"rdpru", "HasRDPRU", "true",
"Support RDPRU instructions">;
def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
"Wait and pause enhancements">;
def FeatureENQCMD : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
@ -1238,6 +1240,7 @@ def ProcessorFeatures {
TuningInsertVZEROUPPER];
list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
FeatureRDPID,
FeatureRDPRU,
FeatureWBNOINVD];
list<SubtargetFeature> ZN2Tuning = ZNTuning;
list<SubtargetFeature> ZN2Features =

View File

@ -27887,11 +27887,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
}
// Read Performance Monitoring Counters.
case RDPMC:
// Read Processor Register.
case RDPRU:
// Get Extended Control Register.
case XGETBV: {
SmallVector<SDValue, 2> Results;
// RDPMC uses ECX to select the index of the performance counter to read.
// RDPRU uses ECX to select the processor register to read.
// XGETBV uses ECX to select the index of the XCR register to return.
// The result is stored into registers EDX:EAX.
expandIntrinsicWChainHelper(Op.getNode(), dl, DAG, IntrData->Opc0, X86::ECX,
@ -32894,6 +32897,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPMC, X86::ECX, Subtarget,
Results);
return;
case Intrinsic::x86_rdpru:
expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPRU, X86::ECX, Subtarget,
Results);
return;
case Intrinsic::x86_xgetbv:
expandIntrinsicWChainHelper(N, dl, DAG, X86::XGETBV, X86::ECX, Subtarget,
Results);

View File

@ -978,6 +978,7 @@ def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">;
def HasCLWB : Predicate<"Subtarget->hasCLWB()">;
def HasWBNOINVD : Predicate<"Subtarget->hasWBNOINVD()">;
def HasRDPID : Predicate<"Subtarget->hasRDPID()">;
def HasRDPRU : Predicate<"Subtarget->hasRDPRU()">;
def HasWAITPKG : Predicate<"Subtarget->hasWAITPKG()">;
def HasINVPCID : Predicate<"Subtarget->hasINVPCID()">;
def HasCX8 : Predicate<"Subtarget->hasCX8()">;

View File

@ -734,6 +734,15 @@ def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst),
Requires<[In64BitMode, HasPTWRITE]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// RDPRU - Read Processor Register instruction.
// The instruction has no explicit operands: ECX is an implicit use and
// EAX/EDX are implicit defs. It is guarded by the HasRDPRU predicate.
let SchedRW = [WriteSystem] in {
let Uses = [ECX], Defs = [EAX, EDX] in
def RDPRU : I<0x01, MRM_FD, (outs), (ins), "rdpru", []>, PS,
Requires<[HasRDPRU]>;
}
//===----------------------------------------------------------------------===//
// Platform Configuration instruction

View File

@ -37,7 +37,7 @@ enum IntrinsicType : uint16_t {
TRUNCATE_TO_REG, CVTPS2PH_MASK, CVTPD2DQ_MASK, CVTQQ2PS_MASK,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
FIXUPIMM, FIXUPIMM_MASKZ, GATHER_AVX2,
ROUNDP, ROUNDS
ROUNDP, ROUNDS, RDPRU
};
struct IntrinsicData {
@ -309,6 +309,7 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_scattersiv8_sf, SCATTER, 0, 0),
X86_INTRINSIC_DATA(avx512_scattersiv8_si, SCATTER, 0, 0),
X86_INTRINSIC_DATA(rdpmc, RDPMC, X86::RDPMC, 0),
X86_INTRINSIC_DATA(rdpru, RDPRU, X86::RDPRU, 0),
X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0),
X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0),
X86_INTRINSIC_DATA(rdrand_64, RDRAND, X86ISD::RDRAND, 0),

View File

@ -0,0 +1,85 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=+rdpru | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=i686-- -mattr=+rdpru -fast-isel | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-- -mattr=+rdpru | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-- -mattr=+rdpru -fast-isel | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 -fast-isel | FileCheck %s --check-prefix=X64
; Inline assembly that uses the rdpru mnemonic is emitted unchanged between the
; #APP/#NO_APP markers on both 32-bit and 64-bit targets.
define void @rdpru_asm() {
; X86-LABEL: rdpru_asm:
; X86: # %bb.0: # %entry
; X86-NEXT: #APP
; X86-NEXT: rdpru
; X86-NEXT: #NO_APP
; X86-NEXT: retl
;
; X64-LABEL: rdpru_asm:
; X64: # %bb.0: # %entry
; X64-NEXT: #APP
; X64-NEXT: rdpru
; X64-NEXT: #NO_APP
; X64-NEXT: retq
entry:
call void asm sideeffect "rdpru", "~{dirflag},~{fpsr},~{flags}"()
ret void
}
; Run-time register id: the argument is moved into ECX before rdpru. On x86-64
; the EDX and EAX halves are then merged into RAX with a shift and an or.
define i64 @rdpru_param(i32 %regid) local_unnamed_addr {
; X86-LABEL: rdpru_param:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: rdpru
; X86-NEXT: retl
;
; X64-LABEL: rdpru_param:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: rdpru
; X64-NEXT: shlq $32, %rdx
; X64-NEXT: orq %rdx, %rax
; X64-NEXT: retq
entry:
%0 = tail call i64 @llvm.x86.rdpru(i32 %regid)
ret i64 %0
}
; Constant register id 0: ECX is zeroed with xorl before rdpru.
define i64 @rdpru_mperf() local_unnamed_addr {
; X86-LABEL: rdpru_mperf:
; X86: # %bb.0: # %entry
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: rdpru
; X86-NEXT: retl
;
; X64-LABEL: rdpru_mperf:
; X64: # %bb.0: # %entry
; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: rdpru
; X64-NEXT: shlq $32, %rdx
; X64-NEXT: orq %rdx, %rax
; X64-NEXT: retq
entry:
%0 = tail call i64 @llvm.x86.rdpru(i32 0)
ret i64 %0
}
; Constant register id 1: the immediate is loaded into ECX before rdpru.
define i64 @rdpru_aperf() local_unnamed_addr {
; X86-LABEL: rdpru_aperf:
; X86: # %bb.0: # %entry
; X86-NEXT: movl $1, %ecx
; X86-NEXT: rdpru
; X86-NEXT: retl
;
; X64-LABEL: rdpru_aperf:
; X64: # %bb.0: # %entry
; X64-NEXT: movl $1, %ecx
; X64-NEXT: rdpru
; X64-NEXT: shlq $32, %rdx
; X64-NEXT: orq %rdx, %rax
; X64-NEXT: retq
entry:
%0 = tail call i64 @llvm.x86.rdpru(i32 1)
ret i64 %0
}
declare i64 @llvm.x86.rdpru(i32)

View File

@ -1015,3 +1015,6 @@
# CHECK: hreset $1
0xf3 0x0f 0x3a 0xf0 0xc0 0x01
# CHECK: rdpru
0x0f,0x01,0xfd

View File

@ -758,3 +758,6 @@
# CHECK: senduipi %r13
0xf3,0x41,0x0f,0xc7,0xf5
# CHECK: rdpru
0x0f,0x01,0xfd

17
llvm/test/MC/X86/RDPRU.s Normal file
View File

@ -0,0 +1,17 @@
/// Encoding and disassembly of rdpru.
// RUN: llvm-mc -triple i686-- --show-encoding %s |\
// RUN: FileCheck %s --check-prefixes=CHECK,ENCODING
// RUN: llvm-mc -triple i686-- -filetype=obj %s |\
// RUN: llvm-objdump -d - | FileCheck %s
// RUN: llvm-mc -triple x86_64-- --show-encoding %s |\
// RUN: FileCheck %s --check-prefixes=CHECK,ENCODING
// RUN: llvm-mc -triple x86_64-- -filetype=obj %s |\
// RUN: llvm-objdump -d - | FileCheck %s
// CHECK: rdpru
// ENCODING: encoding: [0x0f,0x01,0xfd]
rdpru