Add support of RTM from TSX extension

- Add RTM code generation support through 3 X86 intrinsics:
  xbegin()/xend() to start/end a transaction region, and xabort() to abort a
  transaction region

llvm-svn: 167573
This commit is contained in:
Michael Liao 2012-11-08 07:28:54 +00:00
parent c3bd9f5c50
commit 73cffddb95
15 changed files with 223 additions and 54 deletions

View File

@ -2559,3 +2559,15 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_rdrand_32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [], []>;
def int_x86_rdrand_64 : Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>;
}
//===----------------------------------------------------------------------===//
// RTM intrinsics. Transactional Memory support.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  // llvm.x86.xbegin: takes no arguments and yields an i32 result. It is
  // exposed to front ends as __builtin_ia32_xbegin.
  def int_x86_xbegin : GCCBuiltin<"__builtin_ia32_xbegin">,
              Intrinsic<[llvm_i32_ty], [], []>;
  // llvm.x86.xend: takes no arguments and produces no result. It is exposed
  // to front ends as __builtin_ia32_xend.
  def int_x86_xend : GCCBuiltin<"__builtin_ia32_xend">,
              Intrinsic<[], [], []>;
  // llvm.x86.xabort: takes an i8 operand and produces no result. It is exposed
  // to front ends as __builtin_ia32_xabort.
  //
  // This intrinsic is deliberately NOT marked IntrNoReturn: the XABORT
  // instruction only transfers control when it executes inside a transactional
  // region; outside of one it behaves as a NOP and falls through. Marking it
  // noreturn would let the optimizer treat the code following the call as
  // unreachable, which is wrong for that fall-through case.
  def int_x86_xabort : GCCBuiltin<"__builtin_ia32_xabort">,
              Intrinsic<[], [llvm_i8_ty], []>;
}

View File

@ -18,19 +18,19 @@
using namespace llvm;
namespace {
class X86AsmLexer : public MCTargetAsmLexer {
const MCAsmInfo &AsmInfo;
bool tentativeIsValid;
AsmToken tentativeToken;
const AsmToken &lexTentative() {
tentativeToken = getLexer()->Lex();
tentativeIsValid = true;
return tentativeToken;
}
const AsmToken &lexDefinite() {
if (tentativeIsValid) {
tentativeIsValid = false;
@ -38,7 +38,7 @@ class X86AsmLexer : public MCTargetAsmLexer {
}
return getLexer()->Lex();
}
AsmToken LexTokenATT();
AsmToken LexTokenIntel();
protected:
@ -47,7 +47,7 @@ protected:
SetError(SMLoc(), "No MCAsmLexer installed");
return AsmToken(AsmToken::Error, "", 0);
}
switch (AsmInfo.getAssemblerDialect()) {
default:
SetError(SMLoc(), "Unhandled dialect");
@ -71,33 +71,32 @@ public:
AsmToken X86AsmLexer::LexTokenATT() {
AsmToken lexedToken = lexDefinite();
switch (lexedToken.getKind()) {
default:
return lexedToken;
case AsmToken::Error:
SetError(Lexer->getErrLoc(), Lexer->getErr());
return lexedToken;
case AsmToken::Percent: {
const AsmToken &nextToken = lexTentative();
if (nextToken.getKind() != AsmToken::Identifier)
return lexedToken;
if (unsigned regID = MatchRegisterName(nextToken.getString())) {
lexDefinite();
// FIXME: This is completely wrong when there is a space or other
// punctuation between the % and the register name.
StringRef regStr(lexedToken.getString().data(),
lexedToken.getString().size() +
lexedToken.getString().size() +
nextToken.getString().size());
return AsmToken(AsmToken::Register, regStr,
return AsmToken(AsmToken::Register, regStr,
static_cast<int64_t>(regID));
}
// Match register name failed. If this is "db[0-7]", match it as an alias
// for dr[0-7].
if (nextToken.getString().size() == 3 &&
@ -113,29 +112,29 @@ AsmToken X86AsmLexer::LexTokenATT() {
case '6': RegNo = X86::DR6; break;
case '7': RegNo = X86::DR7; break;
}
if (RegNo != -1) {
lexDefinite();
// FIXME: This is completely wrong when there is a space or other
// punctuation between the % and the register name.
StringRef regStr(lexedToken.getString().data(),
lexedToken.getString().size() +
lexedToken.getString().size() +
nextToken.getString().size());
return AsmToken(AsmToken::Register, regStr,
return AsmToken(AsmToken::Register, regStr,
static_cast<int64_t>(RegNo));
}
}
return lexedToken;
}
}
}
}
AsmToken X86AsmLexer::LexTokenIntel() {
const AsmToken &lexedToken = lexDefinite();
switch(lexedToken.getKind()) {
default:
return lexedToken;
@ -144,7 +143,7 @@ AsmToken X86AsmLexer::LexTokenIntel() {
return lexedToken;
case AsmToken::Identifier: {
unsigned regID = MatchRegisterName(lexedToken.getString().lower());
if (regID)
return AsmToken(AsmToken::Register,
lexedToken.getString(),

View File

@ -276,9 +276,9 @@ namespace X86II {
MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36,
MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40,
MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46,
MRM_D4 = 47, MRM_D8 = 48, MRM_D9 = 49, MRM_DA = 50,
MRM_DB = 51, MRM_DC = 52, MRM_DD = 53, MRM_DE = 54,
MRM_DF = 55,
MRM_D4 = 47, MRM_D5 = 48, MRM_D8 = 49, MRM_D9 = 50,
MRM_DA = 51, MRM_DB = 52, MRM_DC = 53, MRM_DD = 54,
MRM_DE = 55, MRM_DF = 56,
/// RawFrmImm8 - This is used for the ENTER instruction, which has two
/// immediates, the first of which is a 16-bit immediate (specified by
@ -580,11 +580,11 @@ namespace X86II {
case X86II::MRM_E8: case X86II::MRM_F0:
case X86II::MRM_F8: case X86II::MRM_F9:
case X86II::MRM_D0: case X86II::MRM_D1:
case X86II::MRM_D4: case X86II::MRM_D8:
case X86II::MRM_D9: case X86II::MRM_DA:
case X86II::MRM_DB: case X86II::MRM_DC:
case X86II::MRM_DD: case X86II::MRM_DE:
case X86II::MRM_DF:
case X86II::MRM_D4: case X86II::MRM_D5:
case X86II::MRM_D8: case X86II::MRM_D9:
case X86II::MRM_DA: case X86II::MRM_DB:
case X86II::MRM_DC: case X86II::MRM_DD:
case X86II::MRM_DE: case X86II::MRM_DF:
return -1;
}
}

View File

@ -1121,13 +1121,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM_C3: case X86II::MRM_C4:
case X86II::MRM_C8: case X86II::MRM_C9:
case X86II::MRM_D0: case X86II::MRM_D1:
case X86II::MRM_D4: case X86II::MRM_D8:
case X86II::MRM_D9: case X86II::MRM_DA:
case X86II::MRM_DB: case X86II::MRM_DC:
case X86II::MRM_DD: case X86II::MRM_DE:
case X86II::MRM_DF: case X86II::MRM_E8:
case X86II::MRM_F0: case X86II::MRM_F8:
case X86II::MRM_F9:
case X86II::MRM_D4: case X86II::MRM_D5:
case X86II::MRM_D8: case X86II::MRM_D9:
case X86II::MRM_DA: case X86II::MRM_DB:
case X86II::MRM_DC: case X86II::MRM_DD:
case X86II::MRM_DE: case X86II::MRM_DF:
case X86II::MRM_E8: case X86II::MRM_F0:
case X86II::MRM_F8: case X86II::MRM_F9:
EmitByte(BaseOpcode, CurByte, OS);
unsigned char MRM;
@ -1142,6 +1142,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM_D0: MRM = 0xD0; break;
case X86II::MRM_D1: MRM = 0xD1; break;
case X86II::MRM_D4: MRM = 0xD4; break;
case X86II::MRM_D5: MRM = 0xD5; break;
case X86II::MRM_D8: MRM = 0xD8; break;
case X86II::MRM_D9: MRM = 0xD9; break;
case X86II::MRM_DA: MRM = 0xDA; break;

View File

@ -118,6 +118,8 @@ def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
"Support BMI instructions">;
def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
"Support BMI2 instructions">;
// Subtarget feature for RTM. It is selected with the attribute string "rtm"
// (e.g. -mattr=+rtm) and is tied to the subtarget field "HasRTM" with the
// value "true".
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
"Support RTM instructions">;
def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
@ -192,7 +194,8 @@ def : Proc<"core-avx2", [FeatureAVX2, FeatureCMPXCHG16B, FeaturePOPCNT,
FeatureAES, FeaturePCLMUL, FeatureRDRAND,
FeatureF16C, FeatureFSGSBase,
FeatureMOVBE, FeatureLZCNT, FeatureBMI,
FeatureBMI2, FeatureFMA]>;
FeatureBMI2, FeatureFMA,
FeatureRTM]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>;

View File

@ -12195,6 +12195,63 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
// private utility function
/// Utility function to emit xbegin specifying the start of an RTM region.
///
/// Expands the XBEGIN pseudo instruction \p MI, located in \p MBB, into
/// explicit control flow. \p MBB is split right after \p MI and two new blocks
/// (mainMBB and sinkMBB) are inserted immediately after it in the function
/// layout. \p MBB ends with a real XBEGIN_4 whose target is sinkMBB, mainMBB
/// loads -1 into EAX, and sinkMBB copies EAX into the result register of the
/// pseudo before continuing with the instructions that used to follow \p MI.
///
/// \param MI  The XBEGIN pseudo instruction to expand; it is erased.
/// \param MBB The block currently containing \p MI.
/// \returns sinkMBB, the block that now holds the remainder of \p MBB.
MachineBasicBlock *
X86TargetLowering::EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *BB = MBB->getBasicBlock();
// I designates the block following MBB in layout order; the new blocks are
// inserted in front of it, i.e. directly after MBB.
MachineFunction::iterator I = MBB;
++I;
// For the v = xbegin(), we generate
//
// thisMBB:
// xbegin sinkMBB
//
// mainMBB:
// eax = -1
//
// sinkMBB:
// v = eax
MachineBasicBlock *thisMBB = MBB;
MachineFunction *MF = MBB->getParent();
// Both new blocks are associated with the same IR basic block as MBB.
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
// Resulting layout: thisMBB, mainMBB, sinkMBB.
MF->insert(I, mainMBB);
MF->insert(I, sinkMBB);
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
// xbegin sinkMBB
// # fallthrough to mainMBB
// # abortion to sinkMBB
BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(sinkMBB);
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(sinkMBB);
// mainMBB:
// EAX = -1
// NOTE(review): -1 is presumably the "transaction started" value seen by the
// caller of xbegin() -- confirm against the RTM specification.
BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), X86::EAX).addImm(-1);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
// EAX is live into the sinkMBB
// EAX is either the -1 written by mainMBB or whatever it holds when control
// arrives through the xbegin edge (presumably the abort status -- TODO
// confirm).
sinkMBB->addLiveIn(X86::EAX);
// The COPY is placed at the very start of sinkMBB, ahead of the spliced
// instructions, and defines the register that was operand 0 of the pseudo.
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
.addReg(X86::EAX);
// The pseudo is fully expanded at this point and can be removed.
MI->eraseFromParent();
return sinkMBB;
}
// Get CMPXCHG opcode for the specified data type.
static unsigned getCmpXChgOpcode(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
@ -13858,6 +13915,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::MONITOR:
return EmitMonitor(MI, BB);
// xbegin
case X86::XBEGIN:
return EmitXBegin(MI, BB);
// Atomic Lowering.
case X86::ATOMAND8:
case X86::ATOMAND16:

View File

@ -898,6 +898,10 @@ namespace llvm {
MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI,
MachineBasicBlock *MBB) const;
/// Utility function to emit xbegin specifying the start of an RTM region.
MachineBasicBlock *EmitXBegin(MachineInstr *MI,
MachineBasicBlock *MBB) const;
// Utility function to emit the low-level va_arg code for X86-64.
MachineBasicBlock *EmitVAARG64WithCustomInserter(
MachineInstr *MI,

View File

@ -44,14 +44,15 @@ def RawFrmImm16 : Format<44>;
def MRM_D0 : Format<45>;
def MRM_D1 : Format<46>;
def MRM_D4 : Format<47>;
def MRM_D8 : Format<48>;
def MRM_D9 : Format<49>;
def MRM_DA : Format<50>;
def MRM_DB : Format<51>;
def MRM_DC : Format<52>;
def MRM_DD : Format<53>;
def MRM_DE : Format<54>;
def MRM_DF : Format<55>;
def MRM_D5 : Format<48>;
def MRM_D8 : Format<49>;
def MRM_D9 : Format<50>;
def MRM_DA : Format<51>;
def MRM_DB : Format<52>;
def MRM_DC : Format<53>;
def MRM_DD : Format<54>;
def MRM_DE : Format<55>;
def MRM_DF : Format<56>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our

View File

@ -589,6 +589,7 @@ def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">;
def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
def HasBMI : Predicate<"Subtarget->hasBMI()">;
def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
def HasRTM : Predicate<"Subtarget->hasRTM()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
@ -1706,6 +1707,8 @@ include "X86Instr3DNow.td"
include "X86InstrVMX.td"
include "X86InstrSVM.td"
include "X86InstrTSX.td"
// System instructions.
include "X86InstrSystem.td"

View File

@ -0,0 +1,32 @@
//===-- X86InstrTSX.td - TSX Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel TSX instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// TSX instructions
// XBEGIN is a pseudo instruction selected for the llvm.x86.xbegin intrinsic.
// It produces its result in a GR32 register and, because usesCustomInserter is
// set, is expanded after instruction selection by the target's custom inserter
// rather than being emitted directly.
let usesCustomInserter = 1 in
def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
"# XBEGIN", [(set GR32:$dst, (int_x86_xbegin))]>,
Requires<[HasRTM]>;
// XBEGIN_4 is the real "xbegin" instruction with a 32-bit pc-relative branch
// target (opcode 0xc7, ModRM byte form MRM_F8). It is modelled as a branch and
// a terminator that defines EAX. It has no selection pattern.
let isBranch = 1, isTerminator = 1, Defs = [EAX] in
def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget:$dst),
"xbegin\t$dst", []>;
// XEND is selected for the llvm.x86.xend intrinsic. It is a two-byte-opcode
// (TB) instruction with opcode 0x01 and ModRM byte form MRM_D5.
def XEND : I<0x01, MRM_D5, (outs), (ins),
"xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;
// XABORT is selected for the llvm.x86.xabort intrinsic with an immediate
// operand. It is encoded as opcode 0xc6, ModRM byte form MRM_F8, followed by
// the 8-bit immediate.
def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
"xabort\t$imm",
[(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>;

View File

@ -302,6 +302,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasBMI2 = true;
ToggleFeature(X86::FeatureBMI2);
}
if (IsIntel && ((EBX >> 11) & 0x1)) {
HasRTM = true;
ToggleFeature(X86::FeatureRTM);
}
}
}
}
@ -330,6 +334,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
, HasLZCNT(false)
, HasBMI(false)
, HasBMI2(false)
, HasRTM(false)
, IsBTMemSlow(false)
, IsUAMemFast(false)
, HasVectorUAMem(false)

View File

@ -118,6 +118,9 @@ protected:
/// HasBMI2 - Processor has BMI2 instructions.
bool HasBMI2;
/// HasRTM - Processor has RTM instructions.
bool HasRTM;
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
@ -219,6 +222,7 @@ public:
bool hasLZCNT() const { return HasLZCNT; }
bool hasBMI() const { return HasBMI; }
bool hasBMI2() const { return HasBMI2; }
bool hasRTM() const { return HasRTM; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }

View File

@ -0,0 +1,30 @@
; RUN: llc < %s -mattr=+rtm -mtriple=x86_64-unknown-unknown | FileCheck %s
declare i32 @llvm.x86.xbegin() nounwind
declare void @llvm.x86.xend() nounwind
declare void @llvm.x86.xabort(i8) noreturn nounwind
; Verifies that a call to llvm.x86.xbegin is lowered to an "xbegin" instruction
; whose operand is a basic-block label, and that this same label is emitted
; later in the output.
define i32 @test_xbegin() nounwind uwtable {
entry:
%0 = tail call i32 @llvm.x86.xbegin() nounwind
ret i32 %0
; CHECK: test_xbegin
; CHECK: xbegin [[LABEL:.*BB.*]]
; CHECK: [[LABEL]]:
}
; Verifies that a call to llvm.x86.xend is lowered to an "xend" instruction.
define void @test_xend() nounwind uwtable {
entry:
tail call void @llvm.x86.xend() nounwind
ret void
; CHECK: test_xend
; CHECK: xend
}
; Verifies that a call to llvm.x86.xabort with the constant 2 is lowered to an
; "xabort" instruction carrying that value as its immediate operand.
define void @test_xabort() nounwind uwtable {
entry:
tail call void @llvm.x86.xabort(i8 2)
unreachable
; CHECK: test_xabort
; CHECK: xabort $2
}

View File

@ -0,0 +1,13 @@
// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
// CHECK: xbegin .L0
// CHECK: encoding: [0xc7,0xf8,A,A,A,A]
xbegin .L0
// CHECK: xend
// CHECK: encoding: [0x0f,0x01,0xd5]
xend
// CHECK: xabort
// CHECK: encoding: [0xc6,0xf8,0x0d]
xabort $13

View File

@ -38,14 +38,15 @@ using namespace llvm;
MAP(D0, 45) \
MAP(D1, 46) \
MAP(D4, 47) \
MAP(D8, 48) \
MAP(D9, 49) \
MAP(DA, 50) \
MAP(DB, 51) \
MAP(DC, 52) \
MAP(DD, 53) \
MAP(DE, 54) \
MAP(DF, 55)
MAP(D5, 48) \
MAP(D8, 49) \
MAP(D9, 50) \
MAP(DA, 51) \
MAP(DB, 52) \
MAP(DC, 53) \
MAP(DD, 54) \
MAP(DE, 55) \
MAP(DF, 56)
// A clone of X86 since we can't depend on something that is generated.
namespace X86Local {