[XRay] Custom event logging intrinsic

This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when XRay is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.

Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.

Reviewers: timshen, dberris

Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits

Differential Revision: https://reviews.llvm.org/D27503

llvm-svn: 302405
This commit is contained in:
Dean Michael Berris 2017-05-08 05:45:21 +00:00
parent 389d8cebd1
commit 9bcaed867a
11 changed files with 184 additions and 0 deletions

View File

@ -226,6 +226,7 @@ public:
FUNCTION_EXIT = 1,
TAIL_CALL = 2,
LOG_ARGS_ENTER = 3,
CUSTOM_EVENT = 4,
};
// The table will contain these structs that point to the sled, the function

View File

@ -506,6 +506,7 @@ protected:
bool selectCast(const User *I, unsigned Opcode);
bool selectExtractValue(const User *I);
bool selectInsertValue(const User *I);
bool selectXRayCustomEvent(const CallInst *II);
private:
/// \brief Handle PHI nodes in successor blocks.

View File

@ -795,6 +795,14 @@ def int_type_checked_load : Intrinsic<[llvm_ptr_ty, llvm_i1_ty],
def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
[IntrReadMem, IntrArgMemOnly]>;
// XRay intrinsics
//===----------------------------------------------------------------------===//
// Custom event logging for XRay.
// Takes a pointer to a string and the length of the string (an i32) and
// produces no result. The pointer argument (index 0) is marked NoCapture and
// ReadOnly; the intrinsic as a whole is marked IntrWriteMem.
def int_xray_customevent : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
[NoCapture<0>, ReadOnly<0>, IntrWriteMem]>;
//===----------------------------------------------------------------------===//
//===------ Memory intrinsics with element-wise atomicity guarantees ------===//
//

View File

@ -1002,6 +1002,16 @@ def PATCHABLE_TAIL_CALL : Instruction {
let hasSideEffects = 1;
let isReturn = 1;
}
// Target-independent pseudo-instruction for an XRay custom event log call.
// It has no outputs and two inputs ($event and $size), is flagged as a call
// that may load, may store and has side effects, and asks for the target's
// custom-inserter hook to be run on it.
// NOTE(review): $size is declared i8imm, but the llvm.xray.customevent
// intrinsic carries an i32 length and the instruction selectors pass it in a
// register -- confirm that this is the intended operand class.
def PATCHABLE_EVENT_CALL : Instruction {
let OutOperandList = (outs);
let InOperandList = (ins ptr_rc:$event, i8imm:$size);
let AsmString = "# XRay Custom Event Log.";
let usesCustomInserter = 1;
let isCall = 1;
let mayLoad = 1;
let mayStore = 1;
let hasSideEffects = 1;
}
def FENTRY_CALL : Instruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins variable_ops);

View File

@ -182,6 +182,10 @@ HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_EXIT)
/// PATCHABLE_RET which specifically only works for return instructions.
HANDLE_TARGET_OPCODE(PATCHABLE_TAIL_CALL)
/// Wraps a logging call and its arguments with nop sleds. At runtime, this can be
/// patched to insert instrumentation instructions.
HANDLE_TARGET_OPCODE(PATCHABLE_EVENT_CALL)
/// The following generic opcodes are not supposed to appear after ISel.
/// This is something we might want to relax, but for now, this is convenient
/// to produce diagnostics.

View File

@ -861,6 +861,25 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
return true;
}
bool FastISel::selectXRayCustomEvent(const CallInst *I) {
const auto &Triple = TM.getTargetTriple();
if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
return true; // don't do anything to this instruction.
SmallVector<MachineOperand, 8> Ops;
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
/*IsDef=*/false));
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
/*IsDef=*/false));
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
for (auto &MO : Ops)
MIB.add(MO);
// Insert the Patchable Event Call instruction, that gets lowered properly.
return true;
}
/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
static AttributeList getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
@ -1252,6 +1271,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
return selectPatchpoint(II);
case Intrinsic::xray_customevent:
return selectXRayCustomEvent(II);
}
return fastLowerIntrinsicCall(II);

View File

@ -5703,7 +5703,37 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, N);
return nullptr;
}
case Intrinsic::xray_customevent: {
  // Custom events are only lowered for x86_64 Linux; for every other target
  // the intrinsic call is dropped without creating a node.
  // FIXME: Support other platforms later.
  const auto &TargetTriple = DAG.getTarget().getTargetTriple();
  const bool IsX86_64Linux =
      TargetTriple.getArch() == Triple::x86_64 && TargetTriple.isOSLinux();
  if (!IsX86_64Linux)
    return nullptr;

  SDLoc DL = getCurSDLoc();

  // Operand order of the machine node: event pointer, event size, and last
  // the incoming chain.
  SDValue EventPtr = getValue(I.getArgOperand(0));
  SDValue EventSize = getValue(I.getArgOperand(1));
  SDVTList ResultTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue InChain = getRoot();
  SDValue NodeOps[] = {EventPtr, EventSize, InChain};

  // Emit the PATCHABLE_EVENT_CALL pseudo directly as a machine node; it
  // produces a chain (result 0) and a glue value.
  MachineSDNode *EventCall = DAG.getMachineNode(
      TargetOpcode::PATCHABLE_EVENT_CALL, DL, ResultTys, NodeOps);

  // The node's chain result becomes the new root and is also recorded as the
  // value of the call instruction.
  SDValue EventChain(EventCall, 0);
  DAG.setRoot(EventChain);
  setValue(&I, EventChain);
  return nullptr;
}
case Intrinsic::experimental_deoptimize:
LowerDeoptimizeCall(&I);
return nullptr;

View File

@ -91,6 +91,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
X86MCInstLower &MCIL);
void LowerPATCHABLE_RET(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerFENTRY_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);

View File

@ -26517,6 +26517,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
case TargetOpcode::PATCHABLE_EVENT_CALL:
// Nothing to expand at this point: the pseudo is left in place and the sled
// is emitted later, when the X86 AsmPrinter lowers this opcode
// (LowerPATCHABLE_EVENT_CALL).
return BB;
case X86::LCMPXCHG8B: {
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();

View File

@ -1040,6 +1040,83 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
getSubtargetInfo());
}
/// Emits the XRay custom-event sled for a PATCHABLE_EVENT_CALL instruction
/// and records it as a CUSTOM_EVENT sled.
///
/// \param MI   the PATCHABLE_EVENT_CALL being lowered; its operands are the
///             values handed to the trampoline.
/// \param MCIL helper used to lower the machine operands and the trampoline
///             symbol to MC operands.
void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
                                              X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");

  // We emit the following sequence, which puts the arguments where the SysV
  // x86-64 calling convention expects them and then calls the trampoline:
  //
  //   .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +20                             //  2 bytes, skips the whole sled
  //   pushq %rax                          //  1 byte
  //   movq <arg0>, %rdi                   //  3 bytes (or a 3-byte nop)
  //   movq <arg1>, %rsi                   //  3 bytes (or a 3-byte nop)
  //   movabsq $__xray_CustomEvent, %rax   // 10 bytes, forces a relocation
  //   callq *%rax                         //  2 bytes
  //   popq %rax                           //  1 byte
  //
  // The relative jump therefore needs to jump forward 20 (0x14) bytes:
  //   1 + 3 + 3 + 10 + 2 + 1
  //
  // After patching, the two-byte jmp is expected to have been replaced by a
  // two-byte nop, so that execution falls through into the call sequence.
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
  OutStreamer->AddComment("# XRay Custom Event Log");
  OutStreamer->EmitCodeAlignment(2);
  OutStreamer->EmitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset
  // as an operand (computed from the end of the jmp instruction).
  // FIXME: Find another less hacky way do force the relative jump.
  OutStreamer->EmitBytes("\xeb\x14");

  // The SysV x86-64 calling convention passes the first two integer arguments
  // in %rdi and %rsi, so operand N is moved into UsedRegs[N]. %rax is listed
  // because the sled also uses it, as scratch for the trampoline address.
  // NOTE(review): %rdi and %rsi are overwritten below and, unlike %rax, are
  // not saved or restored by the sled; the moves are also sequential, so a
  // second argument living in %rdi would be overwritten before it is read.
  // Confirm that register allocation accounts for this.
  unsigned UsedRegs[] = {X86::RDI, X86::RSI, X86::RAX};

  // Because we will use %rax, we preserve that across the call.
  EmitAndCountInstruction(MCInstBuilder(X86::PUSH64r).addReg(X86::RAX));

  // Then we put the operands in the %rdi and %rsi registers.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
      // UsedRegs is indexed by operand number; make sure we never read past
      // the end of it.
      assert(I < sizeof(UsedRegs) / sizeof(UsedRegs[0]) &&
             "More operands than registers set aside for the event sled");
      if (Op->isImm())
        // NOTE(review): a 64-bit immediate move is longer than the 3 bytes
        // budgeted per argument by the fixed jump offset above -- confirm
        // that immediates cannot reach this point.
        EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri)
                                    .addReg(UsedRegs[I])
                                    .addImm(Op->getImm()));
      else if (Op->isReg()) {
        if (Op->getReg() != UsedRegs[I])
          EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr)
                                      .addReg(UsedRegs[I])
                                      .addReg(Op->getReg()));
        else
          // The argument is already in place; pad with a nop of the same size
          // as the move so that the sled length stays constant.
          EmitNops(*OutStreamer, 3, Subtarget->is64Bit(), getSubtargetInfo());
      }
    }

  // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime. We put this
  // explicitly in the %rax register.
  auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
  EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri)
                              .addReg(X86::RAX)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(X86::RAX));

  // Restore %rax, which was saved before the sled used it as scratch.
  OutStreamer->AddComment("xray custom event end.");
  EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(X86::RAX));

  recordSled(CurSled, MI, SledKind::CUSTOM_EVENT);
}
void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
X86MCInstLower &MCIL) {
// We want to emit the following pattern:
@ -1415,6 +1492,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case TargetOpcode::PATCHABLE_TAIL_CALL:
return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
case TargetOpcode::PATCHABLE_EVENT_CALL:
return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
case X86::MORESTACK_RET:
EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));

View File

@ -0,0 +1,23 @@
; RUN: llc -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
; Calls llvm.xray.customevent once from a function that is always XRay
; instrumented, and verifies the event sled that llc emits for the call.
define i32 @fn() nounwind noinline uwtable "function-instrument"="xray-always" {
%eventptr = alloca i8
%eventsize = alloca i32
store i32 3, i32* %eventsize
%val = load i32, i32* %eventsize
call void @llvm.xray.customevent(i8* %eventptr, i32 %val)
; The sled begins with the raw bytes 0xeb 0x14 (shown in octal as \353\024),
; i.e. a two-byte jump over the rest of the sled, followed by the %rax save,
; the argument moves, the trampoline address load, the indirect call and the
; %rax restore.
; CHECK-LABEL: Lxray_event_sled_0:
; CHECK-NEXT: .ascii "\353\024
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movq {{.*}}, %rdi
; CHECK-NEXT: movq {{.*}}, %rsi
; CHECK-NEXT: movabsq $__xray_CustomEvent, %rax
; CHECK-NEXT: callq *%rax
; CHECK-NEXT: popq %rax
ret i32 0
}
; CHECK: .section {{.*}}xray_instr_map
; CHECK-LABEL: Lxray_synthetic_0:
; CHECK: .quad {{.*}}xray_event_sled_0
declare void @llvm.xray.customevent(i8*, i32)