[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a
noop sled large enough to enable calling through to a runtime-determined
relative function call. At runtime, when XRay is enabled, the sled is replaced
by compiler-rt with a trampoline to the logic for creating the custom log
entries.

Future patches will implement the compiler-rt parts and the clang-side support
for emitting the IR corresponding to this intrinsic.

Reviewers: timshen, dberris

Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits

Differential Revision: https://reviews.llvm.org/D27503

llvm-svn: 302405
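For context, this is roughly how a frontend would emit the new intrinsic once
the clang-side support lands. It is a minimal sketch against the LLVM C++ API,
assuming an existing Module *M and an IRBuilder<> positioned at the insertion
point; the helper name and variables are illustrative, not part of this patch:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: emit llvm.xray.customevent(i8* EventPtr, i32 EventSize).
    static void emitXRayCustomEvent(Module *M, IRBuilder<> &Builder,
                                    Value *EventPtr, Value *EventSize) {
      // The intrinsic is not overloaded, so no type list is needed.
      Function *Callee =
          Intrinsic::getDeclaration(M, Intrinsic::xray_customevent);
      Builder.CreateCall(Callee, {EventPtr, EventSize});
    }
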
This commit is contained in:
parent 389d8cebd1
commit 9bcaed867a

@@ -226,6 +226,7 @@ public:
    FUNCTION_EXIT = 1,
    TAIL_CALL = 2,
    LOG_ARGS_ENTER = 3,
    CUSTOM_EVENT = 4,
  };

  // The table will contain these structs that point to the sled, the function

@@ -506,6 +506,7 @@ protected:
  bool selectCast(const User *I, unsigned Opcode);
  bool selectExtractValue(const User *I);
  bool selectInsertValue(const User *I);
  bool selectXRayCustomEvent(const CallInst *II);

private:
  /// \brief Handle PHI nodes in successor blocks.

@@ -795,6 +795,14 @@ def int_type_checked_load : Intrinsic<[llvm_ptr_ty, llvm_i1_ty],
def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
                                 [IntrReadMem, IntrArgMemOnly]>;

//===----------------------------------------------------------------------===//
// Xray intrinsics
//===----------------------------------------------------------------------===//
// Custom event logging for x-ray.
// Takes a pointer to a string and the length of the string.
def int_xray_customevent : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
                                     [NoCapture<0>, ReadOnly<0>, IntrWriteMem]>;
//===----------------------------------------------------------------------===//

//===------ Memory intrinsics with element-wise atomicity guarantees ------===//
//

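As a usage note on the definition above: the first operand is the event buffer
(i8*) and the second is its length in bytes (i32). A hedged sketch of how a
pass could recognize calls to the intrinsic; the helper and its surrounding
visitor are assumed for illustration:

    #include "llvm/IR/IntrinsicInst.h"

    using namespace llvm;

    // Sketch: true if Inst is a call to llvm.xray.customevent.
    static bool isXRayCustomEvent(const Instruction &Inst) {
      if (const auto *II = dyn_cast<IntrinsicInst>(&Inst))
        return II->getIntrinsicID() == Intrinsic::xray_customevent;
      return false;
    }
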
@@ -1002,6 +1002,16 @@ def PATCHABLE_TAIL_CALL : Instruction {
  let hasSideEffects = 1;
  let isReturn = 1;
}
def PATCHABLE_EVENT_CALL : Instruction {
  let OutOperandList = (outs);
  let InOperandList = (ins ptr_rc:$event, i8imm:$size);
  let AsmString = "# XRay Custom Event Log.";
  let usesCustomInserter = 1;
  let isCall = 1;
  let mayLoad = 1;
  let mayStore = 1;
  let hasSideEffects = 1;
}
def FENTRY_CALL : Instruction {
  let OutOperandList = (outs unknown:$dst);
  let InOperandList = (ins variable_ops);

@@ -182,6 +182,10 @@ HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_EXIT)
/// PATCHABLE_RET which specifically only works for return instructions.
HANDLE_TARGET_OPCODE(PATCHABLE_TAIL_CALL)

/// Wraps a logging call and its arguments with nop sleds. At runtime, this can
/// be patched to insert instrumentation instructions.
HANDLE_TARGET_OPCODE(PATCHABLE_EVENT_CALL)

/// The following generic opcodes are not supposed to appear after ISel.
/// This is something we might want to relax, but for now, this is convenient
/// to produce diagnostics.

@@ -861,6 +861,25 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
  return true;
}

bool FastISel::selectXRayCustomEvent(const CallInst *I) {
  const auto &Triple = TM.getTargetTriple();
  if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
    return true; // don't do anything to this instruction.
  SmallVector<MachineOperand, 8> Ops;
  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
                                          /*IsDef=*/false));
  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
                                          /*IsDef=*/false));
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
  for (auto &MO : Ops)
    MIB.add(MO);
  // Insert the PATCHABLE_EVENT_CALL instruction, which gets lowered properly.
  return true;
}

/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
static AttributeList getReturnAttrs(FastISel::CallLoweringInfo &CLI) {

@@ -1252,6 +1271,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
    return selectPatchpoint(II);

  case Intrinsic::xray_customevent:
    return selectXRayCustomEvent(II);
  }

  return fastLowerIntrinsicCall(II);

@@ -5703,7 +5703,37 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
    setValue(&I, N);
    return nullptr;
  }
  case Intrinsic::xray_customevent: {
    // Here we want to make sure that the intrinsic behaves as if it has a
    // specific calling convention, and only for x86_64.
    // FIXME: Support other platforms later.
    const auto &Triple = DAG.getTarget().getTargetTriple();
    if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
      return nullptr;

    SDLoc DL = getCurSDLoc();
    SmallVector<SDValue, 8> Ops;

    // We always want the arguments in registers.
    SDValue LogEntryVal = getValue(I.getArgOperand(0));
    SDValue StrSizeVal = getValue(I.getArgOperand(1));
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Chain = getRoot();
    Ops.push_back(LogEntryVal);
    Ops.push_back(StrSizeVal);
    Ops.push_back(Chain);

    // We need to enforce the calling convention at the call site so that
    // argument ordering is preserved and register allocation can see which
    // registers may be clobbered and must be preserved across the call to
    // the intrinsic.
    MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
                                           DL, NodeTys, Ops);
    SDValue patchableNode = SDValue(MN, 0);
    DAG.setRoot(patchableNode);
    setValue(&I, patchableNode);
    return nullptr;
  }
  case Intrinsic::experimental_deoptimize:
    LowerDeoptimizeCall(&I);
    return nullptr;

@@ -91,6 +91,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
                          X86MCInstLower &MCIL);
  void LowerPATCHABLE_RET(const MachineInstr &MI, X86MCInstLower &MCIL);
  void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
  void LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);

  void LowerFENTRY_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);

@@ -26517,6 +26517,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, BB);

  case TargetOpcode::PATCHABLE_EVENT_CALL:
    // Do nothing here; this is handled in the XRay instrumentation pass.
    return BB;

  case X86::LCMPXCHG8B: {
    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();

@@ -1040,6 +1040,83 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                              getSubtargetInfo());
}

void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
                                              X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "XRay custom events only supported on x86_64");

  // We want to emit the following pattern, which follows the x86 calling
  // convention to prepare for the trampoline call to be patched in.
  //
  // <args placement according to the SysV64 calling convention>
  // .p2align 1, ...
  // .Lxray_event_sled_N:
  //   jmp +N // jump across the call instruction
  //   callq __xray_CustomEvent // force relocation to symbol
  // <args cleanup, jump to here>
  //
  // The relative jump needs to jump forward 20 bytes (0x14):
  //   1 (push) + 6 (argument moves or nops) + 10 (movabs) + 2 (call) + 1 (pop)
  //
  // After patching, it would look something like:
  //
  //   nopw (2-byte nop)
  //   callq __xray_CustomEvent // already lowered
  //
  // ---
  // First we emit the label and the jump.
  auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
  OutStreamer->AddComment("# XRay Custom Event Log");
  OutStreamer->EmitCodeAlignment(2);
  OutStreamer->EmitLabel(CurSled);

  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
  // an operand (computed as an offset from the jmp instruction).
  // FIXME: Find another less hacky way to force the relative jump.
  OutStreamer->EmitBytes("\xeb\x14");

  // The SysV64 calling convention places the first two arguments in %rdi and
  // %rsi -- so we only work with those.
  unsigned UsedRegs[] = {X86::RDI, X86::RSI, X86::RAX};

  // Because we will use %rax, we preserve that across the call.
  EmitAndCountInstruction(MCInstBuilder(X86::PUSH64r).addReg(X86::RAX));

  // Then we put the operands in the %rdi and %rsi registers.
  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
      if (Op->isImm())
        EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri)
                                    .addReg(UsedRegs[I])
                                    .addImm(Op->getImm()));
      else if (Op->isReg()) {
        if (Op->getReg() != UsedRegs[I])
          EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr)
                                      .addReg(UsedRegs[I])
                                      .addReg(Op->getReg()));
        else
          EmitNops(*OutStreamer, 3, Subtarget->is64Bit(), getSubtargetInfo());
      }
    }

  // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
  // name of the trampoline to be implemented by the XRay runtime. We put this
  // explicitly in the %rax register.
  auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
  EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri)
                              .addReg(X86::RAX)
                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));

  // Emit the call instruction.
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(X86::RAX));

  // Restore caller-saved and used registers.
  OutStreamer->AddComment("xray custom event end.");
  EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(X86::RAX));

  recordSled(CurSled, MI, SledKind::CUSTOM_EVENT);
}

void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                  X86MCInstLower &MCIL) {
  // We want to emit the following pattern:

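A note on the runtime side of the sled above: the argument moves leave the
event buffer in %rdi and its length in %rsi, which is exactly the SysV64
register assignment for a two-argument function. A hedged sketch of the
assumed shape of the trampoline entry point; the actual handler and the logic
for creating custom log entries are compiler-rt work deferred to a later
patch:

    #include <cstddef>

    // Assumed ABI only: receives the event buffer and its length exactly as
    // the sled leaves them in %rdi/%rsi. A real compiler-rt implementation
    // (future patch) would copy [Event, Event + Size) into the XRay log.
    extern "C" void __xray_CustomEvent(void *Event, std::size_t Size) {
      (void)Event;
      (void)Size;
    }
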
@@ -1415,6 +1492,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {

  case TargetOpcode::PATCHABLE_TAIL_CALL:
    return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_EVENT_CALL:
    return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);

  case X86::MORESTACK_RET:
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));

@@ -0,0 +1,23 @@
; RUN: llc -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s

define i32 @fn() nounwind noinline uwtable "function-instrument"="xray-always" {
  %eventptr = alloca i8
  %eventsize = alloca i32
  store i32 3, i32* %eventsize
  %val = load i32, i32* %eventsize
  call void @llvm.xray.customevent(i8* %eventptr, i32 %val)
  ; CHECK-LABEL: Lxray_event_sled_0:
  ; CHECK-NEXT: .ascii "\353\024"
  ; CHECK-NEXT: pushq %rax
  ; CHECK-NEXT: movq {{.*}}, %rdi
  ; CHECK-NEXT: movq {{.*}}, %rsi
  ; CHECK-NEXT: movabsq $__xray_CustomEvent, %rax
  ; CHECK-NEXT: callq *%rax
  ; CHECK-NEXT: popq %rax
  ret i32 0
}
; CHECK: .section {{.*}}xray_instr_map
; CHECK-LABEL: Lxray_synthetic_0:
; CHECK: .quad {{.*}}xray_event_sled_0

declare void @llvm.xray.customevent(i8*, i32)
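
The last three CHECK lines confirm that the sled's address is recorded in the
xray_instr_map section, tagged with the CUSTOM_EVENT kind added to SledKind in
the first hunk. For orientation, a sketch of the kind of per-sled record the
runtime would consume; the field names and layout here are illustrative
assumptions, not the exact in-tree definition:

    #include <cstdint>

    // Illustrative shape of an xray_instr_map entry: each record pairs a
    // sled address with its function and a SledKind tag (CUSTOM_EVENT = 4).
    struct XRaySledEntrySketch {
      uint64_t SledAddress;     // e.g. .Lxray_event_sled_0 above
      uint64_t FunctionAddress; // entry point of the instrumented function
      uint8_t Kind;             // SledKind from the AsmPrinter enum
      uint8_t AlwaysInstrument; // "function-instrument"="xray-always"
    };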