[XRay] Implement powerpc64le xray.

Summary:
powerpc64 big-endian is not supported, but I believe that most logic can
be shared, except for xray_powerpc64.cc.

Also add a function InvalidateInstructionCache to xray_util.h, which is
copied from llvm/Support/Memory.cpp. I'm not sure if I need to add a unittest,
and I don't know how.

Reviewers: dberris, echristo, iteratee, kbarton, hfinkel

Subscribers: mehdi_amini, nemanjai, mgorny, llvm-commits

Differential Revision: https://reviews.llvm.org/D29742

llvm-svn: 294781
This commit is contained in:
Tim Shen 2017-02-10 21:03:24 +00:00
parent 58fc1b50d8
commit 918ed871df
14 changed files with 434 additions and 4 deletions

View File

@ -5064,6 +5064,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
case llvm::Triple::x86_64:
case llvm::Triple::arm:
case llvm::Triple::aarch64:
case llvm::Triple::ppc64le:
// Supported.
break;
default:

View File

@ -175,7 +175,7 @@ set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64})
set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64})
set(ALL_ESAN_SUPPORTED_ARCH ${X86_64} ${MIPS64})
set(ALL_SCUDO_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64})
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64})
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${PPC64})
if(APPLE)
include(CompilerRTDarwinUtils)

View File

@ -29,6 +29,12 @@ set(aarch64_SOURCES
xray_trampoline_AArch64.S
${XRAY_SOURCES})
set(powerpc64le_SOURCES
xray_powerpc64.cc
xray_trampoline_powerpc64.cc
xray_trampoline_powerpc64.S
${XRAY_SOURCES})
include_directories(..)
include_directories(../../include)

View File

@ -35,6 +35,8 @@ static const int16_t cSledLength = 12;
static const int16_t cSledLength = 32;
#elif defined(__arm__)
static const int16_t cSledLength = 28;
#elif defined(__powerpc64__)
static const int16_t cSledLength = 8;
#else
#error "Unsupported CPU Architecture"
#endif /* CPU architecture */

View File

@ -0,0 +1,95 @@
//===-- xray_powerpc64.cc ---------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// Implementation of powerpc64 and powerpc64le routines.
//
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_common.h"
#include "xray_defs.h"
#include "xray_interface_internal.h"
#include "xray_utils.h"
#include <atomic>
#include <cassert>
#include <cstring>
#ifndef __LITTLE_ENDIAN__
#error powerpc64 big endian is not supported for now.
#endif
namespace {

// Number of 4-byte instructions between the start of a sled and the first
// instruction after it. A disabled entry sled branches over this many
// instructions, and a disabled exit sled copies the instruction found this many
// slots past its start. Must stay in sync with the sled layout emitted by the
// PowerPC AsmPrinter.
constexpr unsigned long long JumpOverInstNum = 7;

// Makes freshly written instructions in [Addr, Addr + Len) visible to
// instruction fetch: flushes the covering data cache blocks, then invalidates
// the matching instruction cache blocks.
//
// Every asm statement carries a "memory" clobber so the compiler cannot sink
// the stores that patched the sled below the flush (this helper is file-local
// and may be inlined into the patching functions).
void clearCache(void *Addr, size_t Len) {
  // Step size used to walk the range.
  // NOTE(review): presumably a lower bound on the real cache block size;
  // a smaller stride only repeats the operation on the same block.
  const size_t LineSize = 32;

  const intptr_t Mask = ~(LineSize - 1);
  const intptr_t StartLine = ((intptr_t)Addr) & Mask;
  const intptr_t EndLine = ((intptr_t)Addr + Len + LineSize - 1) & Mask;

  // Push the modified data out of the data cache.
  for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
    asm volatile("dcbf 0, %0" : : "r"(Line) : "memory");
  asm volatile("sync" : : : "memory");

  // Drop any stale copies from the instruction cache.
  for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
    asm volatile("icbi 0, %0" : : "r"(Line) : "memory");
  asm volatile("isync" : : : "memory");
}

} // namespace
extern "C" void __clear_cache(void *start, void *end);
namespace __xray {
// Patches or unpatches the function-entry sled at Sled.Address.
//
// Enable:  the first two instructions of the sled become
//            lis 0, FuncId[16..31]        (0x3c000000 | high half)
//            ori 0, 0, FuncId[0..15]      (0x60000000 | low half)
//          written with a single 64-bit store; the first instruction sits in
//          the low word, which relies on the little-endian layout enforced at
//          the top of this file.
// Disable: the first instruction becomes an unconditional branch over
//          JumpOverInstNum instructions, i.e. past the whole sled.
//
// Always returns true.
bool patchFunctionEntry(const bool Enable, uint32_t FuncId,
                        const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  if (!Enable) {
    // b +JumpOverInstNum instructions.
    const uint32_t BranchOverSled = 0x48000000ull + (JumpOverInstNum << 2);
    *reinterpret_cast<uint32_t *>(Sled.Address) = BranchOverSled;
  } else {
    const uint64_t LoadHighHalf = 0x3c000000ull + (FuncId >> 16);
    const uint64_t OrLowHalf = 0x60000000ull + (FuncId & 0xffff);
    *reinterpret_cast<uint64_t *>(Sled.Address) =
        LoadHighHalf + (OrLowHalf << 32);
  }

  // Make the rewritten instructions visible to instruction fetch.
  clearCache(reinterpret_cast<void *>(Sled.Address), 8);
  return true;
}
// Patches or unpatches the function-exit sled at Sled.Address.
//
// Enable:  the first two instructions of the sled become
//            lis 0, FuncId[16..31]        (0x3c000000 | high half)
//            ori 0, 0, FuncId[0..15]      (0x60000000 | low half)
//          written with a single 64-bit store (first instruction in the low
//          word; little-endian only).
// Disable: the first instruction is replaced by a copy of the instruction
//          located JumpOverInstNum slots further on, i.e. the return/branch
//          that follows the sled.
//          NOTE(review): a PC-relative branch copied verbatim keeps its
//          displacement but executes 28 bytes earlier -- confirm that only
//          position-independent instructions (e.g. blr) are copied here.
//
// Always returns true.
bool patchFunctionExit(const bool Enable, uint32_t FuncId,
                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  if (!Enable) {
    uint32_t *const FirstInst = reinterpret_cast<uint32_t *>(Sled.Address);
    *FirstInst = *(FirstInst + JumpOverInstNum);
  } else {
    const uint64_t LoadHighHalf = 0x3c000000ull + (FuncId >> 16);
    const uint64_t OrLowHalf = 0x60000000ull + (FuncId & 0xffff);
    *reinterpret_cast<uint64_t *>(Sled.Address) =
        LoadHighHalf + (OrLowHalf << 32);
  }

  // Make the rewritten instructions visible to instruction fetch.
  clearCache(reinterpret_cast<void *>(Sled.Address), 8);
  return true;
}
// Tail-exit sleds are patched exactly like ordinary exit sleds; this simply
// forwards to patchFunctionExit and returns its result.
bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  const bool Patched = patchFunctionExit(Enable, FuncId, Sled);
  return Patched;
}
// FIXME: Maybe implement this better?
// No CPU feature detection is performed here; the probe unconditionally
// reports success.
bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
  return true;
}
} // namespace __xray

View File

@ -0,0 +1,37 @@
//===-- xray_powerpc64.inc --------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
//===----------------------------------------------------------------------===//
#include <cstdint>
#include <mutex>
#include <sys/platform/ppc.h>
#include "xray_defs.h"
namespace __xray {
// Returns the current value from __ppc_get_timebase(). No CPU identification
// is done here: CPU is always set to 0.
ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
  const uint64_t TimeBase = __ppc_get_timebase();
  CPU = 0;
  return TimeBase;
}
// Returns the value reported by __ppc_get_timebase_freq().
// The query is serialized through a function-local mutex.
// NOTE(review): presumably because the libc call is not guaranteed to be
// thread-safe -- confirm before removing the lock.
inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
  static std::mutex FreqMutex;
  uint64_t Frequency;
  {
    std::lock_guard<std::mutex> Lock(FreqMutex);
    Frequency = __ppc_get_timebase_freq();
  }
  return Frequency;
}
// No CPU feature detection is performed; the probe unconditionally succeeds.
inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
} // namespace __xray

View File

@ -0,0 +1,171 @@
.text
.abiversion 2
.globl __xray_FunctionEntry
.p2align 4
# Trampoline reached from a patched function-entry sled.
#
# State on entry, as set up by the sled emitted in the PowerPC AsmPrinter:
#   -8(r1)  holds the function id (the sled stored r0 there),
#   r0      holds the instrumented function's original LR (mflr 0 in the sled),
#   LR      points back into the sled (set by the sled's bl).
#
# Frame layout after the stdu below (offsets from the new r1):
#    32 ..  88   r3-r10
#    96 .. 192   f1-f13 (8 bytes each)
#   200 .. 391   vsr34-vsr45 (16 bytes each)
#   392          r2
#   400          function id on entry; later reused to hold this trampoline's
#                own return address
#
# NOTE(review): the frame size (408) is not a multiple of 16 -- confirm whether
# the ABI's stack alignment requirement matters for the C++ callee.
__xray_FunctionEntry:
# Save the instrumented function's original LR (in r0) in the caller's frame.
std 0, 16(1)
stdu 1, -408(1)
# Spill r3-r10, f1-f13, and vsr34-vsr45, which are parameter registers.
# If this appears to be slow, the caller needs to pass in number of generic,
# floating point, and vector parameters, so that we only spill those live ones.
std 3, 32(1)
ld 3, 400(1) # FuncId
std 4, 40(1)
std 5, 48(1)
std 6, 56(1)
std 7, 64(1)
std 8, 72(1)
std 9, 80(1)
std 10, 88(1)
# r4 has been saved above, so it is free to serve as the address register for
# the indexed floating point / vector stores below.
addi 4, 1, 96
stxsdx 1, 0, 4
addi 4, 1, 104
stxsdx 2, 0, 4
addi 4, 1, 112
stxsdx 3, 0, 4
addi 4, 1, 120
stxsdx 4, 0, 4
addi 4, 1, 128
stxsdx 5, 0, 4
addi 4, 1, 136
stxsdx 6, 0, 4
addi 4, 1, 144
stxsdx 7, 0, 4
addi 4, 1, 152
stxsdx 8, 0, 4
addi 4, 1, 160
stxsdx 9, 0, 4
addi 4, 1, 168
stxsdx 10, 0, 4
addi 4, 1, 176
stxsdx 11, 0, 4
addi 4, 1, 184
stxsdx 12, 0, 4
addi 4, 1, 192
stxsdx 13, 0, 4
addi 4, 1, 200
stxvd2x 34, 0, 4
addi 4, 1, 216
stxvd2x 35, 0, 4
addi 4, 1, 232
stxvd2x 36, 0, 4
addi 4, 1, 248
stxvd2x 37, 0, 4
addi 4, 1, 264
stxvd2x 38, 0, 4
addi 4, 1, 280
stxvd2x 39, 0, 4
addi 4, 1, 296
stxvd2x 40, 0, 4
addi 4, 1, 312
stxvd2x 41, 0, 4
addi 4, 1, 328
stxvd2x 42, 0, 4
addi 4, 1, 344
stxvd2x 43, 0, 4
addi 4, 1, 360
stxvd2x 44, 0, 4
addi 4, 1, 376
stxvd2x 45, 0, 4
std 2, 392(1)
# Save this trampoline's return address (into the sled); the function id slot
# is no longer needed because the id already lives in r3.
mflr 0
std 0, 400(1)
# Second argument: XRayEntryType value 0 (presumably the "entry" enumerator --
# verify against xray_interface.h). First argument (r3) is the function id.
li 4, 0
bl _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType
# NOTE(review): nop following the bl -- presumably the slot the linker may
# rewrite to restore r2 after a cross-module call; confirm.
nop
# Reload everything spilled above, in the same order and from the same slots.
addi 4, 1, 96
lxsdx 1, 0, 4
addi 4, 1, 104
lxsdx 2, 0, 4
addi 4, 1, 112
lxsdx 3, 0, 4
addi 4, 1, 120
lxsdx 4, 0, 4
addi 4, 1, 128
lxsdx 5, 0, 4
addi 4, 1, 136
lxsdx 6, 0, 4
addi 4, 1, 144
lxsdx 7, 0, 4
addi 4, 1, 152
lxsdx 8, 0, 4
addi 4, 1, 160
lxsdx 9, 0, 4
addi 4, 1, 168
lxsdx 10, 0, 4
addi 4, 1, 176
lxsdx 11, 0, 4
addi 4, 1, 184
lxsdx 12, 0, 4
addi 4, 1, 192
lxsdx 13, 0, 4
addi 4, 1, 200
lxvd2x 34, 0, 4
addi 4, 1, 216
lxvd2x 35, 0, 4
addi 4, 1, 232
lxvd2x 36, 0, 4
addi 4, 1, 248
lxvd2x 37, 0, 4
addi 4, 1, 264
lxvd2x 38, 0, 4
addi 4, 1, 280
lxvd2x 39, 0, 4
addi 4, 1, 296
lxvd2x 40, 0, 4
addi 4, 1, 312
lxvd2x 41, 0, 4
addi 4, 1, 328
lxvd2x 42, 0, 4
addi 4, 1, 344
lxvd2x 43, 0, 4
addi 4, 1, 360
lxvd2x 44, 0, 4
addi 4, 1, 376
lxvd2x 45, 0, 4
# Restore this trampoline's return address so blr goes back into the sled.
ld 0, 400(1)
mtlr 0
ld 2, 392(1)
ld 3, 32(1)
ld 4, 40(1)
ld 5, 48(1)
ld 6, 56(1)
ld 7, 64(1)
ld 8, 72(1)
ld 9, 80(1)
ld 10, 88(1)
addi 1, 1, 408
# Hand the instrumented function's original LR back in r0; the sled's trailing
# mtlr 0 reinstates it.
ld 0, 16(1)
blr
.globl __xray_FunctionExit
.p2align 4
# Trampoline reached from a patched function-exit sled.
#
# State on entry, as set up by the sled emitted in the PowerPC AsmPrinter:
#   -8(r1)  holds the function id (the sled stored r0 there),
#   r0      holds the instrumented function's original LR (mflr 0 in the sled),
#   LR      points back into the sled (set by the sled's bl).
#
# Frame layout after the stdu below (offsets from the new r1):
#   32         r3
#   40         f1
#   48 .. 63   vsr34
#   64         this trampoline's return address
#
# NOTE(review): r4 is used as a scratch address register and is neither saved
# nor restored -- confirm no return value can live in r4 (or in return
# registers other than r3/f1/vsr34).
# NOTE(review): the frame size (72) is not a multiple of 16 -- confirm whether
# the ABI's stack alignment requirement matters for the C++ callee.
__xray_FunctionExit:
# Save the instrumented function's original LR (in r0) in the caller's frame.
std 0, 16(1)
# Fetch the function id before the stack pointer moves.
ld 0, -8(1) # FuncId
stdu 1, -72(1)
# Spill r3, f1, and vsr34, the return value registers.
std 3, 32(1)
# First argument: the function id.
mr 3, 0
addi 4, 1, 40
stxsdx 1, 0, 4
addi 4, 1, 48
stxvd2x 34, 0, 4
mflr 0
std 0, 64(1)
# Second argument: XRayEntryType value 1 (presumably the "exit" enumerator --
# verify against xray_interface.h).
li 4, 1
bl _ZN6__xray23CallXRayPatchedFunctionEi13XRayEntryType
# NOTE(review): nop following the bl -- presumably the slot the linker may
# rewrite to restore r2 after a cross-module call; confirm.
nop
# Restore this trampoline's return address and the spilled return values.
ld 0, 64(1)
mtlr 0
ld 3, 32(1)
addi 4, 1, 40
lxsdx 1, 0, 4
addi 4, 1, 48
lxvd2x 34, 0, 4
addi 1, 1, 72
# Hand the instrumented function's original LR back in r0; the sled's trailing
# mtlr 0 reinstates it.
ld 0, 16(1)
blr

View File

@ -0,0 +1,15 @@
#include <atomic>
#include <xray/xray_interface.h>
namespace __xray {
extern std::atomic<void (*)(int32_t, XRayEntryType)> XRayPatchedFunction;
// Written in C++ rather than assembly so that the compiler, not the
// trampoline, deals with the TOC. Loads the currently installed handler and,
// if one is set, invokes it with the given function id and entry type.
void CallXRayPatchedFunction(int32_t FuncId, XRayEntryType Type) {
  if (auto Handler = __xray::XRayPatchedFunction.load())
    Handler(FuncId, Type);
}
} // namespace __xray

View File

@ -15,6 +15,8 @@
#if defined(__x86_64__)
#include "xray_x86_64.inc"
#elif defined(__powerpc64__)
#include "xray_powerpc64.inc"
#elif defined(__arm__) || defined(__aarch64__)
// Emulated TSC.
// There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does

View File

@ -157,6 +157,7 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
case Triple::ArchType::arm:
case Triple::ArchType::thumb:
case Triple::ArchType::aarch64:
case Triple::ArchType::ppc64le:
// For the architectures which don't have a single return instruction
prependRetWithPatchableExit(MF, TII);
break;

View File

@ -112,7 +112,9 @@ public:
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
bool runOnMachineFunction(MachineFunction &MF) override {
Subtarget = &MF.getSubtarget<PPCSubtarget>();
return AsmPrinter::runOnMachineFunction(MF);
bool Changed = AsmPrinter::runOnMachineFunction(MF);
emitXRayTable();
return Changed;
}
};
@ -134,6 +136,7 @@ public:
void EmitFunctionBodyStart() override;
void EmitFunctionBodyEnd() override;
void EmitInstruction(const MachineInstr *MI) override;
};
/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
@ -1046,6 +1049,98 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
}
/// Emits \p MI. On 64-bit subtargets the XRay pseudo-instructions
/// PATCHABLE_FUNCTION_ENTER and PATCHABLE_FUNCTION_EXIT are expanded into
/// patchable sleds and recorded via recordSled(); every other instruction (and
/// every instruction on non-64-bit subtargets) is forwarded to
/// PPCAsmPrinter::EmitInstruction.
void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
  if (!Subtarget->isPPC64())
    return PPCAsmPrinter::EmitInstruction(MI);

  switch (MI->getOpcode()) {
  default:
    return PPCAsmPrinter::EmitInstruction(MI);
  case TargetOpcode::PATCHABLE_FUNCTION_ENTER: {
    // .begin:
    //   b .end                    # lis 0, FuncId[16..32]
    //   nop                       # li 0, FuncId[0..15]
    //   std 0, -8(1)
    //   mflr 0
    //   bl __xray_FunctionEntry
    //   nop                       # second half of BL8_NOP
    //   mtlr 0
    // .end:
    //
    // Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when number
    // of instructions change.
    MCSymbol *BeginOfSled = OutContext.createTempSymbol();
    MCSymbol *EndOfSled = OutContext.createTempSymbol();
    OutStreamer->EmitLabel(BeginOfSled);
    EmitToStreamer(*OutStreamer,
                   MCInstBuilder(PPC::B).addExpr(
                       MCSymbolRefExpr::create(EndOfSled, OutContext)));
    EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
    EmitToStreamer(
        *OutStreamer,
        MCInstBuilder(PPC::STD).addReg(PPC::X0).addImm(-8).addReg(PPC::X1));
    EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR8).addReg(PPC::X0));
    EmitToStreamer(*OutStreamer,
                   MCInstBuilder(PPC::BL8_NOP)
                       .addExpr(MCSymbolRefExpr::create(
                           OutContext.getOrCreateSymbol("__xray_FunctionEntry"),
                           OutContext)));
    EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR8).addReg(PPC::X0));
    OutStreamer->EmitLabel(EndOfSled);
    recordSled(BeginOfSled, *MI, SledKind::FUNCTION_ENTER);
    break;
  }
  case TargetOpcode::PATCHABLE_FUNCTION_EXIT: {
    // .p2align 3
    // .begin:
    //   b(lr)?                    # lis 0, FuncId[16..32]
    //   nop                       # li 0, FuncId[0..15]
    //   std 0, -8(1)
    //   mflr 0
    //   bl __xray_FunctionExit
    //   nop                       # second half of BL8_NOP
    //   mtlr 0
    // .end:
    //   b(lr)?
    //
    // Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when number
    // of instructions change.

    // The instruction following the pseudo is the real return; a copy of it
    // is placed at the head of the sled so that the unpatched sled returns
    // immediately.
    const MachineInstr *Next = [&] {
      MachineBasicBlock::const_iterator It(MI);
      ++It;
      // Validate the iterator that is actually dereferenced below.
      assert(It != MI->getParent()->end() &&
             "PATCHABLE_FUNCTION_EXIT must be followed by an instruction");
      assert(It->isReturn());
      return &*It;
    }();
    OutStreamer->EmitCodeAlignment(8);
    MCSymbol *BeginOfSled = OutContext.createTempSymbol();
    OutStreamer->EmitLabel(BeginOfSled);
    MCInst TmpInst;
    LowerPPCMachineInstrToMCInst(Next, TmpInst, *this, false);
    EmitToStreamer(*OutStreamer, TmpInst);
    EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
    EmitToStreamer(
        *OutStreamer,
        MCInstBuilder(PPC::STD).addReg(PPC::X0).addImm(-8).addReg(PPC::X1));
    EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR8).addReg(PPC::X0));
    EmitToStreamer(*OutStreamer,
                   MCInstBuilder(PPC::BL8_NOP)
                       .addExpr(MCSymbolRefExpr::create(
                           OutContext.getOrCreateSymbol("__xray_FunctionExit"),
                           OutContext)));
    EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR8).addReg(PPC::X0));
    recordSled(BeginOfSled, *MI, SledKind::FUNCTION_EXIT);
    break;
  }
  case TargetOpcode::PATCHABLE_TAIL_CALL:
  case TargetOpcode::PATCHABLE_RET:
    // PPC's tail call instruction, e.g. PPC::TCRETURNdi8, doesn't really
    // lower to a PPC::B instruction. The PPC::B instruction is generated
    // before it, and handled by the normal case.
    llvm_unreachable("Tail call is handled in the normal case. See comments "
                     "around this assert.");
  }
}
void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (static_cast<const PPCTargetMachine &>(TM).isELFv2ABI()) {
PPCTargetStreamer *TS =

View File

@ -65,7 +65,9 @@ UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
void PPCInstrInfo::anchor() {}
PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
: PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
: PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
/* CatchRetOpcode */ -1,
STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
Subtarget(STI), RI(STI.getTargetMachine()) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for

View File

@ -318,6 +318,8 @@ public:
/// classifyGlobalReference - Classify a global variable reference for the
/// current subtarget accourding to how we should reference it.
unsigned char classifyGlobalReference(const GlobalValue *GV) const;
bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; }
};
} // End llvm namespace

View File

@ -55,7 +55,8 @@ loadELF64(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
// Find the section named "xray_instr_map".
if (!ObjFile.getBinary()->isELF() ||
ObjFile.getBinary()->getArch() != Triple::x86_64)
!(ObjFile.getBinary()->getArch() == Triple::x86_64 ||
ObjFile.getBinary()->getArch() == Triple::ppc64le))
return make_error<StringError>(
"File format not supported (only does ELF little endian 64-bit).",
std::make_error_code(std::errc::not_supported));