forked from OSchip/llvm-project
233 lines
5.7 KiB
ArmAsm
233 lines
5.7 KiB
ArmAsm
//===-- xray_trampoline_x86.S -----------------------------------*- ASM -*-===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file is a part of XRay, a dynamic runtime instrumentation system.
|
|
//
|
|
// This implements the X86-specific assembler for the trampolines.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "../builtins/assembly.h"
|
|
|
|
.macro SAVE_REGISTERS
        // Spill the registers listed below into a fresh 192-byte area at the
        // top of the stack; RESTORE_REGISTERS reads the same slots back.
        //
        // 192 is a multiple of 16, so %rsp keeps whatever 16-byte alignment it
        // had when the macro was entered. The CFA offset of 200 is this area
        // plus 8 bytes (presumably the return address of the call that entered
        // the trampoline).
        subq    $192, %rsp
        .cfi_def_cfa_offset 200

        // General-purpose registers, one 8-byte slot each (offsets 0..48).
        movq    %r9,      0(%rsp)
        movq    %r8,      8(%rsp)
        movq    %rcx,    16(%rsp)
        movq    %rsi,    24(%rsp)
        movq    %rdx,    32(%rsp)
        movq    %rax,    40(%rsp)
        movq    %rdi,    48(%rsp)

        // xmm0-xmm7, one 16-byte slot each (offsets 56..183). Every slot
        // offset is 8 modulo 16, so unaligned moves are used.
        movupd  %xmm7,   56(%rsp)
        movupd  %xmm6,   72(%rsp)
        movupd  %xmm5,   88(%rsp)
        movupd  %xmm4,  104(%rsp)
        movupd  %xmm3,  120(%rsp)
        movupd  %xmm2,  136(%rsp)
        movupd  %xmm1,  152(%rsp)
        movupd  %xmm0,  168(%rsp)

        // Frame pointer goes in the topmost slot.
        movq    %rbp,   184(%rsp)
.endm
|
|
|
|
.macro RESTORE_REGISTERS
        // Reload every register from the 192-byte spill area laid out by
        // SAVE_REGISTERS, then release the area. %rsp must point at the base
        // of that area when this macro is expanded.

        // General-purpose registers (offsets 0..48).
        movq      0(%rsp), %r9
        movq      8(%rsp), %r8
        movq     16(%rsp), %rcx
        movq     24(%rsp), %rsi
        movq     32(%rsp), %rdx
        movq     40(%rsp), %rax
        movq     48(%rsp), %rdi

        // xmm0-xmm7 (offsets 56..183), reloaded with unaligned moves to match
        // the stores.
        movupd   56(%rsp), %xmm7
        movupd   72(%rsp), %xmm6
        movupd   88(%rsp), %xmm5
        movupd  104(%rsp), %xmm4
        movupd  120(%rsp), %xmm3
        movupd  136(%rsp), %xmm2
        movupd  152(%rsp), %xmm1
        movupd  168(%rsp), %xmm0

        // Frame pointer.
        movq    184(%rsp), %rbp

        // Drop the spill area; the CFA is back to 8 bytes above %rsp.
        addq    $192, %rsp
        .cfi_def_cfa_offset 8
.endm
|
|
|
|
// Everything that follows is assembled into the .text (code) section.
.text
|
|
// Logical source file name recorded for this translation unit.
.file "xray_trampoline_x86.S"
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
.globl __xray_FunctionEntry
.p2align 4, 0x90
.type __xray_FunctionEntry,@function

// __xray_FunctionEntry
//   Trampoline for function-entry events.
//   In:        %r10d = xray_instr_map index of the instrumented function,
//              placed there by the patched function prolog.
//   Action:    if __xray::XRayPatchedFunction is non-null, calls it with
//              %edi = function index and %esi = 0.
//   Preserves: the registers covered by SAVE_REGISTERS / RESTORE_REGISTERS.
__xray_FunctionEntry:
        .cfi_startproc
        SAVE_REGISTERS

        // The handler pointer may be rewritten concurrently by __xray_patch(),
        // so it has to be read atomically. On x86/amd64 a single type-aligned
        // MOV is sufficient for that.
        movq    _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
        testq   %rax, %rax
        jz      .Lxray_entry_restore            // no handler installed

        xorl    %esi, %esi                      // second argument: 0
        movl    %r10d, %edi                     // first argument: function index
        callq   *%rax

.Lxray_entry_restore:
        RESTORE_REGISTERS
        retq
        .size __xray_FunctionEntry, . - __xray_FunctionEntry
        .cfi_endproc
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
.globl __xray_FunctionExit
.p2align 4, 0x90
.type __xray_FunctionExit,@function

// __xray_FunctionExit
//   Trampoline for function-exit events.
//   In:        %r10d = function index passed on to the handler.
//   Action:    if __xray::XRayPatchedFunction is non-null, calls it with
//              %edi = function index and %esi = 1.
//   Preserves: %rbp, %rax, %rdx, %xmm0, %xmm1.
//
//   This trampoline is assumed to be reached by a jump rather than a call,
//   so only the registers needed for returning are kept intact.
__xray_FunctionExit:
        .cfi_startproc
        // 56-byte spill area: 3 x 8 bytes of integer state + 2 x 16 bytes of
        // vector state.
        subq    $56, %rsp
        .cfi_def_cfa_offset 64
        movq    %rdx,    0(%rsp)
        movq    %rax,    8(%rsp)
        movupd  %xmm1,  16(%rsp)
        movupd  %xmm0,  32(%rsp)
        movq    %rbp,   48(%rsp)

        movq    _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
        testq   %rax, %rax
        jz      .Lxray_exit_restore             // no handler installed

        movl    $1, %esi                        // second argument: 1 (exit)
        movl    %r10d, %edi                     // first argument: function index
        callq   *%rax

.Lxray_exit_restore:
        // Put the saved registers back and release the spill area.
        movq     0(%rsp), %rdx
        movq     8(%rsp), %rax
        movupd  16(%rsp), %xmm1
        movupd  32(%rsp), %xmm0
        movq    48(%rsp), %rbp
        addq    $56, %rsp
        .cfi_def_cfa_offset 8
        retq
        .size __xray_FunctionExit, . - __xray_FunctionExit
        .cfi_endproc
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
.globl __xray_FunctionTailExit
.p2align 4, 0x90
.type __xray_FunctionTailExit,@function

// __xray_FunctionTailExit
//   Trampoline for tail-call exits.
//   In:        %r10d = function index passed on to the handler.
//   Action:    if __xray::XRayPatchedFunction is non-null, calls it with
//              %edi = function index and %esi = 1.
//   Preserves: the registers covered by SAVE_REGISTERS / RESTORE_REGISTERS.
//
//   The register set saved is the same as for the entry trampoline, but the
//   event is reported as an exit. A separate entry type that tells a tail
//   exit apart from a normal exit is planned; adding it also requires
//   incrementing the version number for the header.
__xray_FunctionTailExit:
        .cfi_startproc
        SAVE_REGISTERS

        movq    _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
        testq   %rax, %rax
        jz      .Lxray_tailexit_restore         // no handler installed

        movl    $1, %esi                        // second argument: 1 (exit)
        movl    %r10d, %edi                     // first argument: function index
        callq   *%rax

.Lxray_tailexit_restore:
        RESTORE_REGISTERS
        retq
        .size __xray_FunctionTailExit, . - __xray_FunctionTailExit
        .cfi_endproc
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
.globl __xray_ArgLoggerEntry
.p2align 4, 0x90
.type __xray_ArgLoggerEntry,@function

// __xray_ArgLoggerEntry
//   Entry trampoline that also forwards the first argument of the
//   instrumented function.
//   In:        %r10d = 32-bit function ID, %rdi = first argument of the
//              instrumented function.
//   Action:    picks __xray::XRayArgLogger if it is non-null, otherwise
//              __xray::XRayPatchedFunction; if either is set, calls it with
//              %edi = function ID, %rsi = 0 (XRayEntryType::ENTRY) and
//              %rdx = the original %rdi.
//   Preserves: the registers covered by SAVE_REGISTERS / RESTORE_REGISTERS.
__xray_ArgLoggerEntry:
        .cfi_startproc
        SAVE_REGISTERS

        // As with the other trampolines, each handler pointer must be loaded
        // atomically; a plain MOV is fine for that.
        movq    _ZN6__xray13XRayArgLoggerE(%rip), %rax
        testq   %rax, %rax
        jnz     .Lxray_arg1_call

        // No arg1 logging handler is set: defer to no-arg logging.
        movq    _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
        testq   %rax, %rax
        jz      .Lxray_arg1_restore

.Lxray_arg1_call:
        // %rdi has to be copied out before it is overwritten with the ID.
        movq    %rdi, %rdx                      // third argument: original first argument
        xorl    %esi, %esi                      // second argument: XRayEntryType::ENTRY
        movl    %r10d, %edi                     // first argument: function ID
        callq   *%rax

.Lxray_arg1_restore:
        RESTORE_REGISTERS
        retq
        .size __xray_ArgLoggerEntry, . - __xray_ArgLoggerEntry
        .cfi_endproc
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
.globl __xray_CustomEvent
.p2align 4, 0x90
.type __xray_CustomEvent,@function

// __xray_CustomEvent
//   Trampoline for custom events.
//   In:        %rdi, %rsi = the two arguments of the event; they are passed
//              through to the handler unchanged.
//   Action:    if __xray::XRayPatchedCustomEvent is non-null, calls it.
//   Preserves: %rax, %rbp, %rdi, %rsi, %rdx, %rcx, %r8-%r11 and %xmm0-%xmm7.
//              Flags are not preserved (testq below), nor are %xmm8-%xmm15.
//
//   The handler is ordinary compiled code and is free to overwrite any
//   caller-saved register. Saving only %rax and %rbp around the call would
//   let those overwrites leak back into the instrumented code, so the same
//   SAVE_REGISTERS / RESTORE_REGISTERS pair used by the entry trampolines is
//   applied here, extended with %r10 and %r11 which that pair does not cover.
//
//   Total stack adjustment is 192 + 16 = 208 bytes, a multiple of 16, so
//   %rsp at the handler call has the same 16-byte alignment it had on entry
//   to this trampoline.
__xray_CustomEvent:
        .cfi_startproc
        SAVE_REGISTERS                          // CFA offset is 200 from here
        subq    $16, %rsp
        .cfi_def_cfa_offset 216
        movq    %r10, 8(%rsp)
        movq    %r11, 0(%rsp)

        // Load the handler pointer; %rax is free to use because it has been
        // saved above.
        movq    _ZN6__xray22XRayPatchedCustomEventE(%rip), %rax
        testq   %rax, %rax
        je      .LcustomEventCleanup            // no handler installed

        // %rdi and %rsi still hold the two event arguments (nothing above
        // writes to them), so the handler can be called directly.
        callq   *%rax

.LcustomEventCleanup:
        movq    0(%rsp), %r11
        movq    8(%rsp), %r10
        addq    $16, %rsp
        .cfi_def_cfa_offset 200
        RESTORE_REGISTERS
        retq

.Ltmp8:
        .size __xray_CustomEvent, .Ltmp8-__xray_CustomEvent
        .cfi_endproc
|
|
|
|
// NOTE(review): presumably a macro from ../builtins/assembly.h that marks this
// object as not needing an executable stack -- confirm against that header.
NO_EXEC_STACK_DIRECTIVE
|