2009-09-03 01:35:12 +08:00
|
|
|
//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2009-09-03 01:35:12 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file contains code to lower X86 MachineInstrs to their corresponding
|
|
|
|
// MCInst records.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2019-05-11 07:24:38 +08:00
|
|
|
#include "MCTargetDesc/X86ATTInstPrinter.h"
|
2014-03-19 14:53:25 +08:00
|
|
|
#include "MCTargetDesc/X86BaseInfo.h"
|
2019-05-11 07:24:38 +08:00
|
|
|
#include "MCTargetDesc/X86InstComments.h"
|
2020-04-14 00:33:05 +08:00
|
|
|
#include "MCTargetDesc/X86ShuffleDecode.h"
|
[codeview] Implement FPO data assembler directives
Summary:
This adds a set of new directives that describe 32-bit x86 prologues.
The directives are limited and do not expose the full complexity of
codeview FPO data. They are merely a convenience for the compiler to
generate more readable assembly so we don't need to generate tons of
labels in CodeGen. If our prologue emission changes in the future, we
can change the set of available directives to suit our needs. These are
modelled after the .seh_ directives, which use a different format that
interacts with exception handling.
The directives are:
.cv_fpo_proc _foo
.cv_fpo_pushreg ebp/ebx/etc
.cv_fpo_setframe ebp/esi/etc
.cv_fpo_stackalloc 200
.cv_fpo_endprologue
.cv_fpo_endproc
.cv_fpo_data _foo
I tried to follow the implementation of ARM EHABI CFI directives by
sinking most directives out of MCStreamer and into X86TargetStreamer.
This helps avoid polluting non-X86 code with WinCOFF specific logic.
I used cdb to confirm that this can show locals in parent CSRs in a few
cases, most importantly the one where we use ESI as a frame pointer,
i.e. the one in http://crbug.com/756153#c28
Once we have cdb integration in debuginfo-tests, we can add integration
tests there.
Reviewers: majnemer, hans
Subscribers: aemerson, mgorny, kristof.beyls, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D38776
llvm-svn: 315513
2017-10-12 05:24:33 +08:00
|
|
|
#include "MCTargetDesc/X86TargetStreamer.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "X86AsmPrinter.h"
|
|
|
|
#include "X86RegisterInfo.h"
|
|
|
|
#include "X86ShuffleDecodeConstantPool.h"
|
2020-04-19 18:38:50 +08:00
|
|
|
#include "X86Subtarget.h"
|
2015-06-16 02:44:01 +08:00
|
|
|
#include "llvm/ADT/Optional.h"
|
2012-12-04 00:50:05 +08:00
|
|
|
#include "llvm/ADT/SmallString.h"
|
2016-04-19 13:24:47 +08:00
|
|
|
#include "llvm/ADT/iterator_range.h"
|
2014-07-26 07:47:11 +08:00
|
|
|
#include "llvm/CodeGen/MachineConstantPool.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
2009-09-16 14:25:03 +08:00
|
|
|
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/CodeGen/MachineOperand.h"
|
2013-11-01 06:11:56 +08:00
|
|
|
#include "llvm/CodeGen/StackMaps.h"
|
2014-03-19 14:53:25 +08:00
|
|
|
#include "llvm/IR/DataLayout.h"
|
|
|
|
#include "llvm/IR/GlobalValue.h"
|
2014-01-08 05:19:40 +08:00
|
|
|
#include "llvm/IR/Mangler.h"
|
2011-07-15 07:50:31 +08:00
|
|
|
#include "llvm/MC/MCAsmInfo.h"
|
2014-07-25 04:40:55 +08:00
|
|
|
#include "llvm/MC/MCCodeEmitter.h"
|
2009-09-03 01:35:12 +08:00
|
|
|
#include "llvm/MC/MCContext.h"
|
|
|
|
#include "llvm/MC/MCExpr.h"
|
2015-05-16 06:19:42 +08:00
|
|
|
#include "llvm/MC/MCFixup.h"
|
2009-09-03 01:35:12 +08:00
|
|
|
#include "llvm/MC/MCInst.h"
|
2012-11-26 21:34:22 +08:00
|
|
|
#include "llvm/MC/MCInstBuilder.h"
|
XRay: Add entry and exit sleds
Summary:
In this patch we implement the following parts of XRay:
- Supporting a function attribute named 'function-instrument' which currently only supports 'xray-always'. We should be able to use this attribute for other instrumentation approaches.
- Supporting a function attribute named 'xray-instruction-threshold' used to determine whether a function is instrumented with a minimum number of instructions (IR instruction counts).
- X86-specific nop sleds as described in the white paper.
- A machine function pass that adds the different instrumentation marker instructions at a very late stage.
- A way of identifying which return opcode is considered "normal" for each architecture.
There are some caveats here:
1) We don't handle PATCHABLE_RET on platforms other than x86_64 yet -- this means that if IR used PATCHABLE_RET directly instead of a normal ret, instruction lowering for that platform might do the wrong thing. We think this should be handled at instruction selection time so that, by default, it is unpacked on platforms where XRay is not available yet.
2) The generated section for X86 is different from what is described in the white paper, for the sole reason that LLVM allows us to do this neatly. We're taking the opportunity to deviate from the white paper in this respect so that we can get richer information from the runtime library.
Reviewers: sanjoy, eugenis, kcc, pcc, echristo, rnk
Subscribers: niravd, majnemer, atrick, rnk, emaste, bmakam, mcrosier, mehdi_amini, llvm-commits
Differential Revision: http://reviews.llvm.org/D19904
llvm-svn: 275367
2016-07-14 12:06:33 +08:00
|
|
|
#include "llvm/MC/MCSection.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/MC/MCSectionELF.h"
|
2009-09-03 01:35:12 +08:00
|
|
|
#include "llvm/MC/MCStreamer.h"
|
2010-03-13 03:42:40 +08:00
|
|
|
#include "llvm/MC/MCSymbol.h"
|
XRay: Add entry and exit sleds
Summary:
In this patch we implement the following parts of XRay:
- Supporting a function attribute named 'function-instrument' which currently only supports 'xray-always'. We should be able to use this attribute for other instrumentation approaches.
- Supporting a function attribute named 'xray-instruction-threshold' used to determine whether a function is instrumented with a minimum number of instructions (IR instruction counts).
- X86-specific nop sleds as described in the white paper.
- A machine function pass that adds the different instrumentation marker instructions at a very late stage.
- A way of identifying which return opcode is considered "normal" for each architecture.
There are some caveats here:
1) We don't handle PATCHABLE_RET on platforms other than x86_64 yet -- this means that if IR used PATCHABLE_RET directly instead of a normal ret, instruction lowering for that platform might do the wrong thing. We think this should be handled at instruction selection time so that, by default, it is unpacked on platforms where XRay is not available yet.
2) The generated section for X86 is different from what is described in the white paper, for the sole reason that LLVM allows us to do this neatly. We're taking the opportunity to deviate from the white paper in this respect so that we can get richer information from the runtime library.
Reviewers: sanjoy, eugenis, kcc, pcc, echristo, rnk
Subscribers: niravd, majnemer, atrick, rnk, emaste, bmakam, mcrosier, mehdi_amini, llvm-commits
Differential Revision: http://reviews.llvm.org/D19904
llvm-svn: 275367
2016-07-14 12:06:33 +08:00
|
|
|
#include "llvm/MC/MCSymbolELF.h"
|
2018-03-24 07:58:19 +08:00
|
|
|
#include "llvm/Target/TargetLoweringObjectFile.h"
|
2020-05-24 19:30:22 +08:00
|
|
|
#include "llvm/Target/TargetMachine.h"
|
XRay: Add entry and exit sleds
Summary:
In this patch we implement the following parts of XRay:
- Supporting a function attribute named 'function-instrument' which currently only supports 'xray-always'. We should be able to use this attribute for other instrumentation approaches.
- Supporting a function attribute named 'xray-instruction-threshold' used to determine whether a function is instrumented with a minimum number of instructions (IR instruction counts).
- X86-specific nop sleds as described in the white paper.
- A machine function pass that adds the different instrumentation marker instructions at a very late stage.
- A way of identifying which return opcode is considered "normal" for each architecture.
There are some caveats here:
1) We don't handle PATCHABLE_RET on platforms other than x86_64 yet -- this means that if IR used PATCHABLE_RET directly instead of a normal ret, instruction lowering for that platform might do the wrong thing. We think this should be handled at instruction selection time so that, by default, it is unpacked on platforms where XRay is not available yet.
2) The generated section for X86 is different from what is described in the white paper, for the sole reason that LLVM allows us to do this neatly. We're taking the opportunity to deviate from the white paper in this respect so that we can get richer information from the runtime library.
Reviewers: sanjoy, eugenis, kcc, pcc, echristo, rnk
Subscribers: niravd, majnemer, atrick, rnk, emaste, bmakam, mcrosier, mehdi_amini, llvm-commits
Differential Revision: http://reviews.llvm.org/D19904
llvm-svn: 275367
2016-07-14 12:06:33 +08:00
|
|
|
|
2009-09-03 01:35:12 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
2012-10-16 14:01:50 +08:00
|
|
|
namespace {
|
|
|
|
|
|
|
|
/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
|
|
|
|
class X86MCInstLower {
|
|
|
|
MCContext &Ctx;
|
|
|
|
const MachineFunction &MF;
|
|
|
|
const TargetMachine &TM;
|
|
|
|
const MCAsmInfo &MAI;
|
|
|
|
X86AsmPrinter &AsmPrinter;
|
2018-04-18 05:30:29 +08:00
|
|
|
|
2012-10-16 14:01:50 +08:00
|
|
|
public:
|
2013-10-30 00:11:22 +08:00
|
|
|
X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
|
2012-10-16 14:01:50 +08:00
|
|
|
|
2015-06-16 02:44:01 +08:00
|
|
|
Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
|
|
|
|
const MachineOperand &MO) const;
|
2012-10-16 14:01:50 +08:00
|
|
|
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
|
|
|
|
|
|
|
|
MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
|
|
|
|
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
|
|
|
|
|
|
|
|
private:
|
|
|
|
MachineModuleInfoMachO &getMachOMMI() const;
|
|
|
|
};
|
|
|
|
|
|
|
|
} // end anonymous namespace
|
|
|
|
|
2020-01-17 06:08:31 +08:00
|
|
|
/// A RAII helper which defines a region of instructions which can't have
|
|
|
|
/// padding added between them for correctness.
|
|
|
|
struct NoAutoPaddingScope {
|
|
|
|
MCStreamer &OS;
|
|
|
|
const bool OldAllowAutoPadding;
|
|
|
|
NoAutoPaddingScope(MCStreamer &OS)
|
|
|
|
: OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
|
|
|
|
changeAndComment(false);
|
|
|
|
}
|
|
|
|
~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
|
|
|
|
void changeAndComment(bool b) {
|
|
|
|
if (b == OS.getAllowAutoPadding())
|
|
|
|
return;
|
|
|
|
OS.setAllowAutoPadding(b);
|
|
|
|
if (b)
|
|
|
|
OS.emitRawComment("autopadding");
|
|
|
|
else
|
|
|
|
OS.emitRawComment("noautopadding");
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2014-07-25 04:40:55 +08:00
|
|
|
// Emit a minimal sequence of nops spanning NumBytes bytes.
|
2020-06-18 00:08:12 +08:00
|
|
|
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
|
|
|
|
const X86Subtarget *Subtarget);
|
2014-07-25 04:40:55 +08:00
|
|
|
|
2016-04-20 02:48:16 +08:00
|
|
|
/// Account for \p Inst in the current shadow, if one is open.
///
/// The instruction is encoded with \p CodeEmitter into a scratch buffer only
/// to learn its encoded size, which is added to CurrentShadowSize. Counting
/// stops as soon as the accumulated size reaches RequiredShadowSize.
void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
                                                 const MCSubtargetInfo &STI,
                                                 MCCodeEmitter *CodeEmitter) {
  // Nothing to measure unless a shadow is currently being tracked.
  if (!InShadow)
    return;

  // Scratch storage: the bytes and fixups themselves are discarded.
  SmallString<256> Encoded;
  SmallVector<MCFixup, 4> ScratchFixups;
  raw_svector_ostream EncodedOS(Encoded);
  CodeEmitter->encodeInstruction(Inst, EncodedOS, ScratchFixups, STI);

  CurrentShadowSize += Encoded.size();

  // Keep counting only while the shadow is still too small.
  InShadow = CurrentShadowSize < RequiredShadowSize;
}
|
2014-07-25 04:40:55 +08:00
|
|
|
|
2016-04-20 02:48:16 +08:00
|
|
|
void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
|
2014-07-25 04:40:55 +08:00
|
|
|
MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
|
2016-04-20 02:48:16 +08:00
|
|
|
if (InShadow && CurrentShadowSize < RequiredShadowSize) {
|
|
|
|
InShadow = false;
|
2020-06-18 00:08:12 +08:00
|
|
|
emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
|
|
|
|
&MF->getSubtarget<X86Subtarget>());
|
2014-07-25 04:40:55 +08:00
|
|
|
}
|
2016-04-20 02:48:16 +08:00
|
|
|
}
|
2014-07-25 04:40:55 +08:00
|
|
|
|
2016-04-20 02:48:16 +08:00
|
|
|
/// Emit \p Inst to the output streamer, then report it to the stack-map
/// shadow tracker so its encoded size can be counted.
void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
  const MCSubtargetInfo &STI = getSubtargetInfo();
  OutStreamer->emitInstruction(Inst, STI);
  SMShadowTracker.count(Inst, STI, CodeEmitter.get());
}
|
2014-07-25 04:40:55 +08:00
|
|
|
|
2013-10-30 00:11:22 +08:00
|
|
|
/// Bind the lowering helper to the function \p mf and the printer
/// \p asmprinter; the MC context, target machine and assembler info are all
/// derived from \p mf.
X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
                               X86AsmPrinter &asmprinter)
    : Ctx(mf.getContext()),
      MF(mf),
      TM(mf.getTarget()),
      MAI(*mf.getTarget().getMCAsmInfo()),
      AsmPrinter(asmprinter) {}
|
2009-09-13 04:34:57 +08:00
|
|
|
|
2009-09-16 14:25:03 +08:00
|
|
|
/// Return the MachineModuleInfoMachO object-file info stored in the module
/// info of the function being lowered.
MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
  auto &&ModuleInfo = MF.getMMI();
  return ModuleInfo.getObjFileInfo<MachineModuleInfoMachO>();
}
|
|
|
|
|
2010-02-09 07:03:41 +08:00
|
|
|
/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
|
|
|
|
/// operand to an MCSymbol.
|
2018-04-18 05:30:29 +08:00
|
|
|
MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
|
2020-01-23 04:27:17 +08:00
|
|
|
const Triple &TT = TM.getTargetTriple();
|
|
|
|
if (MO.isGlobal() && TT.isOSBinFormatELF())
|
|
|
|
return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());
|
|
|
|
|
2015-07-16 14:11:10 +08:00
|
|
|
const DataLayout &DL = MF.getDataLayout();
|
2018-04-18 05:30:29 +08:00
|
|
|
assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
|
|
|
|
"Isn't a symbol reference");
|
2010-02-09 07:03:41 +08:00
|
|
|
|
2015-06-03 08:02:40 +08:00
|
|
|
MCSymbol *Sym = nullptr;
|
2009-09-11 13:58:44 +08:00
|
|
|
SmallString<128> Name;
|
2013-11-29 04:12:44 +08:00
|
|
|
StringRef Suffix;
|
|
|
|
|
|
|
|
switch (MO.getTargetFlags()) {
|
2015-06-11 09:31:48 +08:00
|
|
|
case X86II::MO_DLLIMPORT:
|
|
|
|
// Handle dllimport linkage.
|
|
|
|
Name += "__imp_";
|
|
|
|
break;
|
[MinGW] [X86] Add stubs for references to data variables that might end up imported from a dll
Variables declared with the dllimport attribute are accessed via a
stub variable named __imp_<var>. In MinGW configurations, variables that
aren't declared with a dllimport attribute might still end up imported
from another DLL with runtime pseudo relocs.
For x86_64, this avoids the risk that the target is out of range
for a 32 bit PC relative reference, in case the target DLL is loaded
further than 4 GB from the reference. It also avoids having to make the
text section writable at runtime when doing the runtime fixups, which
makes it worthwhile to do for i386 as well.
Add stub variables for all dso local data references where a definition
of the variable isn't visible within the module, since the DLL data
autoimporting might make them imported even though they are marked as
dso local within LLVM.
Don't do this for variables that actually are defined within the same
module, since we then know for sure that it actually is dso local.
Don't do this for references to functions, since there's no need for
runtime pseudo relocations for autoimporting them; if a function from
a different DLL is called without the appropriate dllimport attribute,
the call just gets routed via a thunk instead.
GCC does something similar since 4.9 (when compiling with -mcmodel=medium
or large; from that version, medium is the default code model for x86_64
mingw), but only for x86_64.
Differential Revision: https://reviews.llvm.org/D51288
llvm-svn: 340942
2018-08-30 01:28:34 +08:00
|
|
|
case X86II::MO_COFFSTUB:
|
|
|
|
Name += ".refptr.";
|
|
|
|
break;
|
2013-11-29 04:12:44 +08:00
|
|
|
case X86II::MO_DARWIN_NONLAZY:
|
|
|
|
case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
|
|
|
|
Suffix = "$non_lazy_ptr";
|
|
|
|
break;
|
|
|
|
}
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2013-12-05 13:19:12 +08:00
|
|
|
if (!Suffix.empty())
|
2015-07-16 14:11:10 +08:00
|
|
|
Name += DL.getPrivateGlobalPrefix();
|
2013-12-05 13:19:12 +08:00
|
|
|
|
2012-10-17 10:22:27 +08:00
|
|
|
if (MO.isGlobal()) {
|
2010-03-13 03:42:40 +08:00
|
|
|
const GlobalValue *GV = MO.getGlobal();
|
2014-02-20 01:23:20 +08:00
|
|
|
AsmPrinter.getNameWithPrefix(Name, GV);
|
2012-10-17 10:22:27 +08:00
|
|
|
} else if (MO.isSymbol()) {
|
2015-07-16 14:11:10 +08:00
|
|
|
Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
|
2012-10-17 10:22:27 +08:00
|
|
|
} else if (MO.isMBB()) {
|
2015-06-03 08:02:40 +08:00
|
|
|
assert(Suffix.empty());
|
|
|
|
Sym = MO.getMBB()->getSymbol();
|
2010-02-09 07:03:41 +08:00
|
|
|
}
|
|
|
|
|
2013-11-29 04:12:44 +08:00
|
|
|
Name += Suffix;
|
2015-06-03 08:02:40 +08:00
|
|
|
if (!Sym)
|
|
|
|
Sym = Ctx.getOrCreateSymbol(Name);
|
2013-12-05 13:19:12 +08:00
|
|
|
|
2010-02-09 07:03:41 +08:00
|
|
|
// If the target flags on the operand changes the name of the symbol, do that
|
|
|
|
// before we return the symbol.
|
2009-09-03 13:06:07 +08:00
|
|
|
switch (MO.getTargetFlags()) {
|
2018-04-18 05:30:29 +08:00
|
|
|
default:
|
|
|
|
break;
|
[MinGW] [X86] Add stubs for references to data variables that might end up imported from a dll
Variables declared with the dllimport attribute are accessed via a
stub variable named __imp_<var>. In MinGW configurations, variables that
aren't declared with a dllimport attribute might still end up imported
from another DLL with runtime pseudo relocs.
For x86_64, this avoids the risk that the target is out of range
for a 32 bit PC relative reference, in case the target DLL is loaded
further than 4 GB from the reference. It also avoids having to make the
text section writable at runtime when doing the runtime fixups, which
makes it worthwhile to do for i386 as well.
Add stub variables for all dso local data references where a definition
of the variable isn't visible within the module, since the DLL data
autoimporting might make them imported even though they are marked as
dso local within LLVM.
Don't do this for variables that actually are defined within the same
module, since we then know for sure that it actually is dso local.
Don't do this for references to functions, since there's no need for
runtime pseudo relocations for autoimporting them; if a function from
a different DLL is called without the appropriate dllimport attribute,
the call just gets routed via a thunk instead.
GCC does something similar since 4.9 (when compiling with -mcmodel=medium
or large; from that version, medium is the default code model for x86_64
mingw), but only for x86_64.
Differential Revision: https://reviews.llvm.org/D51288
llvm-svn: 340942
2018-08-30 01:28:34 +08:00
|
|
|
case X86II::MO_COFFSTUB: {
|
|
|
|
MachineModuleInfoCOFF &MMICOFF =
|
|
|
|
MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
|
|
|
|
MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
|
|
|
|
if (!StubSym.getPointer()) {
|
|
|
|
assert(MO.isGlobal() && "Extern symbol not handled yet");
|
|
|
|
StubSym = MachineModuleInfoImpl::StubValueTy(
|
2018-08-31 16:00:31 +08:00
|
|
|
AsmPrinter.getSymbol(MO.getGlobal()), true);
|
[MinGW] [X86] Add stubs for references to data variables that might end up imported from a dll
Variables declared with the dllimport attribute are accessed via a
stub variable named __imp_<var>. In MinGW configurations, variables that
aren't declared with a dllimport attribute might still end up imported
from another DLL with runtime pseudo relocs.
For x86_64, this avoids the risk that the target is out of range
for a 32 bit PC relative reference, in case the target DLL is loaded
further than 4 GB from the reference. It also avoids having to make the
text section writable at runtime when doing the runtime fixups, which
makes it worthwhile to do for i386 as well.
Add stub variables for all dso local data references where a definition
of the variable isn't visible within the module, since the DLL data
autoimporting might make them imported even though they are marked as
dso local within LLVM.
Don't do this for variables that actually are defined within the same
module, since we then know for sure that it actually is dso local.
Don't do this for references to functions, since there's no need for
runtime pseudo relocations for autoimporting them; if a function from
a different DLL is called without the appropriate dllimport attribute,
the call just gets routed via a thunk instead.
GCC does something similar since 4.9 (when compiling with -mcmodel=medium
or large; from that version, medium is the default code model for x86_64
mingw), but only for x86_64.
Differential Revision: https://reviews.llvm.org/D51288
llvm-svn: 340942
2018-08-30 01:28:34 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2009-09-03 13:06:07 +08:00
|
|
|
case X86II::MO_DARWIN_NONLAZY:
|
2009-09-11 14:59:18 +08:00
|
|
|
case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
|
2010-03-11 06:34:10 +08:00
|
|
|
MachineModuleInfoImpl::StubValueTy &StubSym =
|
2018-04-18 05:30:29 +08:00
|
|
|
getMachOMMI().getGVStubEntry(Sym);
|
2014-04-25 13:30:21 +08:00
|
|
|
if (!StubSym.getPointer()) {
|
2010-02-09 07:03:41 +08:00
|
|
|
assert(MO.isGlobal() && "Extern symbol not handled yet");
|
2018-04-18 05:30:29 +08:00
|
|
|
StubSym = MachineModuleInfoImpl::StubValueTy(
|
|
|
|
AsmPrinter.getSymbol(MO.getGlobal()),
|
|
|
|
!MO.getGlobal()->hasInternalLinkage());
|
2010-02-09 07:03:41 +08:00
|
|
|
}
|
2013-11-29 04:12:44 +08:00
|
|
|
break;
|
2009-09-11 14:59:18 +08:00
|
|
|
}
|
2009-09-03 12:44:53 +08:00
|
|
|
}
|
2010-02-09 07:03:41 +08:00
|
|
|
|
2013-11-29 04:12:44 +08:00
|
|
|
return Sym;
|
2009-09-03 12:56:20 +08:00
|
|
|
}
|
|
|
|
|
2009-09-13 04:34:57 +08:00
|
|
|
MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
|
|
|
|
MCSymbol *Sym) const {
|
2009-09-03 15:30:56 +08:00
|
|
|
// FIXME: We would like an efficient form for this, so we don't have to do a
|
|
|
|
// lot of extra uniquing.
|
2014-04-25 13:30:21 +08:00
|
|
|
const MCExpr *Expr = nullptr;
|
2010-03-16 07:51:06 +08:00
|
|
|
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2009-09-03 12:56:20 +08:00
|
|
|
switch (MO.getTargetFlags()) {
|
2018-04-18 05:30:29 +08:00
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown target flag on GV operand");
|
|
|
|
case X86II::MO_NO_FLAG: // No flag.
|
2009-09-03 13:06:07 +08:00
|
|
|
// These affect the name of the symbol, not any suffix.
|
|
|
|
case X86II::MO_DARWIN_NONLAZY:
|
|
|
|
case X86II::MO_DLLIMPORT:
|
[MinGW] [X86] Add stubs for references to data variables that might end up imported from a dll
Variables declared with the dllimport attribute are accessed via a
stub variable named __imp_<var>. In MinGW configurations, variables that
aren't declared with a dllimport attribute might still end up imported
from another DLL with runtime pseudo relocs.
For x86_64, this avoids the risk that the target is out of range
for a 32 bit PC relative reference, in case the target DLL is loaded
further than 4 GB from the reference. It also avoids having to make the
text section writable at runtime when doing the runtime fixups, which
makes it worthwhile to do for i386 as well.
Add stub variables for all dso local data references where a definition
of the variable isn't visible within the module, since the DLL data
autoimporting might make them imported even though they are marked as
dso local within LLVM.
Don't do this for variables that actually are defined within the same
module, since we then know for sure that it actually is dso local.
Don't do this for references to functions, since there's no need for
runtime pseudo relocations for autoimporting them; if a function from
a different DLL is called without the appropriate dllimport attribute,
the call just gets routed via a thunk instead.
GCC does something similar since 4.9 (when compiling with -mcmodel=medium
or large; from that version, medium is the default code model for x86_64
mingw), but only for x86_64.
Differential Revision: https://reviews.llvm.org/D51288
llvm-svn: 340942
2018-08-30 01:28:34 +08:00
|
|
|
case X86II::MO_COFFSTUB:
|
2009-09-03 13:06:07 +08:00
|
|
|
break;
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2018-04-18 05:30:29 +08:00
|
|
|
case X86II::MO_TLVP:
|
|
|
|
RefKind = MCSymbolRefExpr::VK_TLVP;
|
|
|
|
break;
|
2010-06-03 12:07:48 +08:00
|
|
|
case X86II::MO_TLVP_PIC_BASE:
|
2015-05-30 09:25:56 +08:00
|
|
|
Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
|
2010-07-15 07:04:59 +08:00
|
|
|
// Subtract the pic base.
|
2018-04-18 05:30:29 +08:00
|
|
|
Expr = MCBinaryExpr::createSub(
|
|
|
|
Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
|
|
|
|
break;
|
|
|
|
case X86II::MO_SECREL:
|
|
|
|
RefKind = MCSymbolRefExpr::VK_SECREL;
|
|
|
|
break;
|
|
|
|
case X86II::MO_TLSGD:
|
|
|
|
RefKind = MCSymbolRefExpr::VK_TLSGD;
|
|
|
|
break;
|
|
|
|
case X86II::MO_TLSLD:
|
|
|
|
RefKind = MCSymbolRefExpr::VK_TLSLD;
|
|
|
|
break;
|
|
|
|
case X86II::MO_TLSLDM:
|
|
|
|
RefKind = MCSymbolRefExpr::VK_TLSLDM;
|
|
|
|
break;
|
|
|
|
case X86II::MO_GOTTPOFF:
|
|
|
|
RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
|
|
|
|
break;
|
|
|
|
case X86II::MO_INDNTPOFF:
|
|
|
|
RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
|
|
|
|
break;
|
|
|
|
case X86II::MO_TPOFF:
|
|
|
|
RefKind = MCSymbolRefExpr::VK_TPOFF;
|
|
|
|
break;
|
|
|
|
case X86II::MO_DTPOFF:
|
|
|
|
RefKind = MCSymbolRefExpr::VK_DTPOFF;
|
|
|
|
break;
|
|
|
|
case X86II::MO_NTPOFF:
|
|
|
|
RefKind = MCSymbolRefExpr::VK_NTPOFF;
|
|
|
|
break;
|
|
|
|
case X86II::MO_GOTNTPOFF:
|
|
|
|
RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
|
|
|
|
break;
|
|
|
|
case X86II::MO_GOTPCREL:
|
|
|
|
RefKind = MCSymbolRefExpr::VK_GOTPCREL;
|
|
|
|
break;
|
|
|
|
case X86II::MO_GOT:
|
|
|
|
RefKind = MCSymbolRefExpr::VK_GOT;
|
|
|
|
break;
|
|
|
|
case X86II::MO_GOTOFF:
|
|
|
|
RefKind = MCSymbolRefExpr::VK_GOTOFF;
|
|
|
|
break;
|
|
|
|
case X86II::MO_PLT:
|
|
|
|
RefKind = MCSymbolRefExpr::VK_PLT;
|
|
|
|
break;
|
|
|
|
case X86II::MO_ABS8:
|
|
|
|
RefKind = MCSymbolRefExpr::VK_X86_ABS8;
|
|
|
|
break;
|
2009-09-03 13:06:07 +08:00
|
|
|
case X86II::MO_PIC_BASE_OFFSET:
|
|
|
|
case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
|
2015-05-30 09:25:56 +08:00
|
|
|
Expr = MCSymbolRefExpr::create(Sym, Ctx);
|
2009-09-03 13:06:07 +08:00
|
|
|
// Subtract the pic base.
|
2018-04-18 05:30:29 +08:00
|
|
|
Expr = MCBinaryExpr::createSub(
|
|
|
|
Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
|
2014-10-21 09:17:30 +08:00
|
|
|
if (MO.isJTI()) {
|
2016-06-19 07:25:37 +08:00
|
|
|
assert(MAI.doesSetDirectiveSuppressReloc());
|
2010-04-13 07:07:17 +08:00
|
|
|
// If .set directive is supported, use it to reduce the number of
|
|
|
|
// relocations the assembler will generate for differences between
|
|
|
|
// local labels. This is only safe when the symbols are in the same
|
|
|
|
// section so we are restricting it to jumptable references.
|
2015-05-19 02:43:14 +08:00
|
|
|
MCSymbol *Label = Ctx.createTempSymbol();
|
2020-02-15 10:16:24 +08:00
|
|
|
AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
|
2015-05-30 09:25:56 +08:00
|
|
|
Expr = MCSymbolRefExpr::create(Label, Ctx);
|
2010-04-13 07:07:17 +08:00
|
|
|
}
|
2009-09-03 13:06:07 +08:00
|
|
|
break;
|
2009-09-03 15:30:56 +08:00
|
|
|
}
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2014-04-25 13:30:21 +08:00
|
|
|
if (!Expr)
|
2015-05-30 09:25:56 +08:00
|
|
|
Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2012-10-17 10:22:27 +08:00
|
|
|
if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
|
2018-04-18 05:30:29 +08:00
|
|
|
Expr = MCBinaryExpr::createAdd(
|
|
|
|
Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
|
2015-05-14 02:37:00 +08:00
|
|
|
return MCOperand::createExpr(Expr);
|
2009-09-03 12:44:53 +08:00
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
|
2010-05-19 01:22:24 +08:00
|
|
|
/// a short fixed-register form.
|
|
|
|
static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
|
|
|
|
unsigned ImmOp = Inst.getNumOperands() - 1;
|
2012-02-12 01:26:53 +08:00
|
|
|
assert(Inst.getOperand(0).isReg() &&
|
|
|
|
(Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
|
2010-05-19 01:22:24 +08:00
|
|
|
((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
|
|
|
|
Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
|
2018-04-18 05:30:29 +08:00
|
|
|
Inst.getNumOperands() == 2) &&
|
|
|
|
"Unexpected instruction!");
|
2010-05-19 01:22:24 +08:00
|
|
|
|
|
|
|
// Check whether the destination register can be fixed.
|
|
|
|
unsigned Reg = Inst.getOperand(0).getReg();
|
|
|
|
if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// If so, rewrite the instruction.
|
2010-05-19 14:20:44 +08:00
|
|
|
MCOperand Saved = Inst.getOperand(ImmOp);
|
|
|
|
Inst = MCInst();
|
|
|
|
Inst.setOpcode(Opcode);
|
|
|
|
Inst.addOperand(Saved);
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// If a movsx instruction has a shorter encoding for the used register
|
2013-07-13 02:06:44 +08:00
|
|
|
/// simplify the instruction to use it instead.
|
|
|
|
static void SimplifyMOVSX(MCInst &Inst) {
|
|
|
|
unsigned NewOpcode = 0;
|
|
|
|
unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
|
|
|
|
switch (Inst.getOpcode()) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unexpected instruction!");
|
2018-04-18 05:30:29 +08:00
|
|
|
case X86::MOVSX16rr8: // movsbw %al, %ax --> cbtw
|
2013-07-13 02:06:44 +08:00
|
|
|
if (Op0 == X86::AX && Op1 == X86::AL)
|
|
|
|
NewOpcode = X86::CBW;
|
|
|
|
break;
|
|
|
|
case X86::MOVSX32rr16: // movswl %ax, %eax --> cwtl
|
|
|
|
if (Op0 == X86::EAX && Op1 == X86::AX)
|
|
|
|
NewOpcode = X86::CWDE;
|
|
|
|
break;
|
|
|
|
case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
|
|
|
|
if (Op0 == X86::RAX && Op1 == X86::EAX)
|
|
|
|
NewOpcode = X86::CDQE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (NewOpcode != 0) {
|
|
|
|
Inst = MCInst();
|
|
|
|
Inst.setOpcode(NewOpcode);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Simplify things like MOV32rm to MOV32o32a.
|
2010-08-17 05:03:32 +08:00
|
|
|
static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
|
|
|
|
unsigned Opcode) {
|
|
|
|
// Don't make these simplifications in 64-bit mode; other assemblers don't
|
|
|
|
// perform them because they make the code larger.
|
|
|
|
if (Printer.getSubtarget().is64Bit())
|
|
|
|
return;
|
|
|
|
|
2010-05-19 14:20:44 +08:00
|
|
|
bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
|
|
|
|
unsigned AddrBase = IsStore;
|
|
|
|
unsigned RegOp = IsStore ? 0 : 5;
|
|
|
|
unsigned AddrOp = AddrBase + 3;
|
2018-04-18 05:30:29 +08:00
|
|
|
assert(
|
|
|
|
Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
|
|
|
|
Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
|
|
|
|
Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
|
|
|
|
Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
|
|
|
|
Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
|
|
|
|
(Inst.getOperand(AddrOp).isExpr() || Inst.getOperand(AddrOp).isImm()) &&
|
|
|
|
"Unexpected instruction!");
|
2010-05-19 14:20:44 +08:00
|
|
|
|
|
|
|
// Check whether the destination register can be fixed.
|
|
|
|
unsigned Reg = Inst.getOperand(RegOp).getReg();
|
|
|
|
if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
|
|
|
|
return;
|
|
|
|
|
|
|
|
// Check whether this is an absolute address.
|
2012-08-02 02:39:17 +08:00
|
|
|
// FIXME: We know TLVP symbol refs aren't, but there should be a better way
|
2010-06-17 08:51:48 +08:00
|
|
|
// to do this here.
|
|
|
|
bool Absolute = true;
|
|
|
|
if (Inst.getOperand(AddrOp).isExpr()) {
|
|
|
|
const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
|
|
|
|
if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
|
|
|
|
if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
|
|
|
|
Absolute = false;
|
|
|
|
}
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2010-06-17 08:51:48 +08:00
|
|
|
if (Absolute &&
|
2014-03-19 00:14:11 +08:00
|
|
|
(Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||
|
|
|
|
Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||
|
|
|
|
Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))
|
2010-05-19 14:20:44 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
// If so, rewrite the instruction.
|
|
|
|
MCOperand Saved = Inst.getOperand(AddrOp);
|
2014-03-19 00:14:11 +08:00
|
|
|
MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg);
|
2010-05-19 14:20:44 +08:00
|
|
|
Inst = MCInst();
|
|
|
|
Inst.setOpcode(Opcode);
|
|
|
|
Inst.addOperand(Saved);
|
2014-01-16 15:57:45 +08:00
|
|
|
Inst.addOperand(Seg);
|
2010-05-19 01:22:24 +08:00
|
|
|
}
|
2009-09-13 04:34:57 +08:00
|
|
|
|
2014-12-04 13:20:33 +08:00
|
|
|
static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
|
|
|
|
return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
|
2014-01-08 20:58:07 +08:00
|
|
|
}
|
|
|
|
|
2015-06-16 02:44:01 +08:00
|
|
|
/// Lower a single machine operand \p MO of \p MI to its MC counterpart.
///
/// \returns None for operands that have no MC representation (implicit
/// register operands and register masks), otherwise the lowered operand.
/// An operand type that is not handled here prints \p MI to errs() and is
/// treated as unreachable.
Optional<MCOperand>
X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
                                    const MachineOperand &MO) const {
  const auto OpType = MO.getType();

  // Call clobbers are ignored.
  if (OpType == MachineOperand::MO_RegisterMask)
    return None;

  if (OpType == MachineOperand::MO_Register) {
    // Ignore all implicit register operands.
    if (MO.isImplicit())
      return None;
    return MCOperand::createReg(MO.getReg());
  }

  if (OpType == MachineOperand::MO_Immediate)
    return MCOperand::createImm(MO.getImm());

  // Everything else that is supported is lowered through a symbol; only the
  // way the symbol is obtained differs.
  switch (OpType) {
  case MachineOperand::MO_MachineBasicBlock:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_ExternalSymbol:
    return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
  case MachineOperand::MO_MCSymbol:
    return LowerSymbolOperand(MO, MO.getMCSymbol());
  case MachineOperand::MO_JumpTableIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
  case MachineOperand::MO_ConstantPoolIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
  case MachineOperand::MO_BlockAddress:
    return LowerSymbolOperand(
        MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
  default:
    break;
  }

  // Dump the offending instruction before reporting the unsupported operand.
  MI->print(errs());
  llvm_unreachable("unknown operand type");
}
|
|
|
|
|
2019-08-28 01:24:23 +08:00
|
|
|
// Replace TAILJMP opcodes with their equivalent opcodes that have encoding
|
|
|
|
// information.
|
|
|
|
static unsigned convertTailJumpOpcode(unsigned Opcode) {
|
|
|
|
switch (Opcode) {
|
|
|
|
case X86::TAILJMPr:
|
|
|
|
Opcode = X86::JMP32r;
|
|
|
|
break;
|
|
|
|
case X86::TAILJMPm:
|
|
|
|
Opcode = X86::JMP32m;
|
|
|
|
break;
|
|
|
|
case X86::TAILJMPr64:
|
|
|
|
Opcode = X86::JMP64r;
|
|
|
|
break;
|
|
|
|
case X86::TAILJMPm64:
|
|
|
|
Opcode = X86::JMP64m;
|
|
|
|
break;
|
|
|
|
case X86::TAILJMPr64_REX:
|
|
|
|
Opcode = X86::JMP64r_REX;
|
|
|
|
break;
|
|
|
|
case X86::TAILJMPm64_REX:
|
|
|
|
Opcode = X86::JMP64m_REX;
|
|
|
|
break;
|
|
|
|
case X86::TAILJMPd:
|
|
|
|
case X86::TAILJMPd64:
|
|
|
|
Opcode = X86::JMP_1;
|
|
|
|
break;
|
|
|
|
case X86::TAILJMPd_CC:
|
|
|
|
case X86::TAILJMPd64_CC:
|
|
|
|
Opcode = X86::JCC_1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return Opcode;
|
|
|
|
}
|
|
|
|
|
2009-09-13 04:34:57 +08:00
|
|
|
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
|
|
|
|
OutMI.setOpcode(MI->getOpcode());
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2015-06-16 02:44:01 +08:00
|
|
|
for (const MachineOperand &MO : MI->operands())
|
|
|
|
if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
|
|
|
|
OutMI.addOperand(MaybeMCOp.getValue());
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2009-09-11 12:28:13 +08:00
|
|
|
// Handle a few special cases to eliminate operand modifiers.
|
2009-09-13 04:34:57 +08:00
|
|
|
switch (OutMI.getOpcode()) {
|
2013-06-11 04:43:49 +08:00
|
|
|
case X86::LEA64_32r:
|
2010-07-09 07:46:44 +08:00
|
|
|
case X86::LEA64r:
|
|
|
|
case X86::LEA16r:
|
|
|
|
case X86::LEA32r:
|
|
|
|
// LEA should have a segment register, but it must be empty.
|
2018-04-18 05:30:29 +08:00
|
|
|
assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
|
2010-07-09 07:46:44 +08:00
|
|
|
"Unexpected # of LEA operands");
|
2018-04-18 05:30:29 +08:00
|
|
|
assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
|
2010-07-09 07:46:44 +08:00
|
|
|
"LEA has segment specified!");
|
2009-09-03 01:35:12 +08:00
|
|
|
break;
|
2010-02-06 05:30:49 +08:00
|
|
|
|
2020-05-31 06:51:56 +08:00
|
|
|
case X86::MULX32Hrr:
|
|
|
|
case X86::MULX32Hrm:
|
|
|
|
case X86::MULX64Hrr:
|
|
|
|
case X86::MULX64Hrm: {
|
|
|
|
// Turn into regular MULX by duplicating the destination.
|
|
|
|
unsigned NewOpc;
|
|
|
|
switch (OutMI.getOpcode()) {
|
|
|
|
default: llvm_unreachable("Invalid opcode");
|
|
|
|
case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
|
2020-06-24 23:37:06 +08:00
|
|
|
case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
|
2020-05-31 06:51:56 +08:00
|
|
|
case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
|
|
|
|
case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
|
|
|
|
}
|
|
|
|
OutMI.setOpcode(NewOpc);
|
|
|
|
// Duplicate the destination.
|
|
|
|
unsigned DestReg = OutMI.getOperand(0).getReg();
|
|
|
|
OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2013-03-14 15:09:57 +08:00
|
|
|
// Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
|
|
|
|
// if one of the registers is extended, but other isn't.
|
2015-10-12 12:57:59 +08:00
|
|
|
case X86::VMOVZPQILo2PQIrr:
|
2013-03-14 15:09:57 +08:00
|
|
|
case X86::VMOVAPDrr:
|
|
|
|
case X86::VMOVAPDYrr:
|
|
|
|
case X86::VMOVAPSrr:
|
|
|
|
case X86::VMOVAPSYrr:
|
|
|
|
case X86::VMOVDQArr:
|
|
|
|
case X86::VMOVDQAYrr:
|
|
|
|
case X86::VMOVDQUrr:
|
|
|
|
case X86::VMOVDQUYrr:
|
|
|
|
case X86::VMOVUPDrr:
|
|
|
|
case X86::VMOVUPDYrr:
|
|
|
|
case X86::VMOVUPSrr:
|
|
|
|
case X86::VMOVUPSYrr: {
|
2013-03-16 11:44:31 +08:00
|
|
|
if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
|
|
|
|
X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
|
|
|
|
unsigned NewOpc;
|
|
|
|
switch (OutMI.getOpcode()) {
|
2018-07-02 14:42:42 +08:00
|
|
|
default: llvm_unreachable("Invalid opcode");
|
|
|
|
case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
|
|
|
|
case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
|
|
|
|
case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
|
|
|
|
case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
|
|
|
|
case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
|
|
|
|
case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
|
|
|
|
case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
|
|
|
|
case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
|
|
|
|
case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
|
|
|
|
case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
|
|
|
|
case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
|
|
|
|
case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
|
|
|
|
case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
|
2013-03-16 11:44:31 +08:00
|
|
|
}
|
|
|
|
OutMI.setOpcode(NewOpc);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case X86::VMOVSDrr:
|
|
|
|
case X86::VMOVSSrr: {
|
|
|
|
if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
|
|
|
|
X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
|
|
|
|
unsigned NewOpc;
|
|
|
|
switch (OutMI.getOpcode()) {
|
2018-07-02 14:42:42 +08:00
|
|
|
default: llvm_unreachable("Invalid opcode");
|
|
|
|
case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
|
|
|
|
case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
|
2013-03-16 11:44:31 +08:00
|
|
|
}
|
|
|
|
OutMI.setOpcode(NewOpc);
|
2013-03-14 15:09:57 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2019-10-05 02:02:46 +08:00
|
|
|
case X86::VPCMPBZ128rmi: case X86::VPCMPBZ128rmik:
|
|
|
|
case X86::VPCMPBZ128rri: case X86::VPCMPBZ128rrik:
|
|
|
|
case X86::VPCMPBZ256rmi: case X86::VPCMPBZ256rmik:
|
|
|
|
case X86::VPCMPBZ256rri: case X86::VPCMPBZ256rrik:
|
|
|
|
case X86::VPCMPBZrmi: case X86::VPCMPBZrmik:
|
|
|
|
case X86::VPCMPBZrri: case X86::VPCMPBZrrik:
|
|
|
|
case X86::VPCMPDZ128rmi: case X86::VPCMPDZ128rmik:
|
|
|
|
case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
|
|
|
|
case X86::VPCMPDZ128rri: case X86::VPCMPDZ128rrik:
|
|
|
|
case X86::VPCMPDZ256rmi: case X86::VPCMPDZ256rmik:
|
|
|
|
case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
|
|
|
|
case X86::VPCMPDZ256rri: case X86::VPCMPDZ256rrik:
|
|
|
|
case X86::VPCMPDZrmi: case X86::VPCMPDZrmik:
|
|
|
|
case X86::VPCMPDZrmib: case X86::VPCMPDZrmibk:
|
|
|
|
case X86::VPCMPDZrri: case X86::VPCMPDZrrik:
|
|
|
|
case X86::VPCMPQZ128rmi: case X86::VPCMPQZ128rmik:
|
|
|
|
case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
|
|
|
|
case X86::VPCMPQZ128rri: case X86::VPCMPQZ128rrik:
|
|
|
|
case X86::VPCMPQZ256rmi: case X86::VPCMPQZ256rmik:
|
|
|
|
case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
|
|
|
|
case X86::VPCMPQZ256rri: case X86::VPCMPQZ256rrik:
|
|
|
|
case X86::VPCMPQZrmi: case X86::VPCMPQZrmik:
|
|
|
|
case X86::VPCMPQZrmib: case X86::VPCMPQZrmibk:
|
|
|
|
case X86::VPCMPQZrri: case X86::VPCMPQZrrik:
|
|
|
|
case X86::VPCMPWZ128rmi: case X86::VPCMPWZ128rmik:
|
|
|
|
case X86::VPCMPWZ128rri: case X86::VPCMPWZ128rrik:
|
|
|
|
case X86::VPCMPWZ256rmi: case X86::VPCMPWZ256rmik:
|
|
|
|
case X86::VPCMPWZ256rri: case X86::VPCMPWZ256rrik:
|
|
|
|
case X86::VPCMPWZrmi: case X86::VPCMPWZrmik:
|
|
|
|
case X86::VPCMPWZrri: case X86::VPCMPWZrrik: {
|
|
|
|
// Turn immediate 0 into the VPCMPEQ instruction.
|
|
|
|
if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
|
|
|
|
unsigned NewOpc;
|
|
|
|
switch (OutMI.getOpcode()) {
|
2020-01-07 00:17:05 +08:00
|
|
|
default: llvm_unreachable("Invalid opcode");
|
2019-10-05 02:02:46 +08:00
|
|
|
case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPEQBZ128rm; break;
|
|
|
|
case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPEQBZ128rmk; break;
|
|
|
|
case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPEQBZ128rr; break;
|
|
|
|
case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPEQBZ128rrk; break;
|
|
|
|
case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPEQBZ256rm; break;
|
|
|
|
case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPEQBZ256rmk; break;
|
|
|
|
case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPEQBZ256rr; break;
|
|
|
|
case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPEQBZ256rrk; break;
|
|
|
|
case X86::VPCMPBZrmi: NewOpc = X86::VPCMPEQBZrm; break;
|
|
|
|
case X86::VPCMPBZrmik: NewOpc = X86::VPCMPEQBZrmk; break;
|
|
|
|
case X86::VPCMPBZrri: NewOpc = X86::VPCMPEQBZrr; break;
|
|
|
|
case X86::VPCMPBZrrik: NewOpc = X86::VPCMPEQBZrrk; break;
|
|
|
|
case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPEQDZ128rm; break;
|
|
|
|
case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPEQDZ128rmb; break;
|
|
|
|
case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPEQDZ128rmbk; break;
|
|
|
|
case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPEQDZ128rmk; break;
|
|
|
|
case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPEQDZ128rr; break;
|
|
|
|
case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPEQDZ128rrk; break;
|
|
|
|
case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPEQDZ256rm; break;
|
|
|
|
case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPEQDZ256rmb; break;
|
|
|
|
case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPEQDZ256rmbk; break;
|
|
|
|
case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPEQDZ256rmk; break;
|
|
|
|
case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPEQDZ256rr; break;
|
|
|
|
case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPEQDZ256rrk; break;
|
|
|
|
case X86::VPCMPDZrmi: NewOpc = X86::VPCMPEQDZrm; break;
|
|
|
|
case X86::VPCMPDZrmib: NewOpc = X86::VPCMPEQDZrmb; break;
|
|
|
|
case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPEQDZrmbk; break;
|
|
|
|
case X86::VPCMPDZrmik: NewOpc = X86::VPCMPEQDZrmk; break;
|
|
|
|
case X86::VPCMPDZrri: NewOpc = X86::VPCMPEQDZrr; break;
|
|
|
|
case X86::VPCMPDZrrik: NewOpc = X86::VPCMPEQDZrrk; break;
|
|
|
|
case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPEQQZ128rm; break;
|
|
|
|
case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPEQQZ128rmb; break;
|
|
|
|
case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPEQQZ128rmbk; break;
|
|
|
|
case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPEQQZ128rmk; break;
|
|
|
|
case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPEQQZ128rr; break;
|
|
|
|
case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPEQQZ128rrk; break;
|
|
|
|
case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPEQQZ256rm; break;
|
|
|
|
case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPEQQZ256rmb; break;
|
|
|
|
case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPEQQZ256rmbk; break;
|
|
|
|
case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPEQQZ256rmk; break;
|
|
|
|
case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPEQQZ256rr; break;
|
|
|
|
case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPEQQZ256rrk; break;
|
|
|
|
case X86::VPCMPQZrmi: NewOpc = X86::VPCMPEQQZrm; break;
|
|
|
|
case X86::VPCMPQZrmib: NewOpc = X86::VPCMPEQQZrmb; break;
|
|
|
|
case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPEQQZrmbk; break;
|
|
|
|
case X86::VPCMPQZrmik: NewOpc = X86::VPCMPEQQZrmk; break;
|
|
|
|
case X86::VPCMPQZrri: NewOpc = X86::VPCMPEQQZrr; break;
|
|
|
|
case X86::VPCMPQZrrik: NewOpc = X86::VPCMPEQQZrrk; break;
|
|
|
|
case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPEQWZ128rm; break;
|
|
|
|
case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPEQWZ128rmk; break;
|
|
|
|
case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPEQWZ128rr; break;
|
|
|
|
case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPEQWZ128rrk; break;
|
|
|
|
case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPEQWZ256rm; break;
|
|
|
|
case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPEQWZ256rmk; break;
|
|
|
|
case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPEQWZ256rr; break;
|
|
|
|
case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPEQWZ256rrk; break;
|
|
|
|
case X86::VPCMPWZrmi: NewOpc = X86::VPCMPEQWZrm; break;
|
|
|
|
case X86::VPCMPWZrmik: NewOpc = X86::VPCMPEQWZrmk; break;
|
|
|
|
case X86::VPCMPWZrri: NewOpc = X86::VPCMPEQWZrr; break;
|
|
|
|
case X86::VPCMPWZrrik: NewOpc = X86::VPCMPEQWZrrk; break;
|
|
|
|
}
|
|
|
|
|
|
|
|
OutMI.setOpcode(NewOpc);
|
|
|
|
OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Turn immediate 6 into the VPCMPGT instruction.
|
|
|
|
if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
|
|
|
|
unsigned NewOpc;
|
|
|
|
switch (OutMI.getOpcode()) {
|
2020-01-07 00:17:05 +08:00
|
|
|
default: llvm_unreachable("Invalid opcode");
|
2019-10-05 02:02:46 +08:00
|
|
|
case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPGTBZ128rm; break;
|
|
|
|
case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPGTBZ128rmk; break;
|
|
|
|
case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPGTBZ128rr; break;
|
|
|
|
case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPGTBZ128rrk; break;
|
|
|
|
case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPGTBZ256rm; break;
|
|
|
|
case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPGTBZ256rmk; break;
|
|
|
|
case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPGTBZ256rr; break;
|
|
|
|
case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPGTBZ256rrk; break;
|
|
|
|
case X86::VPCMPBZrmi: NewOpc = X86::VPCMPGTBZrm; break;
|
|
|
|
case X86::VPCMPBZrmik: NewOpc = X86::VPCMPGTBZrmk; break;
|
|
|
|
case X86::VPCMPBZrri: NewOpc = X86::VPCMPGTBZrr; break;
|
|
|
|
case X86::VPCMPBZrrik: NewOpc = X86::VPCMPGTBZrrk; break;
|
|
|
|
case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPGTDZ128rm; break;
|
|
|
|
case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPGTDZ128rmb; break;
|
|
|
|
case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPGTDZ128rmbk; break;
|
|
|
|
case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPGTDZ128rmk; break;
|
|
|
|
case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPGTDZ128rr; break;
|
|
|
|
case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPGTDZ128rrk; break;
|
|
|
|
case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPGTDZ256rm; break;
|
|
|
|
case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPGTDZ256rmb; break;
|
|
|
|
case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPGTDZ256rmbk; break;
|
|
|
|
case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPGTDZ256rmk; break;
|
|
|
|
case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPGTDZ256rr; break;
|
|
|
|
case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPGTDZ256rrk; break;
|
|
|
|
case X86::VPCMPDZrmi: NewOpc = X86::VPCMPGTDZrm; break;
|
|
|
|
case X86::VPCMPDZrmib: NewOpc = X86::VPCMPGTDZrmb; break;
|
|
|
|
case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPGTDZrmbk; break;
|
|
|
|
case X86::VPCMPDZrmik: NewOpc = X86::VPCMPGTDZrmk; break;
|
|
|
|
case X86::VPCMPDZrri: NewOpc = X86::VPCMPGTDZrr; break;
|
|
|
|
case X86::VPCMPDZrrik: NewOpc = X86::VPCMPGTDZrrk; break;
|
|
|
|
case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPGTQZ128rm; break;
|
|
|
|
case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPGTQZ128rmb; break;
|
|
|
|
case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPGTQZ128rmbk; break;
|
|
|
|
case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPGTQZ128rmk; break;
|
|
|
|
case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPGTQZ128rr; break;
|
|
|
|
case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPGTQZ128rrk; break;
|
|
|
|
case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPGTQZ256rm; break;
|
|
|
|
case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPGTQZ256rmb; break;
|
|
|
|
case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPGTQZ256rmbk; break;
|
|
|
|
case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPGTQZ256rmk; break;
|
|
|
|
case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPGTQZ256rr; break;
|
|
|
|
case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPGTQZ256rrk; break;
|
|
|
|
case X86::VPCMPQZrmi: NewOpc = X86::VPCMPGTQZrm; break;
|
|
|
|
case X86::VPCMPQZrmib: NewOpc = X86::VPCMPGTQZrmb; break;
|
|
|
|
case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPGTQZrmbk; break;
|
|
|
|
case X86::VPCMPQZrmik: NewOpc = X86::VPCMPGTQZrmk; break;
|
|
|
|
case X86::VPCMPQZrri: NewOpc = X86::VPCMPGTQZrr; break;
|
|
|
|
case X86::VPCMPQZrrik: NewOpc = X86::VPCMPGTQZrrk; break;
|
|
|
|
case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPGTWZ128rm; break;
|
|
|
|
case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPGTWZ128rmk; break;
|
|
|
|
case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPGTWZ128rr; break;
|
|
|
|
case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPGTWZ128rrk; break;
|
|
|
|
case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPGTWZ256rm; break;
|
|
|
|
case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPGTWZ256rmk; break;
|
|
|
|
case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPGTWZ256rr; break;
|
|
|
|
case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPGTWZ256rrk; break;
|
|
|
|
case X86::VPCMPWZrmi: NewOpc = X86::VPCMPGTWZrm; break;
|
|
|
|
case X86::VPCMPWZrmik: NewOpc = X86::VPCMPGTWZrmk; break;
|
|
|
|
case X86::VPCMPWZrri: NewOpc = X86::VPCMPGTWZrr; break;
|
|
|
|
case X86::VPCMPWZrrik: NewOpc = X86::VPCMPGTWZrrk; break;
|
|
|
|
}
|
|
|
|
|
|
|
|
OutMI.setOpcode(NewOpc);
|
|
|
|
OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2019-08-28 01:24:23 +08:00
|
|
|
// CALL64r, CALL64pcrel32 - These instructions used to have
|
2019-08-23 00:23:35 +08:00
|
|
|
// register inputs modeled as normal uses instead of implicit uses. As such,
|
|
|
|
// they we used to truncate off all but the first operand (the callee). This
|
|
|
|
// issue seems to have been fixed at some point. This assert verifies that.
|
2010-05-19 12:31:36 +08:00
|
|
|
case X86::CALL64r:
|
2019-08-23 00:23:35 +08:00
|
|
|
case X86::CALL64pcrel32:
|
|
|
|
assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
|
2010-05-19 05:40:18 +08:00
|
|
|
break;
|
2010-05-19 12:31:36 +08:00
|
|
|
|
2010-10-27 02:09:55 +08:00
|
|
|
case X86::EH_RETURN:
|
|
|
|
case X86::EH_RETURN64: {
|
|
|
|
OutMI = MCInst();
|
2014-01-08 20:58:07 +08:00
|
|
|
OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
|
2010-10-27 02:09:55 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2015-10-02 02:44:59 +08:00
|
|
|
case X86::CLEANUPRET: {
|
2018-10-23 07:34:24 +08:00
|
|
|
// Replace CLEANUPRET with the appropriate RET.
|
2015-10-02 02:44:59 +08:00
|
|
|
OutMI = MCInst();
|
|
|
|
OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case X86::CATCHRET: {
|
|
|
|
// Replace CATCHRET with the appropriate RET.
|
|
|
|
const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
|
|
|
|
unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
|
|
|
|
OutMI = MCInst();
|
|
|
|
OutMI.setOpcode(getRetOpcode(Subtarget));
|
|
|
|
OutMI.addOperand(MCOperand::createReg(ReturnReg));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2019-08-28 01:24:23 +08:00
|
|
|
// TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
|
|
|
|
// instruction.
|
|
|
|
case X86::TAILJMPr:
|
|
|
|
case X86::TAILJMPr64:
|
|
|
|
case X86::TAILJMPr64_REX:
|
|
|
|
case X86::TAILJMPd:
|
|
|
|
case X86::TAILJMPd64:
|
|
|
|
assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
|
|
|
|
OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
|
|
|
|
break;
|
2010-05-19 23:26:43 +08:00
|
|
|
|
[X86] Merge the different Jcc instructions for each condition code into single instructions that store the condition code as an operand.
Summary:
This avoids needing an isel pattern for each condition code. And it removes translation switches for converting between Jcc instructions and condition codes.
Now the printer, encoder and disassembler take care of converting the immediate. We use InstAliases to handle the assembly matching. But we print using the asm string in the instruction definition. The instruction itself is marked IsCodeGenOnly=1 to hide it from the assembly parser.
Reviewers: spatel, lebedev.ri, courbet, gchatelet, RKSimon
Reviewed By: RKSimon
Subscribers: MatzeB, qcolombet, eraman, hiraditya, arphaman, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D60228
llvm-svn: 357802
2019-04-06 03:28:09 +08:00
|
|
|
case X86::TAILJMPd_CC:
|
2019-08-28 01:24:23 +08:00
|
|
|
case X86::TAILJMPd64_CC:
|
2019-08-23 00:23:35 +08:00
|
|
|
assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
|
2019-08-28 01:24:23 +08:00
|
|
|
OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
|
|
|
|
break;
|
|
|
|
|
|
|
|
case X86::TAILJMPm:
|
|
|
|
case X86::TAILJMPm64:
|
|
|
|
case X86::TAILJMPm64_REX:
|
|
|
|
assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
|
|
|
|
"Unexpected number of operands!");
|
|
|
|
OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
|
[X86] Merge the different Jcc instructions for each condition code into single instructions that store the condition code as an operand.
Summary:
This avoids needing an isel pattern for each condition code. And it removes translation switches for converting between Jcc instructions and condition codes.
Now the printer, encoder and disassembler take care of converting the immediate. We use InstAliases to handle the assembly matching. But we print using the asm string in the instruction definition. The instruction itself is marked IsCodeGenOnly=1 to hide it from the assembly parser.
Reviewers: spatel, lebedev.ri, courbet, gchatelet, RKSimon
Reviewed By: RKSimon
Subscribers: MatzeB, qcolombet, eraman, hiraditya, arphaman, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D60228
llvm-svn: 357802
2019-04-06 03:28:09 +08:00
|
|
|
break;
|
|
|
|
|
2015-01-06 15:35:50 +08:00
|
|
|
case X86::DEC16r:
|
|
|
|
case X86::DEC32r:
|
|
|
|
case X86::INC16r:
|
|
|
|
case X86::INC32r:
|
|
|
|
// If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
|
|
|
|
if (!AsmPrinter.getSubtarget().is64Bit()) {
|
|
|
|
unsigned Opcode;
|
|
|
|
switch (OutMI.getOpcode()) {
|
2018-07-02 14:42:42 +08:00
|
|
|
default: llvm_unreachable("Invalid opcode");
|
|
|
|
case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
|
|
|
|
case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
|
|
|
|
case X86::INC16r: Opcode = X86::INC16r_alt; break;
|
|
|
|
case X86::INC32r: Opcode = X86::INC32r_alt; break;
|
2015-01-06 15:35:50 +08:00
|
|
|
}
|
|
|
|
OutMI.setOpcode(Opcode);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2010-05-19 01:22:24 +08:00
|
|
|
// We don't currently select the correct instruction form for instructions
|
|
|
|
// which have a short %eax, etc. form. Handle this by custom lowering, for
|
|
|
|
// now.
|
|
|
|
//
|
|
|
|
// Note, we are currently not handling the following instructions:
|
2010-05-19 14:20:44 +08:00
|
|
|
// MOV64ao8, MOV64o8a
|
2010-05-19 01:22:24 +08:00
|
|
|
// XCHG16ar, XCHG32ar, XCHG64ar
|
2017-09-28 04:34:17 +08:00
|
|
|
case X86::MOV8mr_NOREX:
|
2016-04-29 08:51:30 +08:00
|
|
|
case X86::MOV8mr:
|
2017-09-28 04:34:17 +08:00
|
|
|
case X86::MOV8rm_NOREX:
|
2016-04-29 08:51:30 +08:00
|
|
|
case X86::MOV8rm:
|
|
|
|
case X86::MOV16mr:
|
|
|
|
case X86::MOV16rm:
|
|
|
|
case X86::MOV32mr:
|
|
|
|
case X86::MOV32rm: {
|
|
|
|
unsigned NewOpc;
|
|
|
|
switch (OutMI.getOpcode()) {
|
2018-07-02 14:42:42 +08:00
|
|
|
default: llvm_unreachable("Invalid opcode");
|
2017-09-28 04:34:17 +08:00
|
|
|
case X86::MOV8mr_NOREX:
|
2018-07-02 14:42:42 +08:00
|
|
|
case X86::MOV8mr: NewOpc = X86::MOV8o32a; break;
|
2017-09-28 04:34:17 +08:00
|
|
|
case X86::MOV8rm_NOREX:
|
2018-07-02 14:42:42 +08:00
|
|
|
case X86::MOV8rm: NewOpc = X86::MOV8ao32; break;
|
|
|
|
case X86::MOV16mr: NewOpc = X86::MOV16o32a; break;
|
|
|
|
case X86::MOV16rm: NewOpc = X86::MOV16ao32; break;
|
|
|
|
case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
|
|
|
|
case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
|
2016-04-29 08:51:30 +08:00
|
|
|
}
|
|
|
|
SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2018-07-02 14:42:42 +08:00
|
|
|
case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
|
|
|
|
case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
|
|
|
|
case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
|
|
|
|
case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
|
|
|
|
case X86::OR8ri: case X86::OR16ri: case X86::OR32ri: case X86::OR64ri32:
|
|
|
|
case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
|
|
|
|
case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
|
|
|
|
case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
|
|
|
|
case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
|
2016-04-29 08:51:30 +08:00
|
|
|
unsigned NewOpc;
|
|
|
|
switch (OutMI.getOpcode()) {
|
2018-07-02 14:42:42 +08:00
|
|
|
default: llvm_unreachable("Invalid opcode");
|
|
|
|
case X86::ADC8ri: NewOpc = X86::ADC8i8; break;
|
|
|
|
case X86::ADC16ri: NewOpc = X86::ADC16i16; break;
|
|
|
|
case X86::ADC32ri: NewOpc = X86::ADC32i32; break;
|
|
|
|
case X86::ADC64ri32: NewOpc = X86::ADC64i32; break;
|
|
|
|
case X86::ADD8ri: NewOpc = X86::ADD8i8; break;
|
|
|
|
case X86::ADD16ri: NewOpc = X86::ADD16i16; break;
|
|
|
|
case X86::ADD32ri: NewOpc = X86::ADD32i32; break;
|
|
|
|
case X86::ADD64ri32: NewOpc = X86::ADD64i32; break;
|
|
|
|
case X86::AND8ri: NewOpc = X86::AND8i8; break;
|
|
|
|
case X86::AND16ri: NewOpc = X86::AND16i16; break;
|
|
|
|
case X86::AND32ri: NewOpc = X86::AND32i32; break;
|
|
|
|
case X86::AND64ri32: NewOpc = X86::AND64i32; break;
|
|
|
|
case X86::CMP8ri: NewOpc = X86::CMP8i8; break;
|
|
|
|
case X86::CMP16ri: NewOpc = X86::CMP16i16; break;
|
|
|
|
case X86::CMP32ri: NewOpc = X86::CMP32i32; break;
|
|
|
|
case X86::CMP64ri32: NewOpc = X86::CMP64i32; break;
|
|
|
|
case X86::OR8ri: NewOpc = X86::OR8i8; break;
|
|
|
|
case X86::OR16ri: NewOpc = X86::OR16i16; break;
|
|
|
|
case X86::OR32ri: NewOpc = X86::OR32i32; break;
|
|
|
|
case X86::OR64ri32: NewOpc = X86::OR64i32; break;
|
|
|
|
case X86::SBB8ri: NewOpc = X86::SBB8i8; break;
|
|
|
|
case X86::SBB16ri: NewOpc = X86::SBB16i16; break;
|
|
|
|
case X86::SBB32ri: NewOpc = X86::SBB32i32; break;
|
|
|
|
case X86::SBB64ri32: NewOpc = X86::SBB64i32; break;
|
|
|
|
case X86::SUB8ri: NewOpc = X86::SUB8i8; break;
|
|
|
|
case X86::SUB16ri: NewOpc = X86::SUB16i16; break;
|
|
|
|
case X86::SUB32ri: NewOpc = X86::SUB32i32; break;
|
|
|
|
case X86::SUB64ri32: NewOpc = X86::SUB64i32; break;
|
|
|
|
case X86::TEST8ri: NewOpc = X86::TEST8i8; break;
|
|
|
|
case X86::TEST16ri: NewOpc = X86::TEST16i16; break;
|
|
|
|
case X86::TEST32ri: NewOpc = X86::TEST32i32; break;
|
|
|
|
case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
|
|
|
|
case X86::XOR8ri: NewOpc = X86::XOR8i8; break;
|
|
|
|
case X86::XOR16ri: NewOpc = X86::XOR16i16; break;
|
|
|
|
case X86::XOR32ri: NewOpc = X86::XOR32i32; break;
|
|
|
|
case X86::XOR64ri32: NewOpc = X86::XOR64i32; break;
|
2016-04-29 08:51:30 +08:00
|
|
|
}
|
|
|
|
SimplifyShortImmForm(OutMI, NewOpc);
|
|
|
|
break;
|
|
|
|
}
|
2011-10-27 05:12:27 +08:00
|
|
|
|
2013-07-13 02:06:44 +08:00
|
|
|
// Try to shrink some forms of movsx.
|
|
|
|
case X86::MOVSX16rr8:
|
|
|
|
case X86::MOVSX32rr16:
|
|
|
|
case X86::MOVSX64rr32:
|
|
|
|
SimplifyMOVSX(OutMI);
|
|
|
|
break;
|
2019-11-05 11:58:53 +08:00
|
|
|
|
|
|
|
case X86::VCMPPDrri:
|
|
|
|
case X86::VCMPPDYrri:
|
|
|
|
case X86::VCMPPSrri:
|
|
|
|
case X86::VCMPPSYrri:
|
|
|
|
case X86::VCMPSDrr:
|
|
|
|
case X86::VCMPSSrr: {
|
|
|
|
// Swap the operands if it will enable a 2 byte VEX encoding.
|
|
|
|
// FIXME: Change the immediate to improve opportunities?
|
|
|
|
if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
|
|
|
|
X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
|
|
|
|
unsigned Imm = MI->getOperand(3).getImm() & 0x7;
|
|
|
|
switch (Imm) {
|
|
|
|
default: break;
|
|
|
|
case 0x00: // EQUAL
|
|
|
|
case 0x03: // UNORDERED
|
|
|
|
case 0x04: // NOT EQUAL
|
|
|
|
case 0x07: // ORDERED
|
|
|
|
std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case X86::VMOVHLPSrr:
|
|
|
|
case X86::VUNPCKHPDrr:
|
|
|
|
// These are not truly commutable so hide them from the default case.
|
|
|
|
break;
|
|
|
|
|
|
|
|
default: {
|
|
|
|
// If the instruction is a commutable arithmetic instruction we might be
|
|
|
|
// able to commute the operands to get a 2 byte VEX prefix.
|
|
|
|
uint64_t TSFlags = MI->getDesc().TSFlags;
|
|
|
|
if (MI->getDesc().isCommutable() &&
|
|
|
|
(TSFlags & X86II::EncodingMask) == X86II::VEX &&
|
|
|
|
(TSFlags & X86II::OpMapMask) == X86II::TB &&
|
|
|
|
(TSFlags & X86II::FormMask) == X86II::MRMSrcReg &&
|
|
|
|
!(TSFlags & X86II::VEX_W) && (TSFlags & X86II::VEX_4V) &&
|
|
|
|
OutMI.getNumOperands() == 3) {
|
|
|
|
if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
|
|
|
|
X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg()))
|
|
|
|
std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2011-10-27 05:12:27 +08:00
|
|
|
}
|
2009-09-13 04:34:57 +08:00
|
|
|
}
|
|
|
|
|
2014-07-25 04:40:55 +08:00
|
|
|
/// Lower a TLS_addr* / TLS_base_addr* pseudo instruction.
///
/// Emits an LEA that materialises the TLS symbol reference taken from operand
/// 3 of \p MI (into RDI for the 64-bit forms, EAX for the 32-bit forms),
/// followed by a call to the TLS resolver: "__tls_get_addr" for the 64-bit
/// forms and "___tls_get_addr" for the 32-bit forms.  The call goes through
/// the GOT when the module requests it and relaxable relocations are
/// available, and through the PLT otherwise.
///
/// \param MCInstLowering  lowering helper used to resolve the symbol operand.
/// \param MI              the TLS pseudo instruction being lowered.
void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                 const MachineInstr &MI) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);

  const unsigned Opcode = MI.getOpcode();
  // Everything that is not one of the two *32 opcodes uses the 64-bit
  // sequence; only the *64 opcodes are additionally treated as LP64.
  const bool Is32BitForm =
      Opcode == X86::TLS_addr32 || Opcode == X86::TLS_base_addr32;
  const bool IsLP64Form =
      Opcode == X86::TLS_addr64 || Opcode == X86::TLS_base_addr64;
  MCContext &Ctx = OutStreamer->getContext();

  // Pick the relocation variant that matches the pseudo opcode.
  const MCSymbolRefExpr::VariantKind SRVK =
      [Opcode]() -> MCSymbolRefExpr::VariantKind {
    switch (Opcode) {
    case X86::TLS_addr32:
    case X86::TLS_addr64:
    case X86::TLS_addrX32:
      return MCSymbolRefExpr::VK_TLSGD;
    case X86::TLS_base_addr32:
      return MCSymbolRefExpr::VK_TLSLDM;
    case X86::TLS_base_addr64:
    case X86::TLS_base_addrX32:
      return MCSymbolRefExpr::VK_TLSLD;
    default:
      llvm_unreachable("unexpected opcode");
    }
  }();

  const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
      MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);

  // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD
  // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
  // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
  // only using GOT when GOTPCRELX is enabled.
  // TODO Delete the workaround when GOTPCRELX becomes commonplace.
  const bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
                      Ctx.getAsmInfo()->canRelaxRelocations();

  const bool IsGeneralDynamic = SRVK == MCSymbolRefExpr::VK_TLSGD;

  // Emits a bare, operand-less prefix instruction.
  const auto EmitPrefix = [this](unsigned PrefixOpc) {
    EmitAndCountInstruction(MCInstBuilder(PrefixOpc));
  };

  if (Is32BitForm) {
    // The general-dynamic form without GOT addresses the symbol with EBX as
    // the index register; every other 32-bit form uses EBX as the base.
    const bool EBXIsIndex = IsGeneralDynamic && !UseGot;
    const unsigned LeaBase = EBXIsIndex ? 0u : unsigned(X86::EBX);
    const unsigned LeaIndex = EBXIsIndex ? unsigned(X86::EBX) : 0u;
    EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                .addReg(X86::EAX)
                                .addReg(LeaBase)
                                .addImm(1)
                                .addReg(LeaIndex)
                                .addExpr(Sym)
                                .addReg(0));

    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
    if (!UseGot) {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALLpcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
      return;
    }

    const MCExpr *GotRef =
        MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
    EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
                                .addReg(X86::EBX)
                                .addImm(1)
                                .addReg(0)
                                .addExpr(GotRef)
                                .addReg(0));
    return;
  }

  // 64-bit sequence.  The general-dynamic form is padded with prefix
  // instructions (presumably so the sequence has the size the linker expects
  // for TLS relaxation -- confirm against the TLS ABI).
  if (IsGeneralDynamic && IsLP64Form)
    EmitPrefix(X86::DATA16_PREFIX);

  EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
                              .addReg(X86::RDI)
                              .addReg(X86::RIP)
                              .addImm(1)
                              .addReg(0)
                              .addExpr(Sym)
                              .addReg(0));

  const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
  if (IsGeneralDynamic) {
    // One extra data16 prefix is emitted when the call does not go through
    // the GOT.
    if (!UseGot)
      EmitPrefix(X86::DATA16_PREFIX);
    EmitPrefix(X86::DATA16_PREFIX);
    EmitPrefix(X86::REX64_PREFIX);
  }

  if (!UseGot) {
    EmitAndCountInstruction(
        MCInstBuilder(X86::CALL64pcrel32)
            .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                             MCSymbolRefExpr::VK_PLT, Ctx)));
    return;
  }

  const MCExpr *GotRef =
      MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
  EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
                              .addReg(X86::RIP)
                              .addImm(1)
                              .addReg(0)
                              .addExpr(GotRef)
                              .addReg(0));
}
|
2010-04-28 09:39:28 +08:00
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Emit the largest nop instruction smaller than or equal to \p NumBytes
|
2016-04-20 02:48:13 +08:00
|
|
|
/// bytes. Return the size of nop emitted.
|
2020-06-18 00:08:12 +08:00
|
|
|
static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
|
|
|
|
const X86Subtarget *Subtarget) {
|
2020-07-28 00:40:26 +08:00
|
|
|
// Determine the longest nop which can be efficiently decoded for the given
|
|
|
|
// target cpu. 15-bytes is the longest single NOP instruction, but some
|
|
|
|
// platforms can't decode the longest forms efficiently.
|
2020-07-26 13:05:46 +08:00
|
|
|
unsigned MaxNopLength = 1;
|
|
|
|
if (Subtarget->is64Bit()) {
|
|
|
|
// FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
|
|
|
|
// IndexReg/BaseReg below need to be updated.
|
|
|
|
if (Subtarget->hasFeature(X86::FeatureFast7ByteNOP))
|
|
|
|
MaxNopLength = 7;
|
|
|
|
else if (Subtarget->hasFeature(X86::FeatureFast15ByteNOP))
|
|
|
|
MaxNopLength = 15;
|
|
|
|
else if (Subtarget->hasFeature(X86::FeatureFast11ByteNOP))
|
|
|
|
MaxNopLength = 11;
|
|
|
|
else
|
|
|
|
MaxNopLength = 10;
|
|
|
|
} if (Subtarget->is32Bit())
|
|
|
|
MaxNopLength = 2;
|
|
|
|
|
2020-01-12 00:41:35 +08:00
|
|
|
// Cap a single nop emission at the profitable value for the target
|
2020-07-26 13:05:46 +08:00
|
|
|
NumBytes = std::min(NumBytes, MaxNopLength);
|
2020-01-12 00:41:35 +08:00
|
|
|
|
2016-04-20 02:48:13 +08:00
|
|
|
unsigned NopSize;
|
|
|
|
unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
|
2019-05-07 19:09:16 +08:00
|
|
|
IndexReg = Displacement = SegmentReg = 0;
|
2016-04-20 02:48:13 +08:00
|
|
|
BaseReg = X86::RAX;
|
|
|
|
ScaleVal = 1;
|
|
|
|
switch (NumBytes) {
|
2018-04-18 05:30:29 +08:00
|
|
|
case 0:
|
|
|
|
llvm_unreachable("Zero nops?");
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
NopSize = 1;
|
|
|
|
Opc = X86::NOOP;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
NopSize = 2;
|
|
|
|
Opc = X86::XCHG16ar;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
NopSize = 3;
|
|
|
|
Opc = X86::NOOPL;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
|
|
NopSize = 4;
|
|
|
|
Opc = X86::NOOPL;
|
|
|
|
Displacement = 8;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
|
|
NopSize = 5;
|
|
|
|
Opc = X86::NOOPL;
|
|
|
|
Displacement = 8;
|
|
|
|
IndexReg = X86::RAX;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
|
|
NopSize = 6;
|
|
|
|
Opc = X86::NOOPW;
|
|
|
|
Displacement = 8;
|
|
|
|
IndexReg = X86::RAX;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
|
|
NopSize = 7;
|
|
|
|
Opc = X86::NOOPL;
|
|
|
|
Displacement = 512;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
|
|
NopSize = 8;
|
|
|
|
Opc = X86::NOOPL;
|
|
|
|
Displacement = 512;
|
|
|
|
IndexReg = X86::RAX;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
|
|
NopSize = 9;
|
|
|
|
Opc = X86::NOOPW;
|
|
|
|
Displacement = 512;
|
|
|
|
IndexReg = X86::RAX;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
NopSize = 10;
|
|
|
|
Opc = X86::NOOPW;
|
|
|
|
Displacement = 512;
|
|
|
|
IndexReg = X86::RAX;
|
|
|
|
SegmentReg = X86::CS;
|
|
|
|
break;
|
2016-04-20 02:48:13 +08:00
|
|
|
}
|
2013-12-04 08:39:08 +08:00
|
|
|
|
2016-04-20 02:48:13 +08:00
|
|
|
unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
|
|
|
|
NopSize += NumPrefixes;
|
|
|
|
for (unsigned i = 0; i != NumPrefixes; ++i)
|
2020-02-15 10:16:24 +08:00
|
|
|
OS.emitBytes("\x66");
|
2016-04-19 13:24:47 +08:00
|
|
|
|
2016-04-20 02:48:13 +08:00
|
|
|
switch (Opc) {
|
2018-07-02 14:42:42 +08:00
|
|
|
default: llvm_unreachable("Unexpected opcode");
|
2016-04-20 02:48:13 +08:00
|
|
|
case X86::NOOP:
|
2020-06-18 00:08:12 +08:00
|
|
|
OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
|
2016-04-20 02:48:13 +08:00
|
|
|
break;
|
|
|
|
case X86::XCHG16ar:
|
2020-06-18 00:08:12 +08:00
|
|
|
OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
|
|
|
|
*Subtarget);
|
2016-04-20 02:48:13 +08:00
|
|
|
break;
|
|
|
|
case X86::NOOPL:
|
|
|
|
case X86::NOOPW:
|
2020-02-14 13:58:16 +08:00
|
|
|
OS.emitInstruction(MCInstBuilder(Opc)
|
2016-04-20 02:48:13 +08:00
|
|
|
.addReg(BaseReg)
|
|
|
|
.addImm(ScaleVal)
|
|
|
|
.addReg(IndexReg)
|
|
|
|
.addImm(Displacement)
|
|
|
|
.addReg(SegmentReg),
|
2020-06-18 00:08:12 +08:00
|
|
|
*Subtarget);
|
2016-04-20 02:48:13 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
assert(NopSize <= NumBytes && "We overemitted?");
|
|
|
|
return NopSize;
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Emit the optimal amount of multi-byte nops on X86.
|
2020-06-18 00:08:12 +08:00
|
|
|
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
|
|
|
|
const X86Subtarget *Subtarget) {
|
2016-04-21 01:53:21 +08:00
|
|
|
unsigned NopsToEmit = NumBytes;
|
2016-04-21 02:45:31 +08:00
|
|
|
(void)NopsToEmit;
|
2016-04-20 02:48:13 +08:00
|
|
|
while (NumBytes) {
|
2020-06-18 00:08:12 +08:00
|
|
|
NumBytes -= emitNop(OS, NumBytes, Subtarget);
|
2016-04-21 01:53:21 +08:00
|
|
|
assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
|
2016-04-20 02:48:13 +08:00
|
|
|
}
|
2013-12-04 08:39:08 +08:00
|
|
|
}
|
|
|
|
|
2015-05-07 07:53:26 +08:00
|
|
|
void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
|
|
|
|
X86MCInstLower &MCIL) {
|
|
|
|
assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
|
2014-12-02 06:52:56 +08:00
|
|
|
|
2020-01-09 01:58:42 +08:00
|
|
|
NoAutoPaddingScope NoPadScope(*OutStreamer);
|
|
|
|
|
2015-05-13 07:52:24 +08:00
|
|
|
StatepointOpers SOpers(&MI);
|
|
|
|
if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
|
2020-06-18 00:08:12 +08:00
|
|
|
emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
|
2015-05-13 07:52:24 +08:00
|
|
|
} else {
|
|
|
|
// Lower call target and choose correct opcode
|
|
|
|
const MachineOperand &CallTarget = SOpers.getCallTarget();
|
|
|
|
MCOperand CallTargetMCOp;
|
|
|
|
unsigned CallOpcode;
|
|
|
|
switch (CallTarget.getType()) {
|
|
|
|
case MachineOperand::MO_GlobalAddress:
|
|
|
|
case MachineOperand::MO_ExternalSymbol:
|
|
|
|
CallTargetMCOp = MCIL.LowerSymbolOperand(
|
|
|
|
CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
|
|
|
|
CallOpcode = X86::CALL64pcrel32;
|
|
|
|
// Currently, we only support relative addressing with statepoints.
|
|
|
|
// Otherwise, we'll need a scratch register to hold the target
|
|
|
|
// address. You'll fail asserts during load & relocation if this
|
|
|
|
// symbol is to far away. (TODO: support non-relative addressing)
|
|
|
|
break;
|
|
|
|
case MachineOperand::MO_Immediate:
|
2015-05-14 02:37:00 +08:00
|
|
|
CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
|
2015-05-13 07:52:24 +08:00
|
|
|
CallOpcode = X86::CALL64pcrel32;
|
|
|
|
// Currently, we only support relative addressing with statepoints.
|
|
|
|
// Otherwise, we'll need a scratch register to hold the target
|
|
|
|
// immediate. You'll fail asserts during load & relocation if this
|
|
|
|
// address is to far away. (TODO: support non-relative addressing)
|
|
|
|
break;
|
|
|
|
case MachineOperand::MO_Register:
|
Introduce the "retpoline" x86 mitigation technique for variant #2 of the speculative execution vulnerabilities disclosed today, specifically identified by CVE-2017-5715, "Branch Target Injection", and is one of the two halves to Spectre..
Summary:
First, we need to explain the core of the vulnerability. Note that this
is a very incomplete description, please see the Project Zero blog post
for details:
https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
The basis for branch target injection is to direct speculative execution
of the processor to some "gadget" of executable code by poisoning the
prediction of indirect branches with the address of that gadget. The
gadget in turn contains an operation that provides a side channel for
reading data. Most commonly, this will look like a load of secret data
followed by a branch on the loaded value and then a load of some
predictable cache line. The attacker then uses timing of the processors
cache to determine which direction the branch took *in the speculative
execution*, and in turn what one bit of the loaded value was. Due to the
nature of these timing side channels and the branch predictor on Intel
processors, this allows an attacker to leak data only accessible to
a privileged domain (like the kernel) back into an unprivileged domain.
The goal is simple: avoid generating code which contains an indirect
branch that could have its prediction poisoned by an attacker. In many
cases, the compiler can simply use directed conditional branches and
a small search tree. LLVM already has support for lowering switches in
this way and the first step of this patch is to disable jump-table
lowering of switches and introduce a pass to rewrite explicit indirectbr
sequences into a switch over integers.
However, there is no fully general alternative to indirect calls. We
introduce a new construct we call a "retpoline" to implement indirect
calls in a non-speculatable way. It can be thought of loosely as
a trampoline for indirect calls which uses the RET instruction on x86.
Further, we arrange for a specific call->ret sequence which ensures the
processor predicts the return to go to a controlled, known location. The
retpoline then "smashes" the return address pushed onto the stack by the
call with the desired target of the original indirect call. The result
is a predicted return to the next instruction after a call (which can be
used to trap speculative execution within an infinite loop) and an
actual indirect branch to an arbitrary address.
On 64-bit x86 ABIs, this is especially easily done in the compiler by
using a guaranteed scratch register to pass the target into this device.
For 32-bit ABIs there isn't a guaranteed scratch register and so several
different retpoline variants are introduced to use a scratch register if
one is available in the calling convention and to otherwise use direct
stack push/pop sequences to pass the target address.
This "retpoline" mitigation is fully described in the following blog
post: https://support.google.com/faqs/answer/7625886
We also support a target feature that disables emission of the retpoline
thunk by the compiler to allow for custom thunks if users want them.
These are particularly useful in environments like kernels that
routinely do hot-patching on boot and want to hot-patch their thunk to
different code sequences. They can write this custom thunk and use
`-mretpoline-external-thunk` *in addition* to `-mretpoline`. In this
case, on x86-64 thu thunk names must be:
```
__llvm_external_retpoline_r11
```
or on 32-bit:
```
__llvm_external_retpoline_eax
__llvm_external_retpoline_ecx
__llvm_external_retpoline_edx
__llvm_external_retpoline_push
```
And the target of the retpoline is passed in the named register, or in
the case of the `push` suffix on the top of the stack via a `pushl`
instruction.
There is one other important source of indirect branches in x86 ELF
binaries: the PLT. These patches also include support for LLD to
generate PLT entries that perform a retpoline-style indirection.
The only other indirect branches remaining that we are aware of are from
precompiled runtimes (such as crt0.o and similar). The ones we have
found are not really attackable, and so we have not focused on them
here, but eventually these runtimes should also be replicated for
retpoline-ed configurations for completeness.
For kernels or other freestanding or fully static executables, the
compiler switch `-mretpoline` is sufficient to fully mitigate this
particular attack. For dynamic executables, you must compile *all*
libraries with `-mretpoline` and additionally link the dynamic
executable and all shared libraries with LLD and pass `-z retpolineplt`
(or use similar functionality from some other linker). We strongly
recommend also using `-z now` as non-lazy binding allows the
retpoline-mitigated PLT to be substantially smaller.
When manually apply similar transformations to `-mretpoline` to the
Linux kernel we observed very small performance hits to applications
running typical workloads, and relatively minor hits (approximately 2%)
even for extremely syscall-heavy applications. This is largely due to
the small number of indirect branches that occur in performance
sensitive paths of the kernel.
When using these patches on statically linked applications, especially
C++ applications, you should expect to see a much more dramatic
performance hit. For microbenchmarks that are switch, indirect-, or
virtual-call heavy we have seen overheads ranging from 10% to 50%.
However, real-world workloads exhibit substantially lower performance
impact. Notably, techniques such as PGO and ThinLTO dramatically reduce
the impact of hot indirect calls (by speculatively promoting them to
direct calls) and allow optimized search trees to be used to lower
switches. If you need to deploy these techniques in C++ applications, we
*strongly* recommend that you ensure all hot call targets are statically
linked (avoiding PLT indirection) and use both PGO and ThinLTO. Well
tuned servers using all of these techniques saw 5% - 10% overhead from
the use of retpoline.
We will add detailed documentation covering these components in
subsequent patches, but wanted to make the core functionality available
as soon as possible. Happy for more code review, but we'd really like to
get these patches landed and backported ASAP for obvious reasons. We're
planning to backport this to both 6.0 and 5.0 release streams and get
a 5.0 release with just this cherry picked ASAP for distros and vendors.
This patch is the work of a number of people over the past month: Eric, Reid,
Rui, and myself. I'm mailing it out as a single commit due to the time
sensitive nature of landing this and the need to backport it. Huge thanks to
everyone who helped out here, and everyone at Intel who helped out in
discussions about how to craft this. Also, credit goes to Paul Turner (at
Google, but not an LLVM contributor) for much of the underlying retpoline
design.
Reviewers: echristo, rnk, ruiu, craig.topper, DavidKreitzer
Subscribers: sanjoy, emaste, mcrosier, mgorny, mehdi_amini, hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D41723
llvm-svn: 323155
2018-01-23 06:05:25 +08:00
|
|
|
// FIXME: Add retpoline support and remove this.
|
2020-04-03 12:00:44 +08:00
|
|
|
if (Subtarget->useIndirectThunkCalls())
|
|
|
|
report_fatal_error("Lowering register statepoints with thunks not "
|
Introduce the "retpoline" x86 mitigation technique for variant #2 of the speculative execution vulnerabilities disclosed today, specifically identified by CVE-2017-5715, "Branch Target Injection", and is one of the two halves to Spectre..
Summary:
First, we need to explain the core of the vulnerability. Note that this
is a very incomplete description, please see the Project Zero blog post
for details:
https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
The basis for branch target injection is to direct speculative execution
of the processor to some "gadget" of executable code by poisoning the
prediction of indirect branches with the address of that gadget. The
gadget in turn contains an operation that provides a side channel for
reading data. Most commonly, this will look like a load of secret data
followed by a branch on the loaded value and then a load of some
predictable cache line. The attacker then uses timing of the processors
cache to determine which direction the branch took *in the speculative
execution*, and in turn what one bit of the loaded value was. Due to the
nature of these timing side channels and the branch predictor on Intel
processors, this allows an attacker to leak data only accessible to
a privileged domain (like the kernel) back into an unprivileged domain.
The goal is simple: avoid generating code which contains an indirect
branch that could have its prediction poisoned by an attacker. In many
cases, the compiler can simply use directed conditional branches and
a small search tree. LLVM already has support for lowering switches in
this way and the first step of this patch is to disable jump-table
lowering of switches and introduce a pass to rewrite explicit indirectbr
sequences into a switch over integers.
However, there is no fully general alternative to indirect calls. We
introduce a new construct we call a "retpoline" to implement indirect
calls in a non-speculatable way. It can be thought of loosely as
a trampoline for indirect calls which uses the RET instruction on x86.
Further, we arrange for a specific call->ret sequence which ensures the
processor predicts the return to go to a controlled, known location. The
retpoline then "smashes" the return address pushed onto the stack by the
call with the desired target of the original indirect call. The result
is a predicted return to the next instruction after a call (which can be
used to trap speculative execution within an infinite loop) and an
actual indirect branch to an arbitrary address.
On 64-bit x86 ABIs, this is especially easily done in the compiler by
using a guaranteed scratch register to pass the target into this device.
For 32-bit ABIs there isn't a guaranteed scratch register and so several
different retpoline variants are introduced to use a scratch register if
one is available in the calling convention and to otherwise use direct
stack push/pop sequences to pass the target address.
This "retpoline" mitigation is fully described in the following blog
post: https://support.google.com/faqs/answer/7625886
We also support a target feature that disables emission of the retpoline
thunk by the compiler to allow for custom thunks if users want them.
These are particularly useful in environments like kernels that
routinely do hot-patching on boot and want to hot-patch their thunk to
different code sequences. They can write this custom thunk and use
`-mretpoline-external-thunk` *in addition* to `-mretpoline`. In this
case, on x86-64 thu thunk names must be:
```
__llvm_external_retpoline_r11
```
or on 32-bit:
```
__llvm_external_retpoline_eax
__llvm_external_retpoline_ecx
__llvm_external_retpoline_edx
__llvm_external_retpoline_push
```
And the target of the retpoline is passed in the named register, or in
the case of the `push` suffix on the top of the stack via a `pushl`
instruction.
There is one other important source of indirect branches in x86 ELF
binaries: the PLT. These patches also include support for LLD to
generate PLT entries that perform a retpoline-style indirection.
The only other indirect branches remaining that we are aware of are from
precompiled runtimes (such as crt0.o and similar). The ones we have
found are not really attackable, and so we have not focused on them
here, but eventually these runtimes should also be replicated for
retpoline-ed configurations for completeness.
For kernels or other freestanding or fully static executables, the
compiler switch `-mretpoline` is sufficient to fully mitigate this
particular attack. For dynamic executables, you must compile *all*
libraries with `-mretpoline` and additionally link the dynamic
executable and all shared libraries with LLD and pass `-z retpolineplt`
(or use similar functionality from some other linker). We strongly
recommend also using `-z now` as non-lazy binding allows the
retpoline-mitigated PLT to be substantially smaller.
When manually apply similar transformations to `-mretpoline` to the
Linux kernel we observed very small performance hits to applications
running typical workloads, and relatively minor hits (approximately 2%)
even for extremely syscall-heavy applications. This is largely due to
the small number of indirect branches that occur in performance
sensitive paths of the kernel.
When using these patches on statically linked applications, especially
C++ applications, you should expect to see a much more dramatic
performance hit. For microbenchmarks that are switch, indirect-, or
virtual-call heavy we have seen overheads ranging from 10% to 50%.
However, real-world workloads exhibit substantially lower performance
impact. Notably, techniques such as PGO and ThinLTO dramatically reduce
the impact of hot indirect calls (by speculatively promoting them to
direct calls) and allow optimized search trees to be used to lower
switches. If you need to deploy these techniques in C++ applications, we
*strongly* recommend that you ensure all hot call targets are statically
linked (avoiding PLT indirection) and use both PGO and ThinLTO. Well
tuned servers using all of these techniques saw 5% - 10% overhead from
the use of retpoline.
We will add detailed documentation covering these components in
subsequent patches, but wanted to make the core functionality available
as soon as possible. Happy for more code review, but we'd really like to
get these patches landed and backported ASAP for obvious reasons. We're
planning to backport this to both 6.0 and 5.0 release streams and get
a 5.0 release with just this cherry picked ASAP for distros and vendors.
This patch is the work of a number of people over the past month: Eric, Reid,
Rui, and myself. I'm mailing it out as a single commit due to the time
sensitive nature of landing this and the need to backport it. Huge thanks to
everyone who helped out here, and everyone at Intel who helped out in
discussions about how to craft this. Also, credit goes to Paul Turner (at
Google, but not an LLVM contributor) for much of the underlying retpoline
design.
Reviewers: echristo, rnk, ruiu, craig.topper, DavidKreitzer
Subscribers: sanjoy, emaste, mcrosier, mgorny, mehdi_amini, hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D41723
llvm-svn: 323155
2018-01-23 06:05:25 +08:00
|
|
|
"yet implemented.");
|
2015-05-14 02:37:00 +08:00
|
|
|
CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
|
2015-05-13 07:52:24 +08:00
|
|
|
CallOpcode = X86::CALL64r;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unsupported operand type in statepoint call target");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Emit call
|
|
|
|
MCInst CallInst;
|
|
|
|
CallInst.setOpcode(CallOpcode);
|
|
|
|
CallInst.addOperand(CallTargetMCOp);
|
2020-02-14 13:58:16 +08:00
|
|
|
OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
|
2015-05-13 07:52:24 +08:00
|
|
|
}
|
2014-12-02 06:52:56 +08:00
|
|
|
|
|
|
|
// Record our statepoint node in the same section used by STACKMAP
|
|
|
|
// and PATCHPOINT
|
2019-12-20 06:03:19 +08:00
|
|
|
auto &Ctx = OutStreamer->getContext();
|
|
|
|
MCSymbol *MILabel = Ctx.createTempSymbol();
|
2020-02-15 11:21:58 +08:00
|
|
|
OutStreamer->emitLabel(MILabel);
|
2019-12-20 06:03:19 +08:00
|
|
|
SM.recordStatepoint(*MILabel, MI);
|
2014-12-02 06:52:56 +08:00
|
|
|
}
|
|
|
|
|
2017-02-08 03:19:49 +08:00
|
|
|
void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
|
|
|
|
X86MCInstLower &MCIL) {
|
|
|
|
// FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>,
|
|
|
|
// <opcode>, <operands>
|
2015-06-16 02:44:08 +08:00
|
|
|
|
2020-01-09 01:58:42 +08:00
|
|
|
NoAutoPaddingScope NoPadScope(*OutStreamer);
|
|
|
|
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register DefRegister = FaultingMI.getOperand(0).getReg();
|
2017-02-08 03:19:49 +08:00
|
|
|
FaultMaps::FaultKind FK =
|
|
|
|
static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
|
|
|
|
MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
|
|
|
|
unsigned Opcode = FaultingMI.getOperand(3).getImm();
|
|
|
|
unsigned OperandsBeginIdx = 4;
|
2015-06-16 02:44:08 +08:00
|
|
|
|
2019-12-20 04:01:51 +08:00
|
|
|
auto &Ctx = OutStreamer->getContext();
|
|
|
|
MCSymbol *FaultingLabel = Ctx.createTempSymbol();
|
2020-02-15 11:21:58 +08:00
|
|
|
OutStreamer->emitLabel(FaultingLabel);
|
2019-12-20 04:01:51 +08:00
|
|
|
|
2017-02-08 03:19:49 +08:00
|
|
|
assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
|
2019-12-20 04:01:51 +08:00
|
|
|
FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
|
2015-06-16 02:44:08 +08:00
|
|
|
|
2017-02-08 03:19:49 +08:00
|
|
|
MCInst MI;
|
|
|
|
MI.setOpcode(Opcode);
|
2015-07-21 04:31:39 +08:00
|
|
|
|
2017-02-08 03:19:49 +08:00
|
|
|
if (DefRegister != X86::NoRegister)
|
|
|
|
MI.addOperand(MCOperand::createReg(DefRegister));
|
2015-07-21 04:31:39 +08:00
|
|
|
|
2017-02-08 03:19:49 +08:00
|
|
|
for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
|
|
|
|
E = FaultingMI.operands_end();
|
2015-06-16 02:44:08 +08:00
|
|
|
I != E; ++I)
|
2017-02-08 03:19:49 +08:00
|
|
|
if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
|
|
|
|
MI.addOperand(MaybeOperand.getValue());
|
2015-06-16 02:44:08 +08:00
|
|
|
|
2019-03-13 05:05:31 +08:00
|
|
|
OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
|
2020-02-14 13:58:16 +08:00
|
|
|
OutStreamer->emitInstruction(MI, getSubtargetInfo());
|
2015-06-16 02:44:08 +08:00
|
|
|
}
|
2014-12-02 06:52:56 +08:00
|
|
|
|
2017-02-01 01:00:27 +08:00
|
|
|
void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
|
|
|
|
X86MCInstLower &MCIL) {
|
|
|
|
bool Is64Bits = Subtarget->is64Bit();
|
|
|
|
MCContext &Ctx = OutStreamer->getContext();
|
|
|
|
MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
|
|
|
|
const MCSymbolRefExpr *Op =
|
|
|
|
MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);
|
|
|
|
|
|
|
|
EmitAndCountInstruction(
|
|
|
|
MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
|
|
|
|
.addExpr(Op));
|
|
|
|
}
|
|
|
|
|
2016-04-19 13:24:47 +08:00
|
|
|
void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
|
|
|
|
X86MCInstLower &MCIL) {
|
|
|
|
// PATCHABLE_OP minsize, opcode, operands
|
|
|
|
|
2020-01-09 01:58:42 +08:00
|
|
|
NoAutoPaddingScope NoPadScope(*OutStreamer);
|
|
|
|
|
2016-04-19 13:24:47 +08:00
|
|
|
unsigned MinSize = MI.getOperand(0).getImm();
|
|
|
|
unsigned Opcode = MI.getOperand(1).getImm();
|
|
|
|
|
|
|
|
MCInst MCI;
|
|
|
|
MCI.setOpcode(Opcode);
|
2021-01-15 12:30:32 +08:00
|
|
|
for (auto &MO : drop_begin(MI.operands(), 2))
|
2016-04-19 13:24:47 +08:00
|
|
|
if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
|
|
|
|
MCI.addOperand(MaybeOperand.getValue());
|
|
|
|
|
|
|
|
SmallString<256> Code;
|
|
|
|
SmallVector<MCFixup, 4> Fixups;
|
|
|
|
raw_svector_ostream VecOS(Code);
|
|
|
|
CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
|
|
|
|
|
|
|
|
if (Code.size() < MinSize) {
|
2020-06-18 01:26:10 +08:00
|
|
|
if (MinSize == 2 && Subtarget->is32Bit() &&
|
|
|
|
Subtarget->isTargetWindowsMSVC() &&
|
|
|
|
(Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
|
|
|
|
// For compatibilty reasons, when targetting MSVC, is is important to
|
|
|
|
// generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools
|
|
|
|
// rely specifically on this pattern to be able to patch a function.
|
|
|
|
// This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
|
|
|
|
OutStreamer->emitInstruction(
|
|
|
|
MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
|
|
|
|
*Subtarget);
|
|
|
|
} else if (MinSize == 2 && Opcode == X86::PUSH64r) {
|
2016-04-19 13:24:47 +08:00
|
|
|
// This is an optimization that lets us get away without emitting a nop in
|
|
|
|
// many cases.
|
|
|
|
//
|
2017-11-29 01:15:09 +08:00
|
|
|
// NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
|
2016-04-19 13:24:47 +08:00
|
|
|
// bytes too, so the check on MinSize is important.
|
|
|
|
MCI.setOpcode(X86::PUSH64rmr);
|
|
|
|
} else {
|
2020-06-18 00:08:12 +08:00
|
|
|
unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
|
2016-04-20 02:48:13 +08:00
|
|
|
assert(NopSize == MinSize && "Could not implement MinSize!");
|
2018-04-18 05:30:29 +08:00
|
|
|
(void)NopSize;
|
2016-04-19 13:24:47 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-14 13:58:16 +08:00
|
|
|
OutStreamer->emitInstruction(MCI, getSubtargetInfo());
|
2016-04-19 13:24:47 +08:00
|
|
|
}
|
|
|
|
|
2013-11-19 11:29:56 +08:00
|
|
|
// Lower a stackmap of the form:
|
|
|
|
// <id>, <shadowBytes>, ...
|
2014-07-25 04:40:55 +08:00
|
|
|
void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
|
2015-04-25 03:11:51 +08:00
|
|
|
SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
|
2019-12-20 06:03:19 +08:00
|
|
|
|
|
|
|
auto &Ctx = OutStreamer->getContext();
|
|
|
|
MCSymbol *MILabel = Ctx.createTempSymbol();
|
2020-02-15 11:21:58 +08:00
|
|
|
OutStreamer->emitLabel(MILabel);
|
2019-12-20 06:03:19 +08:00
|
|
|
|
|
|
|
SM.recordStackMap(*MILabel, MI);
|
2014-07-25 04:40:55 +08:00
|
|
|
unsigned NumShadowBytes = MI.getOperand(1).getImm();
|
|
|
|
SMShadowTracker.reset(NumShadowBytes);
|
2013-11-01 06:11:56 +08:00
|
|
|
}
|
|
|
|
|
2013-11-14 14:54:10 +08:00
|
|
|
// Lower a patchpoint of the form:
|
2013-11-19 11:29:56 +08:00
|
|
|
// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
|
2015-04-22 14:02:31 +08:00
|
|
|
void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
|
|
|
|
X86MCInstLower &MCIL) {
|
2014-07-25 04:40:55 +08:00
|
|
|
assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
|
|
|
|
|
2015-04-25 03:11:51 +08:00
|
|
|
SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
|
2014-07-25 04:40:55 +08:00
|
|
|
|
2020-01-09 01:58:42 +08:00
|
|
|
NoAutoPaddingScope NoPadScope(*OutStreamer);
|
|
|
|
|
2019-12-20 06:03:19 +08:00
|
|
|
auto &Ctx = OutStreamer->getContext();
|
|
|
|
MCSymbol *MILabel = Ctx.createTempSymbol();
|
2020-02-15 11:21:58 +08:00
|
|
|
OutStreamer->emitLabel(MILabel);
|
2019-12-20 06:03:19 +08:00
|
|
|
SM.recordPatchPoint(*MILabel, MI);
|
2013-11-01 06:11:56 +08:00
|
|
|
|
2013-11-19 11:29:56 +08:00
|
|
|
PatchPointOpers opers(&MI);
|
|
|
|
unsigned ScratchIdx = opers.getNextScratchIdx();
|
2013-11-14 14:54:10 +08:00
|
|
|
unsigned EncodedBytes = 0;
|
2016-08-24 07:33:29 +08:00
|
|
|
const MachineOperand &CalleeMO = opers.getCallTarget();
|
2015-04-22 14:02:31 +08:00
|
|
|
|
|
|
|
// Check for null target. If target is non-null (i.e. is non-zero or is
|
|
|
|
// symbolic) then emit a call.
|
|
|
|
if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
|
|
|
|
MCOperand CalleeMCOp;
|
|
|
|
switch (CalleeMO.getType()) {
|
|
|
|
default:
|
|
|
|
/// FIXME: Add a verifier check for bad callee types.
|
|
|
|
llvm_unreachable("Unrecognized callee operand type.");
|
|
|
|
case MachineOperand::MO_Immediate:
|
|
|
|
if (CalleeMO.getImm())
|
2015-05-14 02:37:00 +08:00
|
|
|
CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
|
2015-04-22 14:02:31 +08:00
|
|
|
break;
|
|
|
|
case MachineOperand::MO_ExternalSymbol:
|
|
|
|
case MachineOperand::MO_GlobalAddress:
|
2018-04-18 05:30:29 +08:00
|
|
|
CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
|
|
|
|
MCIL.GetSymbolFromOperand(CalleeMO));
|
2015-04-22 14:02:31 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2013-11-14 14:54:10 +08:00
|
|
|
// Emit MOV to materialize the target address and the CALL to target.
|
|
|
|
// This is encoded with 12-13 bytes, depending on which register is used.
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
|
2013-12-04 08:39:08 +08:00
|
|
|
if (X86II::isX86_64ExtendedReg(ScratchReg))
|
|
|
|
EncodedBytes = 13;
|
|
|
|
else
|
|
|
|
EncodedBytes = 12;
|
2015-04-22 14:02:31 +08:00
|
|
|
|
|
|
|
EmitAndCountInstruction(
|
|
|
|
MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
|
Introduce the "retpoline" x86 mitigation technique for variant #2 of the speculative execution vulnerabilities disclosed today, specifically identified by CVE-2017-5715, "Branch Target Injection", and is one of the two halves to Spectre..
Summary:
First, we need to explain the core of the vulnerability. Note that this
is a very incomplete description, please see the Project Zero blog post
for details:
https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
The basis for branch target injection is to direct speculative execution
of the processor to some "gadget" of executable code by poisoning the
prediction of indirect branches with the address of that gadget. The
gadget in turn contains an operation that provides a side channel for
reading data. Most commonly, this will look like a load of secret data
followed by a branch on the loaded value and then a load of some
predictable cache line. The attacker then uses timing of the processors
cache to determine which direction the branch took *in the speculative
execution*, and in turn what one bit of the loaded value was. Due to the
nature of these timing side channels and the branch predictor on Intel
processors, this allows an attacker to leak data only accessible to
a privileged domain (like the kernel) back into an unprivileged domain.
The goal is simple: avoid generating code which contains an indirect
branch that could have its prediction poisoned by an attacker. In many
cases, the compiler can simply use directed conditional branches and
a small search tree. LLVM already has support for lowering switches in
this way and the first step of this patch is to disable jump-table
lowering of switches and introduce a pass to rewrite explicit indirectbr
sequences into a switch over integers.
However, there is no fully general alternative to indirect calls. We
introduce a new construct we call a "retpoline" to implement indirect
calls in a non-speculatable way. It can be thought of loosely as
a trampoline for indirect calls which uses the RET instruction on x86.
Further, we arrange for a specific call->ret sequence which ensures the
processor predicts the return to go to a controlled, known location. The
retpoline then "smashes" the return address pushed onto the stack by the
call with the desired target of the original indirect call. The result
is a predicted return to the next instruction after a call (which can be
used to trap speculative execution within an infinite loop) and an
actual indirect branch to an arbitrary address.
On 64-bit x86 ABIs, this is especially easily done in the compiler by
using a guaranteed scratch register to pass the target into this device.
For 32-bit ABIs there isn't a guaranteed scratch register and so several
different retpoline variants are introduced to use a scratch register if
one is available in the calling convention and to otherwise use direct
stack push/pop sequences to pass the target address.
This "retpoline" mitigation is fully described in the following blog
post: https://support.google.com/faqs/answer/7625886
We also support a target feature that disables emission of the retpoline
thunk by the compiler to allow for custom thunks if users want them.
These are particularly useful in environments like kernels that
routinely do hot-patching on boot and want to hot-patch their thunk to
different code sequences. They can write this custom thunk and use
`-mretpoline-external-thunk` *in addition* to `-mretpoline`. In this
case, on x86-64 thu thunk names must be:
```
__llvm_external_retpoline_r11
```
or on 32-bit:
```
__llvm_external_retpoline_eax
__llvm_external_retpoline_ecx
__llvm_external_retpoline_edx
__llvm_external_retpoline_push
```
And the target of the retpoline is passed in the named register, or in
the case of the `push` suffix on the top of the stack via a `pushl`
instruction.
There is one other important source of indirect branches in x86 ELF
binaries: the PLT. These patches also include support for LLD to
generate PLT entries that perform a retpoline-style indirection.
The only other indirect branches remaining that we are aware of are from
precompiled runtimes (such as crt0.o and similar). The ones we have
found are not really attackable, and so we have not focused on them
here, but eventually these runtimes should also be replicated for
retpoline-ed configurations for completeness.
For kernels or other freestanding or fully static executables, the
compiler switch `-mretpoline` is sufficient to fully mitigate this
particular attack. For dynamic executables, you must compile *all*
libraries with `-mretpoline` and additionally link the dynamic
executable and all shared libraries with LLD and pass `-z retpolineplt`
(or use similar functionality from some other linker). We strongly
recommend also using `-z now` as non-lazy binding allows the
retpoline-mitigated PLT to be substantially smaller.
When manually apply similar transformations to `-mretpoline` to the
Linux kernel we observed very small performance hits to applications
running typical workloads, and relatively minor hits (approximately 2%)
even for extremely syscall-heavy applications. This is largely due to
the small number of indirect branches that occur in performance
sensitive paths of the kernel.
When using these patches on statically linked applications, especially
C++ applications, you should expect to see a much more dramatic
performance hit. For microbenchmarks that are switch, indirect-, or
virtual-call heavy we have seen overheads ranging from 10% to 50%.
However, real-world workloads exhibit substantially lower performance
impact. Notably, techniques such as PGO and ThinLTO dramatically reduce
the impact of hot indirect calls (by speculatively promoting them to
direct calls) and allow optimized search trees to be used to lower
switches. If you need to deploy these techniques in C++ applications, we
*strongly* recommend that you ensure all hot call targets are statically
linked (avoiding PLT indirection) and use both PGO and ThinLTO. Well
tuned servers using all of these techniques saw 5% - 10% overhead from
the use of retpoline.
We will add detailed documentation covering these components in
subsequent patches, but wanted to make the core functionality available
as soon as possible. Happy for more code review, but we'd really like to
get these patches landed and backported ASAP for obvious reasons. We're
planning to backport this to both 6.0 and 5.0 release streams and get
a 5.0 release with just this cherry picked ASAP for distros and vendors.
This patch is the work of a number of people over the past month: Eric, Reid,
Rui, and myself. I'm mailing it out as a single commit due to the time
sensitive nature of landing this and the need to backport it. Huge thanks to
everyone who helped out here, and everyone at Intel who helped out in
discussions about how to craft this. Also, credit goes to Paul Turner (at
Google, but not an LLVM contributor) for much of the underlying retpoline
design.
Reviewers: echristo, rnk, ruiu, craig.topper, DavidKreitzer
Subscribers: sanjoy, emaste, mcrosier, mgorny, mehdi_amini, hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D41723
llvm-svn: 323155
2018-01-23 06:05:25 +08:00
|
|
|
// FIXME: Add retpoline support and remove this.
|
2020-04-03 12:00:44 +08:00
|
|
|
if (Subtarget->useIndirectThunkCalls())
|
Introduce the "retpoline" x86 mitigation technique for variant #2 of the speculative execution vulnerabilities disclosed today, specifically identified by CVE-2017-5715, "Branch Target Injection", and is one of the two halves to Spectre..
Summary:
First, we need to explain the core of the vulnerability. Note that this
is a very incomplete description, please see the Project Zero blog post
for details:
https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
The basis for branch target injection is to direct speculative execution
of the processor to some "gadget" of executable code by poisoning the
prediction of indirect branches with the address of that gadget. The
gadget in turn contains an operation that provides a side channel for
reading data. Most commonly, this will look like a load of secret data
followed by a branch on the loaded value and then a load of some
predictable cache line. The attacker then uses timing of the processors
cache to determine which direction the branch took *in the speculative
execution*, and in turn what one bit of the loaded value was. Due to the
nature of these timing side channels and the branch predictor on Intel
processors, this allows an attacker to leak data only accessible to
a privileged domain (like the kernel) back into an unprivileged domain.
The goal is simple: avoid generating code which contains an indirect
branch that could have its prediction poisoned by an attacker. In many
cases, the compiler can simply use directed conditional branches and
a small search tree. LLVM already has support for lowering switches in
this way and the first step of this patch is to disable jump-table
lowering of switches and introduce a pass to rewrite explicit indirectbr
sequences into a switch over integers.
However, there is no fully general alternative to indirect calls. We
introduce a new construct we call a "retpoline" to implement indirect
calls in a non-speculatable way. It can be thought of loosely as
a trampoline for indirect calls which uses the RET instruction on x86.
Further, we arrange for a specific call->ret sequence which ensures the
processor predicts the return to go to a controlled, known location. The
retpoline then "smashes" the return address pushed onto the stack by the
call with the desired target of the original indirect call. The result
is a predicted return to the next instruction after a call (which can be
used to trap speculative execution within an infinite loop) and an
actual indirect branch to an arbitrary address.
On 64-bit x86 ABIs, this is especially easily done in the compiler by
using a guaranteed scratch register to pass the target into this device.
For 32-bit ABIs there isn't a guaranteed scratch register and so several
different retpoline variants are introduced to use a scratch register if
one is available in the calling convention and to otherwise use direct
stack push/pop sequences to pass the target address.
This "retpoline" mitigation is fully described in the following blog
post: https://support.google.com/faqs/answer/7625886
We also support a target feature that disables emission of the retpoline
thunk by the compiler to allow for custom thunks if users want them.
These are particularly useful in environments like kernels that
routinely do hot-patching on boot and want to hot-patch their thunk to
different code sequences. They can write this custom thunk and use
`-mretpoline-external-thunk` *in addition* to `-mretpoline`. In this
case, on x86-64 thu thunk names must be:
```
__llvm_external_retpoline_r11
```
or on 32-bit:
```
__llvm_external_retpoline_eax
__llvm_external_retpoline_ecx
__llvm_external_retpoline_edx
__llvm_external_retpoline_push
```
And the target of the retpoline is passed in the named register, or in
the case of the `push` suffix on the top of the stack via a `pushl`
instruction.
There is one other important source of indirect branches in x86 ELF
binaries: the PLT. These patches also include support for LLD to
generate PLT entries that perform a retpoline-style indirection.
The only other indirect branches remaining that we are aware of are from
precompiled runtimes (such as crt0.o and similar). The ones we have
found are not really attackable, and so we have not focused on them
here, but eventually these runtimes should also be replicated for
retpoline-ed configurations for completeness.
For kernels or other freestanding or fully static executables, the
compiler switch `-mretpoline` is sufficient to fully mitigate this
particular attack. For dynamic executables, you must compile *all*
libraries with `-mretpoline` and additionally link the dynamic
executable and all shared libraries with LLD and pass `-z retpolineplt`
(or use similar functionality from some other linker). We strongly
recommend also using `-z now` as non-lazy binding allows the
retpoline-mitigated PLT to be substantially smaller.
When manually apply similar transformations to `-mretpoline` to the
Linux kernel we observed very small performance hits to applications
running typical workloads, and relatively minor hits (approximately 2%)
even for extremely syscall-heavy applications. This is largely due to
the small number of indirect branches that occur in performance
sensitive paths of the kernel.
When using these patches on statically linked applications, especially
C++ applications, you should expect to see a much more dramatic
performance hit. For microbenchmarks that are switch, indirect-, or
virtual-call heavy we have seen overheads ranging from 10% to 50%.
However, real-world workloads exhibit substantially lower performance
impact. Notably, techniques such as PGO and ThinLTO dramatically reduce
the impact of hot indirect calls (by speculatively promoting them to
direct calls) and allow optimized search trees to be used to lower
switches. If you need to deploy these techniques in C++ applications, we
*strongly* recommend that you ensure all hot call targets are statically
linked (avoiding PLT indirection) and use both PGO and ThinLTO. Well
tuned servers using all of these techniques saw 5% - 10% overhead from
the use of retpoline.
We will add detailed documentation covering these components in
subsequent patches, but wanted to make the core functionality available
as soon as possible. Happy for more code review, but we'd really like to
get these patches landed and backported ASAP for obvious reasons. We're
planning to backport this to both 6.0 and 5.0 release streams and get
a 5.0 release with just this cherry picked ASAP for distros and vendors.
This patch is the work of a number of people over the past month: Eric, Reid,
Rui, and myself. I'm mailing it out as a single commit due to the time
sensitive nature of landing this and the need to backport it. Huge thanks to
everyone who helped out here, and everyone at Intel who helped out in
discussions about how to craft this. Also, credit goes to Paul Turner (at
Google, but not an LLVM contributor) for much of the underlying retpoline
design.
Reviewers: echristo, rnk, ruiu, craig.topper, DavidKreitzer
Subscribers: sanjoy, emaste, mcrosier, mgorny, mehdi_amini, hiraditya, llvm-commits
Differential Revision: https://reviews.llvm.org/D41723
llvm-svn: 323155
2018-01-23 06:05:25 +08:00
|
|
|
report_fatal_error(
|
2020-04-03 12:00:44 +08:00
|
|
|
"Lowering patchpoint with thunks not yet implemented.");
|
2014-07-25 04:40:55 +08:00
|
|
|
EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
|
2013-11-14 14:54:10 +08:00
|
|
|
}
|
2015-04-22 14:02:31 +08:00
|
|
|
|
2013-11-01 06:11:56 +08:00
|
|
|
// Emit padding.
|
2016-08-24 07:33:29 +08:00
|
|
|
unsigned NumBytes = opers.getNumPatchBytes();
|
2013-11-19 11:29:56 +08:00
|
|
|
assert(NumBytes >= EncodedBytes &&
|
2013-11-01 06:11:56 +08:00
|
|
|
"Patchpoint can't request size less than the length of a call.");
|
|
|
|
|
2020-06-18 00:08:12 +08:00
|
|
|
emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
|
2013-11-01 06:11:56 +08:00
|
|
|
}
|
|
|
|
|
[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when X-Ray is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.
Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.
Reviewers: timshen, dberris
Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D27503
llvm-svn: 302405
2017-05-08 13:45:21 +08:00
|
|
|
void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
|
|
|
|
X86MCInstLower &MCIL) {
|
2017-07-02 11:24:54 +08:00
|
|
|
assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
|
[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when X-Ray is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.
Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.
Reviewers: timshen, dberris
Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D27503
llvm-svn: 302405
2017-05-08 13:45:21 +08:00
|
|
|
|
2020-01-09 01:58:42 +08:00
|
|
|
NoAutoPaddingScope NoPadScope(*OutStreamer);
|
|
|
|
|
[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when X-Ray is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.
Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.
Reviewers: timshen, dberris
Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D27503
llvm-svn: 302405
2017-05-08 13:45:21 +08:00
|
|
|
// We want to emit the following pattern, which follows the x86 calling
|
|
|
|
// convention to prepare for the trampoline call to be patched in.
|
|
|
|
//
|
|
|
|
// .p2align 1, ...
|
|
|
|
// .Lxray_event_sled_N:
|
2017-09-04 13:34:58 +08:00
|
|
|
// jmp +N // jump across the instrumentation sled
|
|
|
|
// ... // set up arguments in register
|
|
|
|
// callq __xray_CustomEvent@plt // force dependency to symbol
|
|
|
|
// ...
|
|
|
|
// <jump here>
|
[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when X-Ray is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.
Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.
Reviewers: timshen, dberris
Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D27503
llvm-svn: 302405
2017-05-08 13:45:21 +08:00
|
|
|
//
|
|
|
|
// After patching, it would look something like:
|
|
|
|
//
|
|
|
|
// nopw (2-byte nop)
|
2017-09-04 13:34:58 +08:00
|
|
|
// ...
|
[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when X-Ray is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.
Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.
Reviewers: timshen, dberris
Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D27503
llvm-svn: 302405
2017-05-08 13:45:21 +08:00
|
|
|
// callq __xrayCustomEvent // already lowered
|
2017-09-04 13:34:58 +08:00
|
|
|
// ...
|
[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when X-Ray is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.
Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.
Reviewers: timshen, dberris
Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D27503
llvm-svn: 302405
2017-05-08 13:45:21 +08:00
|
|
|
//
|
|
|
|
// ---
|
|
|
|
// First we emit the label and the jump.
|
|
|
|
auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
|
|
|
|
OutStreamer->AddComment("# XRay Custom Event Log");
|
2020-02-15 11:21:58 +08:00
|
|
|
OutStreamer->emitCodeAlignment(2);
|
|
|
|
OutStreamer->emitLabel(CurSled);
|
[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when X-Ray is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.
Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.
Reviewers: timshen, dberris
Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D27503
llvm-svn: 302405
2017-05-08 13:45:21 +08:00
|
|
|
|
|
|
|
// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
|
|
|
|
// an operand (computed as an offset from the jmp instruction).
|
|
|
|
// FIXME: Find another less hacky way do force the relative jump.
|
2020-02-15 10:16:24 +08:00
|
|
|
OutStreamer->emitBinaryData("\xeb\x0f");
|
[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when X-Ray is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.
Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.
Reviewers: timshen, dberris
Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D27503
llvm-svn: 302405
2017-05-08 13:45:21 +08:00
|
|
|
|
|
|
|
// The default C calling convention will place two arguments into %rdi and
|
|
|
|
// %rsi -- so we only work with those.
|
2019-10-28 08:13:50 +08:00
|
|
|
const Register DestRegs[] = {X86::RDI, X86::RSI};
|
2017-09-04 13:34:58 +08:00
|
|
|
bool UsedMask[] = {false, false};
|
2018-04-18 05:30:29 +08:00
|
|
|
// Filled out in loop.
|
2019-10-28 08:13:50 +08:00
|
|
|
Register SrcRegs[] = {0, 0};
|
2017-09-04 13:34:58 +08:00
|
|
|
|
|
|
|
// Then we put the operands in the %rdi and %rsi registers. We spill the
|
|
|
|
// values in the register before we clobber them, and mark them as used in
|
|
|
|
// UsedMask. In case the arguments are already in the correct register, we
|
|
|
|
// emit nops appropriately sized to keep the sled the same size in every
|
|
|
|
// situation.
|
[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when X-Ray is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.
Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.
Reviewers: timshen, dberris
Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D27503
llvm-svn: 302405
2017-05-08 13:45:21 +08:00
|
|
|
for (unsigned I = 0; I < MI.getNumOperands(); ++I)
|
|
|
|
if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
|
2017-09-04 13:34:58 +08:00
|
|
|
assert(Op->isReg() && "Only support arguments in registers");
|
2019-10-28 08:13:50 +08:00
|
|
|
SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
|
2018-04-18 05:30:29 +08:00
|
|
|
if (SrcRegs[I] != DestRegs[I]) {
|
2017-09-04 13:34:58 +08:00
|
|
|
UsedMask[I] = true;
|
|
|
|
EmitAndCountInstruction(
|
2018-04-18 05:30:29 +08:00
|
|
|
MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
|
2017-09-04 13:34:58 +08:00
|
|
|
} else {
|
2020-06-18 00:08:12 +08:00
|
|
|
emitX86Nops(*OutStreamer, 4, Subtarget);
|
[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when X-Ray is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.
Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.
Reviewers: timshen, dberris
Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D27503
llvm-svn: 302405
2017-05-08 13:45:21 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-04-18 05:30:29 +08:00
|
|
|
// Now that the register values are stashed, mov arguments into place.
|
2019-10-28 08:13:50 +08:00
|
|
|
// FIXME: This doesn't work if one of the later SrcRegs is equal to an
|
|
|
|
// earlier DestReg. We will have already overwritten the register before
|
|
|
|
// we can copy from it.
|
2018-04-18 05:30:29 +08:00
|
|
|
for (unsigned I = 0; I < MI.getNumOperands(); ++I)
|
|
|
|
if (SrcRegs[I] != DestRegs[I])
|
|
|
|
EmitAndCountInstruction(
|
|
|
|
MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
|
|
|
|
|
[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when X-Ray is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.
Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.
Reviewers: timshen, dberris
Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D27503
llvm-svn: 302405
2017-05-08 13:45:21 +08:00
|
|
|
// We emit a hard dependency on the __xray_CustomEvent symbol, which is the
|
2017-09-04 13:34:58 +08:00
|
|
|
// name of the trampoline to be implemented by the XRay runtime.
|
[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when X-Ray is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.
Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.
Reviewers: timshen, dberris
Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D27503
llvm-svn: 302405
2017-05-08 13:45:21 +08:00
|
|
|
auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
|
|
|
|
MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
|
2017-09-04 13:34:58 +08:00
|
|
|
if (isPositionIndependent())
|
|
|
|
TOp.setTargetFlags(X86II::MO_PLT);
|
[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when X-Ray is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.
Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.
Reviewers: timshen, dberris
Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D27503
llvm-svn: 302405
2017-05-08 13:45:21 +08:00
|
|
|
|
|
|
|
// Emit the call instruction.
|
2017-09-04 13:34:58 +08:00
|
|
|
EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
|
|
|
|
.addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
|
[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when X-Ray is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.
Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.
Reviewers: timshen, dberris
Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D27503
llvm-svn: 302405
2017-05-08 13:45:21 +08:00
|
|
|
|
|
|
|
// Restore caller-saved and used registers.
|
2017-09-04 13:34:58 +08:00
|
|
|
for (unsigned I = sizeof UsedMask; I-- > 0;)
|
|
|
|
if (UsedMask[I])
|
2018-04-18 05:30:29 +08:00
|
|
|
EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
|
2017-09-04 13:34:58 +08:00
|
|
|
else
|
2020-06-18 00:08:12 +08:00
|
|
|
emitX86Nops(*OutStreamer, 1, Subtarget);
|
2017-09-04 13:34:58 +08:00
|
|
|
|
[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when X-Ray is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.
Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.
Reviewers: timshen, dberris
Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D27503
llvm-svn: 302405
2017-05-08 13:45:21 +08:00
|
|
|
OutStreamer->AddComment("xray custom event end.");
|
|
|
|
|
2020-04-14 13:28:16 +08:00
|
|
|
// Record the sled version. Version 0 of this sled was spelled differently, so
|
|
|
|
// we let the runtime handle the different offsets we're using. Version 2
|
|
|
|
// changed the absolute address to a PC-relative address.
|
|
|
|
recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
|
[XRay] Custom event logging intrinsic
This patch introduces an LLVM intrinsic and a target opcode for custom event
logging in XRay. Initially, its use case will be to allow users of XRay to log
some type of string ("poor man's printf"). The target opcode compiles to a noop
sled large enough to enable calling through to a runtime-determined relative
function call. At runtime, when X-Ray is enabled, the sled is replaced by
compiler-rt with a trampoline to the logic for creating the custom log entries.
Future patches will implement the compiler-rt parts and clang-side support for
emitting the IR corresponding to this intrinsic.
Reviewers: timshen, dberris
Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D27503
llvm-svn: 302405
2017-05-08 13:45:21 +08:00
|
|
|
}
|
|
|
|
|
2018-04-18 05:30:29 +08:00
|
|
|
void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
|
|
|
|
X86MCInstLower &MCIL) {
|
|
|
|
assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
|
|
|
|
|
2020-01-09 01:58:42 +08:00
|
|
|
NoAutoPaddingScope NoPadScope(*OutStreamer);
|
|
|
|
|
2018-04-18 05:30:29 +08:00
|
|
|
// We want to emit the following pattern, which follows the x86 calling
|
|
|
|
// convention to prepare for the trampoline call to be patched in.
|
|
|
|
//
|
|
|
|
// .p2align 1, ...
|
|
|
|
// .Lxray_event_sled_N:
|
|
|
|
// jmp +N // jump across the instrumentation sled
|
|
|
|
// ... // set up arguments in register
|
|
|
|
// callq __xray_TypedEvent@plt // force dependency to symbol
|
|
|
|
// ...
|
|
|
|
// <jump here>
|
|
|
|
//
|
|
|
|
// After patching, it would look something like:
|
|
|
|
//
|
|
|
|
// nopw (2-byte nop)
|
|
|
|
// ...
|
|
|
|
// callq __xrayTypedEvent // already lowered
|
|
|
|
// ...
|
|
|
|
//
|
|
|
|
// ---
|
|
|
|
// First we emit the label and the jump.
|
|
|
|
auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
|
|
|
|
OutStreamer->AddComment("# XRay Typed Event Log");
|
2020-02-15 11:21:58 +08:00
|
|
|
OutStreamer->emitCodeAlignment(2);
|
|
|
|
OutStreamer->emitLabel(CurSled);
|
2018-04-18 05:30:29 +08:00
|
|
|
|
|
|
|
// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
|
|
|
|
// an operand (computed as an offset from the jmp instruction).
|
|
|
|
// FIXME: Find another less hacky way do force the relative jump.
|
2020-02-15 10:16:24 +08:00
|
|
|
OutStreamer->emitBinaryData("\xeb\x14");
|
2018-04-18 05:30:29 +08:00
|
|
|
|
|
|
|
// An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
|
|
|
|
// so we'll work with those. Or we may be called via SystemV, in which case
|
|
|
|
// we don't have to do any translation.
|
2019-10-28 08:13:50 +08:00
|
|
|
const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
|
2018-04-18 05:30:29 +08:00
|
|
|
bool UsedMask[] = {false, false, false};
|
|
|
|
|
|
|
|
// Will fill out src regs in the loop.
|
2019-10-28 08:13:50 +08:00
|
|
|
Register SrcRegs[] = {0, 0, 0};
|
2018-04-18 05:30:29 +08:00
|
|
|
|
|
|
|
// Then we put the operands in the SystemV registers. We spill the values in
|
|
|
|
// the registers before we clobber them, and mark them as used in UsedMask.
|
|
|
|
// In case the arguments are already in the correct register, we emit nops
|
|
|
|
// appropriately sized to keep the sled the same size in every situation.
|
|
|
|
for (unsigned I = 0; I < MI.getNumOperands(); ++I)
|
|
|
|
if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
|
|
|
|
// TODO: Is register only support adequate?
|
|
|
|
assert(Op->isReg() && "Only supports arguments in registers");
|
2019-10-28 08:13:50 +08:00
|
|
|
SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
|
2018-04-18 05:30:29 +08:00
|
|
|
if (SrcRegs[I] != DestRegs[I]) {
|
|
|
|
UsedMask[I] = true;
|
|
|
|
EmitAndCountInstruction(
|
|
|
|
MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
|
|
|
|
} else {
|
2020-06-18 00:08:12 +08:00
|
|
|
emitX86Nops(*OutStreamer, 4, Subtarget);
|
2018-04-18 05:30:29 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// In the above loop we only stash all of the destination registers or emit
|
|
|
|
// nops if the arguments are already in the right place. Doing the actually
|
|
|
|
// moving is postponed until after all the registers are stashed so nothing
|
|
|
|
// is clobbers. We've already added nops to account for the size of mov and
|
|
|
|
// push if the register is in the right place, so we only have to worry about
|
|
|
|
// emitting movs.
|
2019-10-28 08:13:50 +08:00
|
|
|
// FIXME: This doesn't work if one of the later SrcRegs is equal to an
|
|
|
|
// earlier DestReg. We will have already overwritten over the register before
|
|
|
|
// we can copy from it.
|
2018-04-18 05:30:29 +08:00
|
|
|
for (unsigned I = 0; I < MI.getNumOperands(); ++I)
|
|
|
|
if (UsedMask[I])
|
|
|
|
EmitAndCountInstruction(
|
|
|
|
MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
|
|
|
|
|
|
|
|
// We emit a hard dependency on the __xray_TypedEvent symbol, which is the
|
|
|
|
// name of the trampoline to be implemented by the XRay runtime.
|
|
|
|
auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
|
|
|
|
MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
|
|
|
|
if (isPositionIndependent())
|
|
|
|
TOp.setTargetFlags(X86II::MO_PLT);
|
|
|
|
|
|
|
|
// Emit the call instruction.
|
|
|
|
EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
|
|
|
|
.addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
|
|
|
|
|
|
|
|
// Restore caller-saved and used registers.
|
|
|
|
for (unsigned I = sizeof UsedMask; I-- > 0;)
|
|
|
|
if (UsedMask[I])
|
|
|
|
EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
|
|
|
|
else
|
2020-06-18 00:08:12 +08:00
|
|
|
emitX86Nops(*OutStreamer, 1, Subtarget);
|
2018-04-18 05:30:29 +08:00
|
|
|
|
|
|
|
OutStreamer->AddComment("xray typed event end.");
|
|
|
|
|
|
|
|
// Record the sled version.
|
2020-04-14 13:28:16 +08:00
|
|
|
recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
|
2018-04-18 05:30:29 +08:00
|
|
|
}
|
|
|
|
|
XRay: Add entry and exit sleds
Summary:
In this patch we implement the following parts of XRay:
- Supporting a function attribute named 'function-instrument' which currently only supports 'xray-always'. We should be able to use this attribute for other instrumentation approaches.
- Supporting a function attribute named 'xray-instruction-threshold' used to determine whether a function is instrumented with a minimum number of instructions (IR instruction counts).
- X86-specific nop sleds as described in the white paper.
- A machine function pass that adds the different instrumentation marker instructions at a very late stage.
- A way of identifying which return opcode is considered "normal" for each architecture.
There are some caveats here:
1) We don't handle PATCHABLE_RET in platforms other than x86_64 yet -- this means if IR used PATCHABLE_RET directly instead of a normal ret, instruction lowering for that platform might do the wrong thing. We think this should be handled at instruction selection time to by default be unpacked for platforms where XRay is not availble yet.
2) The generated section for X86 is different from what is described from the white paper for the sole reason that LLVM allows us to do this neatly. We're taking the opportunity to deviate from the white paper from this perspective to allow us to get richer information from the runtime library.
Reviewers: sanjoy, eugenis, kcc, pcc, echristo, rnk
Subscribers: niravd, majnemer, atrick, rnk, emaste, bmakam, mcrosier, mehdi_amini, llvm-commits
Differential Revision: http://reviews.llvm.org/D19904
llvm-svn: 275367
2016-07-14 12:06:33 +08:00
|
|
|
void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
|
|
|
|
X86MCInstLower &MCIL) {
|
2020-01-09 01:58:42 +08:00
|
|
|
|
|
|
|
NoAutoPaddingScope NoPadScope(*OutStreamer);
|
|
|
|
|
2020-01-05 11:52:36 +08:00
|
|
|
const Function &F = MF->getFunction();
|
|
|
|
if (F.hasFnAttribute("patchable-function-entry")) {
|
|
|
|
unsigned Num;
|
|
|
|
if (F.getFnAttribute("patchable-function-entry")
|
|
|
|
.getValueAsString()
|
|
|
|
.getAsInteger(10, Num))
|
|
|
|
return;
|
2020-06-18 00:08:12 +08:00
|
|
|
emitX86Nops(*OutStreamer, Num, Subtarget);
|
2020-01-05 11:52:36 +08:00
|
|
|
return;
|
|
|
|
}
|
XRay: Add entry and exit sleds
Summary:
In this patch we implement the following parts of XRay:
- Supporting a function attribute named 'function-instrument' which currently only supports 'xray-always'. We should be able to use this attribute for other instrumentation approaches.
- Supporting a function attribute named 'xray-instruction-threshold' used to determine whether a function is instrumented with a minimum number of instructions (IR instruction counts).
- X86-specific nop sleds as described in the white paper.
- A machine function pass that adds the different instrumentation marker instructions at a very late stage.
- A way of identifying which return opcode is considered "normal" for each architecture.
There are some caveats here:
1) We don't handle PATCHABLE_RET in platforms other than x86_64 yet -- this means if IR used PATCHABLE_RET directly instead of a normal ret, instruction lowering for that platform might do the wrong thing. We think this should be handled at instruction selection time to by default be unpacked for platforms where XRay is not availble yet.
2) The generated section for X86 is different from what is described from the white paper for the sole reason that LLVM allows us to do this neatly. We're taking the opportunity to deviate from the white paper from this perspective to allow us to get richer information from the runtime library.
Reviewers: sanjoy, eugenis, kcc, pcc, echristo, rnk
Subscribers: niravd, majnemer, atrick, rnk, emaste, bmakam, mcrosier, mehdi_amini, llvm-commits
Differential Revision: http://reviews.llvm.org/D19904
llvm-svn: 275367
2016-07-14 12:06:33 +08:00
|
|
|
// We want to emit the following pattern:
|
|
|
|
//
|
2016-08-04 15:37:28 +08:00
|
|
|
// .p2align 1, ...
|
XRay: Add entry and exit sleds
Summary:
In this patch we implement the following parts of XRay:
- Supporting a function attribute named 'function-instrument' which currently only supports 'xray-always'. We should be able to use this attribute for other instrumentation approaches.
- Supporting a function attribute named 'xray-instruction-threshold' used to determine whether a function is instrumented with a minimum number of instructions (IR instruction counts).
- X86-specific nop sleds as described in the white paper.
- A machine function pass that adds the different instrumentation marker instructions at a very late stage.
- A way of identifying which return opcode is considered "normal" for each architecture.
There are some caveats here:
1) We don't handle PATCHABLE_RET in platforms other than x86_64 yet -- this means if IR used PATCHABLE_RET directly instead of a normal ret, instruction lowering for that platform might do the wrong thing. We think this should be handled at instruction selection time to by default be unpacked for platforms where XRay is not availble yet.
2) The generated section for X86 is different from what is described from the white paper for the sole reason that LLVM allows us to do this neatly. We're taking the opportunity to deviate from the white paper from this perspective to allow us to get richer information from the runtime library.
Reviewers: sanjoy, eugenis, kcc, pcc, echristo, rnk
Subscribers: niravd, majnemer, atrick, rnk, emaste, bmakam, mcrosier, mehdi_amini, llvm-commits
Differential Revision: http://reviews.llvm.org/D19904
llvm-svn: 275367
2016-07-14 12:06:33 +08:00
|
|
|
// .Lxray_sled_N:
|
|
|
|
// jmp .tmpN
|
|
|
|
// # 9 bytes worth of noops
|
|
|
|
//
|
|
|
|
// We need the 9 bytes because at runtime, we'd be patching over the full 11
|
|
|
|
// bytes with the following pattern:
|
|
|
|
//
|
|
|
|
// mov %r10, <function id, 32-bit> // 6 bytes
|
|
|
|
// call <relative offset, 32-bits> // 5 bytes
|
|
|
|
//
|
|
|
|
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
|
2020-02-15 11:21:58 +08:00
|
|
|
OutStreamer->emitCodeAlignment(2);
|
|
|
|
OutStreamer->emitLabel(CurSled);
|
XRay: Add entry and exit sleds
Summary:
In this patch we implement the following parts of XRay:
- Supporting a function attribute named 'function-instrument' which currently only supports 'xray-always'. We should be able to use this attribute for other instrumentation approaches.
- Supporting a function attribute named 'xray-instruction-threshold' used to determine whether a function is instrumented with a minimum number of instructions (IR instruction counts).
- X86-specific nop sleds as described in the white paper.
- A machine function pass that adds the different instrumentation marker instructions at a very late stage.
- A way of identifying which return opcode is considered "normal" for each architecture.
There are some caveats here:
1) We don't handle PATCHABLE_RET in platforms other than x86_64 yet -- this means if IR used PATCHABLE_RET directly instead of a normal ret, instruction lowering for that platform might do the wrong thing. We think this should be handled at instruction selection time to by default be unpacked for platforms where XRay is not availble yet.
2) The generated section for X86 is different from what is described from the white paper for the sole reason that LLVM allows us to do this neatly. We're taking the opportunity to deviate from the white paper from this perspective to allow us to get richer information from the runtime library.
Reviewers: sanjoy, eugenis, kcc, pcc, echristo, rnk
Subscribers: niravd, majnemer, atrick, rnk, emaste, bmakam, mcrosier, mehdi_amini, llvm-commits
Differential Revision: http://reviews.llvm.org/D19904
llvm-svn: 275367
2016-07-14 12:06:33 +08:00
|
|
|
|
|
|
|
// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
|
|
|
|
// an operand (computed as an offset from the jmp instruction).
|
|
|
|
// FIXME: Find another less hacky way do force the relative jump.
|
2020-02-15 10:16:24 +08:00
|
|
|
OutStreamer->emitBytes("\xeb\x09");
|
2020-06-18 00:08:12 +08:00
|
|
|
emitX86Nops(*OutStreamer, 9, Subtarget);
|
2020-04-14 13:28:16 +08:00
|
|
|
recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
|
XRay: Add entry and exit sleds
Summary:
In this patch we implement the following parts of XRay:
- Supporting a function attribute named 'function-instrument' which currently only supports 'xray-always'. We should be able to use this attribute for other instrumentation approaches.
- Supporting a function attribute named 'xray-instruction-threshold' used to determine whether a function is instrumented with a minimum number of instructions (IR instruction counts).
- X86-specific nop sleds as described in the white paper.
- A machine function pass that adds the different instrumentation marker instructions at a very late stage.
- A way of identifying which return opcode is considered "normal" for each architecture.
There are some caveats here:
1) We don't handle PATCHABLE_RET in platforms other than x86_64 yet -- this means if IR used PATCHABLE_RET directly instead of a normal ret, instruction lowering for that platform might do the wrong thing. We think this should be handled at instruction selection time to by default be unpacked for platforms where XRay is not availble yet.
2) The generated section for X86 is different from what is described from the white paper for the sole reason that LLVM allows us to do this neatly. We're taking the opportunity to deviate from the white paper from this perspective to allow us to get richer information from the runtime library.
Reviewers: sanjoy, eugenis, kcc, pcc, echristo, rnk
Subscribers: niravd, majnemer, atrick, rnk, emaste, bmakam, mcrosier, mehdi_amini, llvm-commits
Differential Revision: http://reviews.llvm.org/D19904
llvm-svn: 275367
2016-07-14 12:06:33 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
|
|
|
|
X86MCInstLower &MCIL) {
|
2020-01-09 01:58:42 +08:00
|
|
|
NoAutoPaddingScope NoPadScope(*OutStreamer);
|
|
|
|
|
XRay: Add entry and exit sleds
Summary:
In this patch we implement the following parts of XRay:
- Supporting a function attribute named 'function-instrument' which currently only supports 'xray-always'. We should be able to use this attribute for other instrumentation approaches.
- Supporting a function attribute named 'xray-instruction-threshold' used to determine whether a function is instrumented with a minimum number of instructions (IR instruction counts).
- X86-specific nop sleds as described in the white paper.
- A machine function pass that adds the different instrumentation marker instructions at a very late stage.
- A way of identifying which return opcode is considered "normal" for each architecture.
There are some caveats here:
1) We don't handle PATCHABLE_RET in platforms other than x86_64 yet -- this means if IR used PATCHABLE_RET directly instead of a normal ret, instruction lowering for that platform might do the wrong thing. We think this should be handled at instruction selection time to by default be unpacked for platforms where XRay is not availble yet.
2) The generated section for X86 is different from what is described from the white paper for the sole reason that LLVM allows us to do this neatly. We're taking the opportunity to deviate from the white paper from this perspective to allow us to get richer information from the runtime library.
Reviewers: sanjoy, eugenis, kcc, pcc, echristo, rnk
Subscribers: niravd, majnemer, atrick, rnk, emaste, bmakam, mcrosier, mehdi_amini, llvm-commits
Differential Revision: http://reviews.llvm.org/D19904
llvm-svn: 275367
2016-07-14 12:06:33 +08:00
|
|
|
// Since PATCHABLE_RET takes the opcode of the return statement as an
|
|
|
|
// argument, we use that to emit the correct form of the RET that we want.
|
|
|
|
// i.e. when we see this:
|
|
|
|
//
|
|
|
|
// PATCHABLE_RET X86::RET ...
|
|
|
|
//
|
|
|
|
// We should emit the RET followed by sleds.
|
|
|
|
//
|
2016-08-04 15:37:28 +08:00
|
|
|
// .p2align 1, ...
|
XRay: Add entry and exit sleds
Summary:
In this patch we implement the following parts of XRay:
- Supporting a function attribute named 'function-instrument' which currently only supports 'xray-always'. We should be able to use this attribute for other instrumentation approaches.
- Supporting a function attribute named 'xray-instruction-threshold' used to determine whether a function is instrumented with a minimum number of instructions (IR instruction counts).
- X86-specific nop sleds as described in the white paper.
- A machine function pass that adds the different instrumentation marker instructions at a very late stage.
- A way of identifying which return opcode is considered "normal" for each architecture.
There are some caveats here:
1) We don't handle PATCHABLE_RET in platforms other than x86_64 yet -- this means if IR used PATCHABLE_RET directly instead of a normal ret, instruction lowering for that platform might do the wrong thing. We think this should be handled at instruction selection time to by default be unpacked for platforms where XRay is not availble yet.
2) The generated section for X86 is different from what is described from the white paper for the sole reason that LLVM allows us to do this neatly. We're taking the opportunity to deviate from the white paper from this perspective to allow us to get richer information from the runtime library.
Reviewers: sanjoy, eugenis, kcc, pcc, echristo, rnk
Subscribers: niravd, majnemer, atrick, rnk, emaste, bmakam, mcrosier, mehdi_amini, llvm-commits
Differential Revision: http://reviews.llvm.org/D19904
llvm-svn: 275367
2016-07-14 12:06:33 +08:00
|
|
|
// .Lxray_sled_N:
|
|
|
|
// ret # or equivalent instruction
|
|
|
|
// # 10 bytes worth of noops
|
|
|
|
//
|
|
|
|
// This just makes sure that the alignment for the next instruction is 2.
|
|
|
|
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
|
2020-02-15 11:21:58 +08:00
|
|
|
OutStreamer->emitCodeAlignment(2);
|
|
|
|
OutStreamer->emitLabel(CurSled);
|
XRay: Add entry and exit sleds
Summary:
In this patch we implement the following parts of XRay:
- Supporting a function attribute named 'function-instrument' which currently only supports 'xray-always'. We should be able to use this attribute for other instrumentation approaches.
- Supporting a function attribute named 'xray-instruction-threshold' used to determine whether a function is instrumented with a minimum number of instructions (IR instruction counts).
- X86-specific nop sleds as described in the white paper.
- A machine function pass that adds the different instrumentation marker instructions at a very late stage.
- A way of identifying which return opcode is considered "normal" for each architecture.
There are some caveats here:
1) We don't handle PATCHABLE_RET in platforms other than x86_64 yet -- this means if IR used PATCHABLE_RET directly instead of a normal ret, instruction lowering for that platform might do the wrong thing. We think this should be handled at instruction selection time to by default be unpacked for platforms where XRay is not availble yet.
2) The generated section for X86 is different from what is described from the white paper for the sole reason that LLVM allows us to do this neatly. We're taking the opportunity to deviate from the white paper from this perspective to allow us to get richer information from the runtime library.
Reviewers: sanjoy, eugenis, kcc, pcc, echristo, rnk
Subscribers: niravd, majnemer, atrick, rnk, emaste, bmakam, mcrosier, mehdi_amini, llvm-commits
Differential Revision: http://reviews.llvm.org/D19904
llvm-svn: 275367
2016-07-14 12:06:33 +08:00
|
|
|
unsigned OpCode = MI.getOperand(0).getImm();
|
|
|
|
MCInst Ret;
|
|
|
|
Ret.setOpcode(OpCode);
|
2021-01-19 02:16:36 +08:00
|
|
|
for (auto &MO : drop_begin(MI.operands()))
|
XRay: Add entry and exit sleds
Summary:
In this patch we implement the following parts of XRay:
- Supporting a function attribute named 'function-instrument' which currently only supports 'xray-always'. We should be able to use this attribute for other instrumentation approaches.
- Supporting a function attribute named 'xray-instruction-threshold' used to determine whether a function is instrumented with a minimum number of instructions (IR instruction counts).
- X86-specific nop sleds as described in the white paper.
- A machine function pass that adds the different instrumentation marker instructions at a very late stage.
- A way of identifying which return opcode is considered "normal" for each architecture.
There are some caveats here:
1) We don't handle PATCHABLE_RET in platforms other than x86_64 yet -- this means if IR used PATCHABLE_RET directly instead of a normal ret, instruction lowering for that platform might do the wrong thing. We think this should be handled at instruction selection time to by default be unpacked for platforms where XRay is not available yet.
2) The generated section for X86 is different from what is described from the white paper for the sole reason that LLVM allows us to do this neatly. We're taking the opportunity to deviate from the white paper from this perspective to allow us to get richer information from the runtime library.
Reviewers: sanjoy, eugenis, kcc, pcc, echristo, rnk
Subscribers: niravd, majnemer, atrick, rnk, emaste, bmakam, mcrosier, mehdi_amini, llvm-commits
Differential Revision: http://reviews.llvm.org/D19904
llvm-svn: 275367
2016-07-14 12:06:33 +08:00
|
|
|
if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
|
|
|
|
Ret.addOperand(MaybeOperand.getValue());
|
2020-02-14 13:58:16 +08:00
|
|
|
OutStreamer->emitInstruction(Ret, getSubtargetInfo());
|
2020-06-18 00:08:12 +08:00
|
|
|
emitX86Nops(*OutStreamer, 10, Subtarget);
|
2020-04-14 13:28:16 +08:00
|
|
|
recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
|
XRay: Add entry and exit sleds
Summary:
In this patch we implement the following parts of XRay:
- Supporting a function attribute named 'function-instrument' which currently only supports 'xray-always'. We should be able to use this attribute for other instrumentation approaches.
- Supporting a function attribute named 'xray-instruction-threshold' used to determine whether a function is instrumented with a minimum number of instructions (IR instruction counts).
- X86-specific nop sleds as described in the white paper.
- A machine function pass that adds the different instrumentation marker instructions at a very late stage.
- A way of identifying which return opcode is considered "normal" for each architecture.
There are some caveats here:
1) We don't handle PATCHABLE_RET in platforms other than x86_64 yet -- this means if IR used PATCHABLE_RET directly instead of a normal ret, instruction lowering for that platform might do the wrong thing. We think this should be handled at instruction selection time to by default be unpacked for platforms where XRay is not available yet.
2) The generated section for X86 is different from what is described from the white paper for the sole reason that LLVM allows us to do this neatly. We're taking the opportunity to deviate from the white paper from this perspective to allow us to get richer information from the runtime library.
Reviewers: sanjoy, eugenis, kcc, pcc, echristo, rnk
Subscribers: niravd, majnemer, atrick, rnk, emaste, bmakam, mcrosier, mehdi_amini, llvm-commits
Differential Revision: http://reviews.llvm.org/D19904
llvm-svn: 275367
2016-07-14 12:06:33 +08:00
|
|
|
}
|
|
|
|
|
2018-04-18 05:30:29 +08:00
|
|
|
void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
|
|
|
|
X86MCInstLower &MCIL) {
|
2020-01-09 01:58:42 +08:00
|
|
|
NoAutoPaddingScope NoPadScope(*OutStreamer);
|
|
|
|
|
[XRay] Detect and emit sleds for sibling/tail calls
Summary:
This change promotes the 'isTailCall(...)' member function to
TargetInstrInfo as a query interface for determining on a per-target
basis whether a given MachineInstr is a tail call instruction. We build
upon this in the XRay instrumentation pass to emit special sleds for
tail call optimisations, where we emit the correct kind of sled.
The tail call sleds look like a mix between the function entry and
function exit sleds. Form-wise, the sled comes before the "jmp"
instruction that implements the tail call similar to how we do it for
the function entry sled. Functionally, because we know this is a tail
call, it behaves much like an exit sled -- i.e. at runtime we may use
the exit trampolines instead of a different kind of trampoline.
A follow-up change to recognise these sleds will be done in compiler-rt,
so that we can start intercepting these initially as exits, but also
have the option to have different log entries to more accurately reflect
that this is actually a tail call.
Reviewers: echristo, rSerge, majnemer
Subscribers: mehdi_amini, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D23986
llvm-svn: 280334
2016-09-01 09:29:13 +08:00
|
|
|
// Like PATCHABLE_RET, we have the actual instruction in the operands to this
|
|
|
|
// instruction so we lower that particular instruction and its operands.
|
|
|
|
// Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
|
|
|
|
// we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
|
|
|
|
// the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
|
|
|
|
// tail call much like how we have it in PATCHABLE_RET.
|
|
|
|
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
|
2020-02-15 11:21:58 +08:00
|
|
|
OutStreamer->emitCodeAlignment(2);
|
|
|
|
OutStreamer->emitLabel(CurSled);
|
[XRay] Detect and emit sleds for sibling/tail calls
Summary:
This change promotes the 'isTailCall(...)' member function to
TargetInstrInfo as a query interface for determining on a per-target
basis whether a given MachineInstr is a tail call instruction. We build
upon this in the XRay instrumentation pass to emit special sleds for
tail call optimisations, where we emit the correct kind of sled.
The tail call sleds look like a mix between the function entry and
function exit sleds. Form-wise, the sled comes before the "jmp"
instruction that implements the tail call similar to how we do it for
the function entry sled. Functionally, because we know this is a tail
call, it behaves much like an exit sled -- i.e. at runtime we may use
the exit trampolines instead of a different kind of trampoline.
A follow-up change to recognise these sleds will be done in compiler-rt,
so that we can start intercepting these initially as exits, but also
have the option to have different log entries to more accurately reflect
that this is actually a tail call.
Reviewers: echristo, rSerge, majnemer
Subscribers: mehdi_amini, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D23986
llvm-svn: 280334
2016-09-01 09:29:13 +08:00
|
|
|
auto Target = OutContext.createTempSymbol();
|
|
|
|
|
|
|
|
// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
|
|
|
|
// an operand (computed as an offset from the jmp instruction).
|
|
|
|
// FIXME: Find another less hacky way do force the relative jump.
|
2020-02-15 10:16:24 +08:00
|
|
|
OutStreamer->emitBytes("\xeb\x09");
|
2020-06-18 00:08:12 +08:00
|
|
|
emitX86Nops(*OutStreamer, 9, Subtarget);
|
2020-02-15 11:21:58 +08:00
|
|
|
OutStreamer->emitLabel(Target);
|
2020-04-14 13:28:16 +08:00
|
|
|
recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);
|
[XRay] Detect and emit sleds for sibling/tail calls
Summary:
This change promotes the 'isTailCall(...)' member function to
TargetInstrInfo as a query interface for determining on a per-target
basis whether a given MachineInstr is a tail call instruction. We build
upon this in the XRay instrumentation pass to emit special sleds for
tail call optimisations, where we emit the correct kind of sled.
The tail call sleds look like a mix between the function entry and
function exit sleds. Form-wise, the sled comes before the "jmp"
instruction that implements the tail call similar to how we do it for
the function entry sled. Functionally, because we know this is a tail
call, it behaves much like an exit sled -- i.e. at runtime we may use
the exit trampolines instead of a different kind of trampoline.
A follow-up change to recognise these sleds will be done in compiler-rt,
so that we can start intercepting these initially as exits, but also
have the option to have different log entries to more accurately reflect
that this is actually a tail call.
Reviewers: echristo, rSerge, majnemer
Subscribers: mehdi_amini, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D23986
llvm-svn: 280334
2016-09-01 09:29:13 +08:00
|
|
|
|
|
|
|
unsigned OpCode = MI.getOperand(0).getImm();
|
2019-08-28 01:24:23 +08:00
|
|
|
OpCode = convertTailJumpOpcode(OpCode);
|
[XRay] Detect and emit sleds for sibling/tail calls
Summary:
This change promotes the 'isTailCall(...)' member function to
TargetInstrInfo as a query interface for determining on a per-target
basis whether a given MachineInstr is a tail call instruction. We build
upon this in the XRay instrumentation pass to emit special sleds for
tail call optimisations, where we emit the correct kind of sled.
The tail call sleds look like a mix between the function entry and
function exit sleds. Form-wise, the sled comes before the "jmp"
instruction that implements the tail call similar to how we do it for
the function entry sled. Functionally, because we know this is a tail
call, it behaves much like an exit sled -- i.e. at runtime we may use
the exit trampolines instead of a different kind of trampoline.
A follow-up change to recognise these sleds will be done in compiler-rt,
so that we can start intercepting these initially as exits, but also
have the option to have different log entries to more accurately reflect
that this is actually a tail call.
Reviewers: echristo, rSerge, majnemer
Subscribers: mehdi_amini, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D23986
llvm-svn: 280334
2016-09-01 09:29:13 +08:00
|
|
|
MCInst TC;
|
|
|
|
TC.setOpcode(OpCode);
|
|
|
|
|
|
|
|
// Before emitting the instruction, add a comment to indicate that this is
|
|
|
|
// indeed a tail call.
|
|
|
|
OutStreamer->AddComment("TAILCALL");
|
2021-01-19 02:16:36 +08:00
|
|
|
for (auto &MO : drop_begin(MI.operands()))
|
[XRay] Detect and emit sleds for sibling/tail calls
Summary:
This change promotes the 'isTailCall(...)' member function to
TargetInstrInfo as a query interface for determining on a per-target
basis whether a given MachineInstr is a tail call instruction. We build
upon this in the XRay instrumentation pass to emit special sleds for
tail call optimisations, where we emit the correct kind of sled.
The tail call sleds look like a mix between the function entry and
function exit sleds. Form-wise, the sled comes before the "jmp"
instruction that implements the tail call similar to how we do it for
the function entry sled. Functionally, because we know this is a tail
call, it behaves much like an exit sled -- i.e. at runtime we may use
the exit trampolines instead of a different kind of trampoline.
A follow-up change to recognise these sleds will be done in compiler-rt,
so that we can start intercepting these initially as exits, but also
have the option to have different log entries to more accurately reflect
that this is actually a tail call.
Reviewers: echristo, rSerge, majnemer
Subscribers: mehdi_amini, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D23986
llvm-svn: 280334
2016-09-01 09:29:13 +08:00
|
|
|
if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
|
|
|
|
TC.addOperand(MaybeOperand.getValue());
|
2020-02-14 13:58:16 +08:00
|
|
|
OutStreamer->emitInstruction(TC, getSubtargetInfo());
|
[XRay] Detect and emit sleds for sibling/tail calls
Summary:
This change promotes the 'isTailCall(...)' member function to
TargetInstrInfo as a query interface for determining on a per-target
basis whether a given MachineInstr is a tail call instruction. We build
upon this in the XRay instrumentation pass to emit special sleds for
tail call optimisations, where we emit the correct kind of sled.
The tail call sleds look like a mix between the function entry and
function exit sleds. Form-wise, the sled comes before the "jmp"
instruction that implements the tail call similar to how we do it for
the function entry sled. Functionally, because we know this is a tail
call, it behaves much like an exit sled -- i.e. at runtime we may use
the exit trampolines instead of a different kind of trampoline.
A follow-up change to recognise these sleds will be done in compiler-rt,
so that we can start intercepting these initially as exits, but also
have the option to have different log entries to more accurately reflect
that this is actually a tail call.
Reviewers: echristo, rSerge, majnemer
Subscribers: mehdi_amini, dberris, llvm-commits
Differential Revision: https://reviews.llvm.org/D23986
llvm-svn: 280334
2016-09-01 09:29:13 +08:00
|
|
|
}
|
|
|
|
|
2014-08-05 05:05:27 +08:00
|
|
|
// Returns instruction preceding MBBI in MachineFunction.
|
|
|
|
// If MBBI is the first instruction of the first basic block, returns null.
|
|
|
|
static MachineBasicBlock::const_iterator
|
|
|
|
PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
|
|
|
|
const MachineBasicBlock *MBB = MBBI->getParent();
|
|
|
|
while (MBBI == MBB->begin()) {
|
2016-02-22 04:39:50 +08:00
|
|
|
if (MBB == &MBB->getParent()->front())
|
2016-07-12 11:18:50 +08:00
|
|
|
return MachineBasicBlock::const_iterator();
|
2014-08-05 05:05:27 +08:00
|
|
|
MBB = MBB->getPrevNode();
|
|
|
|
MBBI = MBB->end();
|
|
|
|
}
|
2019-04-26 04:09:00 +08:00
|
|
|
--MBBI;
|
|
|
|
return MBBI;
|
2014-08-05 05:05:27 +08:00
|
|
|
}
|
|
|
|
|
[x86] Teach the instruction lowering to add comments describing constant
pool data being loaded into a vector register.
The comments take the form of:
# ymm0 = [a,b,c,d,...]
# xmm1 = <x,y,z...>
The []s are used for generic sequential data and the <>s are used for
specifically ConstantVector loads. Undef elements are printed as the
letter 'u', integers in decimal, and floating point values as floating
point values. Suggestions on improving the formatting or other aspects
of the display are very welcome.
My primary use case for this is to be able to FileCheck test masks
passed to vector shuffle instructions in-register. It isn't fantastic
for that (no decoding special zeroing semantics or other tricks), but it
at least puts the mask onto an instruction line that could reasonably be
checked. I've updated many of the new vector shuffle lowering tests to
leverage this in their test cases so that we're actually checking the
shuffle masks remain as expected.
Before implementing this, I tried a *bunch* of different approaches.
I looked into teaching the MCInstLower code to scan up the basic block
and find a definition of a register used in a shuffle instruction and
then decode that, but this seems incredibly brittle and complex.
I talked to Hal a lot about the "right" way to do this: attach the raw
shuffle mask to the instruction itself in some form of unencoded
operands, and then use that to emit the comments. I still think that's
the optimal solution here, but it proved to be beyond what I'm up for
here. In particular, it seems likely best done by completing the
plumbing of metadata through these layers and attaching the shuffle mask
in metadata which could have fully automatic dropping when encoding an
actual instruction.
llvm-svn: 218377
2014-09-24 17:39:41 +08:00
|
|
|
/// Look up the IR constant that the constant-pool operand \p Op of \p MI
/// refers to.
///
/// \returns the constant, or nullptr when \p Op is not a constant-pool index,
/// carries a non-zero offset, or names a machine constant pool entry.
static const Constant *getConstantFromPool(const MachineInstr &MI,
                                           const MachineOperand &Op) {
  // Only a constant-pool reference with no offset can be resolved here.
  const bool IsPlainPoolRef = Op.isCPI() && Op.getOffset() == 0;
  if (!IsPlainPoolRef)
    return nullptr;

  const MachineFunction *Fn = MI.getParent()->getParent();
  ArrayRef<MachineConstantPoolEntry> PoolEntries =
      Fn->getConstantPool()->getConstants();
  const MachineConstantPoolEntry &Entry = PoolEntries[Op.getIndex()];

  // A machine constant pool entry gives us nothing useful to dig out, so
  // report it as "no constant".
  return Entry.isMachineConstantPoolEntry() ? nullptr : Entry.Val.ConstVal;
}
|
2014-09-24 10:16:12 +08:00
|
|
|
|
2018-04-18 05:30:29 +08:00
|
|
|
static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
|
|
|
|
unsigned SrcOp2Idx, ArrayRef<int> Mask) {
|
2014-09-24 11:06:37 +08:00
|
|
|
std::string Comment;
|
2014-09-24 10:16:12 +08:00
|
|
|
|
|
|
|
// Compute the name for a register. This is really goofy because we have
|
|
|
|
// multiple instruction printers that could (in theory) use different
|
|
|
|
// names. Fortunately most people use the ATT style (outside of Windows)
|
|
|
|
// and they actually agree on register naming here. Ultimately, this is
|
|
|
|
// a comment, and so its OK if it isn't perfect.
|
|
|
|
auto GetRegisterName = [](unsigned RegNum) -> StringRef {
|
|
|
|
return X86ATTInstPrinter::getRegisterName(RegNum);
|
|
|
|
};
|
|
|
|
|
2016-10-18 23:45:37 +08:00
|
|
|
const MachineOperand &DstOp = MI->getOperand(0);
|
|
|
|
const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
|
|
|
|
const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
|
|
|
|
|
2014-09-24 10:16:12 +08:00
|
|
|
StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
|
2016-04-09 22:51:26 +08:00
|
|
|
StringRef Src1Name =
|
|
|
|
SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
|
|
|
|
StringRef Src2Name =
|
|
|
|
SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
|
|
|
|
|
|
|
|
// One source operand, fix the mask to print all elements in one span.
|
|
|
|
SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
|
|
|
|
if (Src1Name == Src2Name)
|
|
|
|
for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
|
|
|
|
if (ShuffleMask[i] >= e)
|
|
|
|
ShuffleMask[i] -= e;
|
2014-09-24 10:16:12 +08:00
|
|
|
|
|
|
|
raw_string_ostream CS(Comment);
|
2016-10-18 23:45:37 +08:00
|
|
|
CS << DstName;
|
|
|
|
|
|
|
|
// Handle AVX512 MASK/MASXZ write mask comments.
|
|
|
|
// MASK: zmmX {%kY}
|
|
|
|
// MASKZ: zmmX {%kY} {z}
|
|
|
|
if (SrcOp1Idx > 1) {
|
|
|
|
assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
|
|
|
|
|
|
|
|
const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
|
|
|
|
if (WriteMaskOp.isReg()) {
|
|
|
|
CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
|
|
|
|
|
|
|
|
if (SrcOp1Idx == 2) {
|
|
|
|
CS << " {z}";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
CS << " = ";
|
|
|
|
|
2016-04-09 22:51:26 +08:00
|
|
|
for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
|
|
|
|
if (i != 0)
|
2014-09-24 10:16:12 +08:00
|
|
|
CS << ",";
|
2016-04-09 22:51:26 +08:00
|
|
|
if (ShuffleMask[i] == SM_SentinelZero) {
|
2014-09-24 10:16:12 +08:00
|
|
|
CS << "zero";
|
2016-04-09 22:51:26 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Otherwise, it must come from src1 or src2. Print the span of elements
|
|
|
|
// that comes from this src.
|
|
|
|
bool isSrc1 = ShuffleMask[i] < (int)e;
|
|
|
|
CS << (isSrc1 ? Src1Name : Src2Name) << '[';
|
|
|
|
|
|
|
|
bool IsFirst = true;
|
|
|
|
while (i != e && ShuffleMask[i] != SM_SentinelZero &&
|
|
|
|
(ShuffleMask[i] < (int)e) == isSrc1) {
|
|
|
|
if (!IsFirst)
|
|
|
|
CS << ',';
|
|
|
|
else
|
|
|
|
IsFirst = false;
|
|
|
|
if (ShuffleMask[i] == SM_SentinelUndef)
|
2014-09-24 10:16:12 +08:00
|
|
|
CS << "u";
|
|
|
|
else
|
2016-04-09 22:51:26 +08:00
|
|
|
CS << ShuffleMask[i] % (int)e;
|
|
|
|
++i;
|
2014-09-24 10:16:12 +08:00
|
|
|
}
|
2016-04-09 22:51:26 +08:00
|
|
|
CS << ']';
|
|
|
|
--i; // For loop increments element #.
|
2014-09-24 10:16:12 +08:00
|
|
|
}
|
|
|
|
CS.flush();
|
|
|
|
|
|
|
|
return Comment;
|
|
|
|
}
|
|
|
|
|
2018-10-02 19:32:33 +08:00
|
|
|
static void printConstant(const APInt &Val, raw_ostream &CS) {
|
|
|
|
if (Val.getBitWidth() <= 64) {
|
|
|
|
CS << Val.getZExtValue();
|
|
|
|
} else {
|
|
|
|
// print multi-word constant as (w0,w1)
|
|
|
|
CS << "(";
|
|
|
|
for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
|
|
|
|
if (i > 0)
|
|
|
|
CS << ",";
|
|
|
|
CS << Val.getRawData()[i];
|
|
|
|
}
|
|
|
|
CS << ")";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void printConstant(const APFloat &Flt, raw_ostream &CS) {
|
2018-10-02 17:08:51 +08:00
|
|
|
SmallString<32> Str;
|
2018-10-29 12:52:04 +08:00
|
|
|
// Force scientific notation to distinquish from integers.
|
|
|
|
Flt.toString(Str, 0, 0);
|
2018-10-02 17:08:51 +08:00
|
|
|
CS << Str;
|
|
|
|
}
|
|
|
|
|
2017-07-04 13:46:11 +08:00
|
|
|
static void printConstant(const Constant *COp, raw_ostream &CS) {
|
|
|
|
if (isa<UndefValue>(COp)) {
|
|
|
|
CS << "u";
|
|
|
|
} else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
|
2018-10-02 19:32:33 +08:00
|
|
|
printConstant(CI->getValue(), CS);
|
2017-07-04 13:46:11 +08:00
|
|
|
} else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
|
2018-10-02 17:08:51 +08:00
|
|
|
printConstant(CF->getValueAPF(), CS);
|
2017-07-04 13:46:11 +08:00
|
|
|
} else {
|
|
|
|
CS << "?";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
[codeview] Implement FPO data assembler directives
Summary:
This adds a set of new directives that describe 32-bit x86 prologues.
The directives are limited and do not expose the full complexity of
codeview FPO data. They are merely a convenience for the compiler to
generate more readable assembly so we don't need to generate tons of
labels in CodeGen. If our prologue emission changes in the future, we
can change the set of available directives to suit our needs. These are
modelled after the .seh_ directives, which use a different format that
interacts with exception handling.
The directives are:
.cv_fpo_proc _foo
.cv_fpo_pushreg ebp/ebx/etc
.cv_fpo_setframe ebp/esi/etc
.cv_fpo_stackalloc 200
.cv_fpo_endprologue
.cv_fpo_endproc
.cv_fpo_data _foo
I tried to follow the implementation of ARM EHABI CFI directives by
sinking most directives out of MCStreamer and into X86TargetStreamer.
This helps avoid polluting non-X86 code with WinCOFF specific logic.
I used cdb to confirm that this can show locals in parent CSRs in a few
cases, most importantly the one where we use ESI as a frame pointer,
i.e. the one in http://crbug.com/756153#c28
Once we have cdb integration in debuginfo-tests, we can add integration
tests there.
Reviewers: majnemer, hans
Subscribers: aemerson, mgorny, kristof.beyls, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D38776
llvm-svn: 315513
2017-10-12 05:24:33 +08:00
|
|
|
void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
|
|
|
|
assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
|
|
|
|
assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only");
|
|
|
|
|
|
|
|
// Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
|
|
|
|
if (EmitFPOData) {
|
|
|
|
X86TargetStreamer *XTS =
|
|
|
|
static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
|
|
|
|
switch (MI->getOpcode()) {
|
|
|
|
case X86::SEH_PushReg:
|
|
|
|
XTS->emitFPOPushReg(MI->getOperand(0).getImm());
|
|
|
|
break;
|
|
|
|
case X86::SEH_StackAlloc:
|
|
|
|
XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
|
|
|
|
break;
|
2018-10-03 00:43:52 +08:00
|
|
|
case X86::SEH_StackAlign:
|
|
|
|
XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
|
|
|
|
break;
|
[codeview] Implement FPO data assembler directives
Summary:
This adds a set of new directives that describe 32-bit x86 prologues.
The directives are limited and do not expose the full complexity of
codeview FPO data. They are merely a convenience for the compiler to
generate more readable assembly so we don't need to generate tons of
labels in CodeGen. If our prologue emission changes in the future, we
can change the set of available directives to suit our needs. These are
modelled after the .seh_ directives, which use a different format that
interacts with exception handling.
The directives are:
.cv_fpo_proc _foo
.cv_fpo_pushreg ebp/ebx/etc
.cv_fpo_setframe ebp/esi/etc
.cv_fpo_stackalloc 200
.cv_fpo_endprologue
.cv_fpo_endproc
.cv_fpo_data _foo
I tried to follow the implementation of ARM EHABI CFI directives by
sinking most directives out of MCStreamer and into X86TargetStreamer.
This helps avoid polluting non-X86 code with WinCOFF specific logic.
I used cdb to confirm that this can show locals in parent CSRs in a few
cases, most importantly the one where we use ESI as a frame pointer,
i.e. the one in http://crbug.com/756153#c28
Once we have cdb integration in debuginfo-tests, we can add integration
tests there.
Reviewers: majnemer, hans
Subscribers: aemerson, mgorny, kristof.beyls, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D38776
llvm-svn: 315513
2017-10-12 05:24:33 +08:00
|
|
|
case X86::SEH_SetFrame:
|
|
|
|
assert(MI->getOperand(1).getImm() == 0 &&
|
|
|
|
".cv_fpo_setframe takes no offset");
|
|
|
|
XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
|
|
|
|
break;
|
|
|
|
case X86::SEH_EndPrologue:
|
|
|
|
XTS->emitFPOEndPrologue();
|
|
|
|
break;
|
|
|
|
case X86::SEH_SaveReg:
|
|
|
|
case X86::SEH_SaveXMM:
|
|
|
|
case X86::SEH_PushFrame:
|
|
|
|
llvm_unreachable("SEH_ directive incompatible with FPO");
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
llvm_unreachable("expected SEH_ instruction");
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Otherwise, use the .seh_ directives for all other Windows platforms.
|
|
|
|
switch (MI->getOpcode()) {
|
|
|
|
case X86::SEH_PushReg:
|
2019-08-31 05:23:05 +08:00
|
|
|
OutStreamer->EmitWinCFIPushReg(MI->getOperand(0).getImm());
|
[codeview] Implement FPO data assembler directives
Summary:
This adds a set of new directives that describe 32-bit x86 prologues.
The directives are limited and do not expose the full complexity of
codeview FPO data. They are merely a convenience for the compiler to
generate more readable assembly so we don't need to generate tons of
labels in CodeGen. If our prologue emission changes in the future, we
can change the set of available directives to suit our needs. These are
modelled after the .seh_ directives, which use a different format that
interacts with exception handling.
The directives are:
.cv_fpo_proc _foo
.cv_fpo_pushreg ebp/ebx/etc
.cv_fpo_setframe ebp/esi/etc
.cv_fpo_stackalloc 200
.cv_fpo_endprologue
.cv_fpo_endproc
.cv_fpo_data _foo
I tried to follow the implementation of ARM EHABI CFI directives by
sinking most directives out of MCStreamer and into X86TargetStreamer.
This helps avoid polluting non-X86 code with WinCOFF specific logic.
I used cdb to confirm that this can show locals in parent CSRs in a few
cases, most importantly the one where we use ESI as a frame pointer,
i.e. the one in http://crbug.com/756153#c28
Once we have cdb integration in debuginfo-tests, we can add integration
tests there.
Reviewers: majnemer, hans
Subscribers: aemerson, mgorny, kristof.beyls, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D38776
llvm-svn: 315513
2017-10-12 05:24:33 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
case X86::SEH_SaveReg:
|
2019-08-31 05:23:05 +08:00
|
|
|
OutStreamer->EmitWinCFISaveReg(MI->getOperand(0).getImm(),
|
[codeview] Implement FPO data assembler directives
Summary:
This adds a set of new directives that describe 32-bit x86 prologues.
The directives are limited and do not expose the full complexity of
codeview FPO data. They are merely a convenience for the compiler to
generate more readable assembly so we don't need to generate tons of
labels in CodeGen. If our prologue emission changes in the future, we
can change the set of available directives to suit our needs. These are
modelled after the .seh_ directives, which use a different format that
interacts with exception handling.
The directives are:
.cv_fpo_proc _foo
.cv_fpo_pushreg ebp/ebx/etc
.cv_fpo_setframe ebp/esi/etc
.cv_fpo_stackalloc 200
.cv_fpo_endprologue
.cv_fpo_endproc
.cv_fpo_data _foo
I tried to follow the implementation of ARM EHABI CFI directives by
sinking most directives out of MCStreamer and into X86TargetStreamer.
This helps avoid polluting non-X86 code with WinCOFF specific logic.
I used cdb to confirm that this can show locals in parent CSRs in a few
cases, most importantly the one where we use ESI as a frame pointer,
i.e. the one in http://crbug.com/756153#c28
Once we have cdb integration in debuginfo-tests, we can add integration
tests there.
Reviewers: majnemer, hans
Subscribers: aemerson, mgorny, kristof.beyls, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D38776
llvm-svn: 315513
2017-10-12 05:24:33 +08:00
|
|
|
MI->getOperand(1).getImm());
|
|
|
|
break;
|
|
|
|
|
|
|
|
case X86::SEH_SaveXMM:
|
2019-08-31 05:23:05 +08:00
|
|
|
OutStreamer->EmitWinCFISaveXMM(MI->getOperand(0).getImm(),
|
[codeview] Implement FPO data assembler directives
Summary:
This adds a set of new directives that describe 32-bit x86 prologues.
The directives are limited and do not expose the full complexity of
codeview FPO data. They are merely a convenience for the compiler to
generate more readable assembly so we don't need to generate tons of
labels in CodeGen. If our prologue emission changes in the future, we
can change the set of available directives to suit our needs. These are
modelled after the .seh_ directives, which use a different format that
interacts with exception handling.
The directives are:
.cv_fpo_proc _foo
.cv_fpo_pushreg ebp/ebx/etc
.cv_fpo_setframe ebp/esi/etc
.cv_fpo_stackalloc 200
.cv_fpo_endprologue
.cv_fpo_endproc
.cv_fpo_data _foo
I tried to follow the implementation of ARM EHABI CFI directives by
sinking most directives out of MCStreamer and into X86TargetStreamer.
This helps avoid polluting non-X86 code with WinCOFF specific logic.
I used cdb to confirm that this can show locals in parent CSRs in a few
cases, most importantly the one where we use ESI as a frame pointer,
i.e. the one in http://crbug.com/756153#c28
Once we have cdb integration in debuginfo-tests, we can add integration
tests there.
Reviewers: majnemer, hans
Subscribers: aemerson, mgorny, kristof.beyls, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D38776
llvm-svn: 315513
2017-10-12 05:24:33 +08:00
|
|
|
MI->getOperand(1).getImm());
|
|
|
|
break;
|
|
|
|
|
|
|
|
case X86::SEH_StackAlloc:
|
|
|
|
OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
|
|
|
|
break;
|
|
|
|
|
|
|
|
case X86::SEH_SetFrame:
|
2019-08-31 05:23:05 +08:00
|
|
|
OutStreamer->EmitWinCFISetFrame(MI->getOperand(0).getImm(),
|
|
|
|
MI->getOperand(1).getImm());
|
[codeview] Implement FPO data assembler directives
Summary:
This adds a set of new directives that describe 32-bit x86 prologues.
The directives are limited and do not expose the full complexity of
codeview FPO data. They are merely a convenience for the compiler to
generate more readable assembly so we don't need to generate tons of
labels in CodeGen. If our prologue emission changes in the future, we
can change the set of available directives to suit our needs. These are
modelled after the .seh_ directives, which use a different format that
interacts with exception handling.
The directives are:
.cv_fpo_proc _foo
.cv_fpo_pushreg ebp/ebx/etc
.cv_fpo_setframe ebp/esi/etc
.cv_fpo_stackalloc 200
.cv_fpo_endprologue
.cv_fpo_endproc
.cv_fpo_data _foo
I tried to follow the implementation of ARM EHABI CFI directives by
sinking most directives out of MCStreamer and into X86TargetStreamer.
This helps avoid polluting non-X86 code with WinCOFF specific logic.
I used cdb to confirm that this can show locals in parent CSRs in a few
cases, most importantly the one where we use ESI as a frame pointer,
i.e. the one in http://crbug.com/756153#c28
Once we have cdb integration in debuginfo-tests, we can add integration
tests there.
Reviewers: majnemer, hans
Subscribers: aemerson, mgorny, kristof.beyls, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D38776
llvm-svn: 315513
2017-10-12 05:24:33 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
case X86::SEH_PushFrame:
|
|
|
|
OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
|
|
|
|
break;
|
|
|
|
|
|
|
|
case X86::SEH_EndPrologue:
|
|
|
|
OutStreamer->EmitWinCFIEndProlog();
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
llvm_unreachable("expected SEH_ instruction");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-10-23 06:14:05 +08:00
|
|
|
static unsigned getRegisterWidth(const MCOperandInfo &Info) {
|
|
|
|
if (Info.RegClass == X86::VR128RegClassID ||
|
|
|
|
Info.RegClass == X86::VR128XRegClassID)
|
|
|
|
return 128;
|
|
|
|
if (Info.RegClass == X86::VR256RegClassID ||
|
|
|
|
Info.RegClass == X86::VR256XRegClassID)
|
|
|
|
return 256;
|
|
|
|
if (Info.RegClass == X86::VR512RegClassID)
|
|
|
|
return 512;
|
|
|
|
llvm_unreachable("Unknown register class!");
|
|
|
|
}
|
|
|
|
|
2020-05-30 15:31:21 +08:00
|
|
|
static void addConstantComments(const MachineInstr *MI,
|
|
|
|
MCStreamer &OutStreamer) {
|
2009-09-13 04:34:57 +08:00
|
|
|
switch (MI->getOpcode()) {
|
2015-12-27 03:48:43 +08:00
|
|
|
// Lower PSHUFB and VPERMILP normally but add a comment if we can find
|
|
|
|
// a constant shuffle mask. We won't be able to do this at the MC layer
|
|
|
|
// because the mask isn't an immediate.
|
2014-07-26 07:47:11 +08:00
|
|
|
case X86::PSHUFBrm:
|
2014-09-25 08:24:19 +08:00
|
|
|
case X86::VPSHUFBrm:
|
2015-12-27 03:48:43 +08:00
|
|
|
case X86::VPSHUFBYrm:
|
|
|
|
case X86::VPSHUFBZ128rm:
|
|
|
|
case X86::VPSHUFBZ128rmk:
|
|
|
|
case X86::VPSHUFBZ128rmkz:
|
|
|
|
case X86::VPSHUFBZ256rm:
|
|
|
|
case X86::VPSHUFBZ256rmk:
|
|
|
|
case X86::VPSHUFBZ256rmkz:
|
|
|
|
case X86::VPSHUFBZrm:
|
|
|
|
case X86::VPSHUFBZrmk:
|
|
|
|
case X86::VPSHUFBZrmkz: {
|
2020-05-31 03:53:56 +08:00
|
|
|
unsigned SrcIdx = 1;
|
|
|
|
if (X86II::isKMasked(MI->getDesc().TSFlags)) {
|
|
|
|
// Skip mask operand.
|
|
|
|
++SrcIdx;
|
|
|
|
if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
|
|
|
|
// Skip passthru operand.
|
|
|
|
++SrcIdx;
|
|
|
|
}
|
2015-12-27 03:48:43 +08:00
|
|
|
}
|
2020-05-31 03:53:56 +08:00
|
|
|
unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
|
2015-12-27 03:48:43 +08:00
|
|
|
|
2020-05-31 03:53:56 +08:00
|
|
|
assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
|
|
|
|
"Unexpected number of operands!");
|
2014-09-24 11:06:37 +08:00
|
|
|
|
2016-10-18 23:45:37 +08:00
|
|
|
const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
|
[x86] Teach the instruction lowering to add comments describing constant
pool data being loaded into a vector register.
The comments take the form of:
# ymm0 = [a,b,c,d,...]
# xmm1 = <x,y,z...>
The []s are used for generic sequential data and the <>s are used for
specifically ConstantVector loads. Undef elements are printed as the
letter 'u', integers in decimal, and floating point values as floating
point values. Suggestions on improving the formatting or other aspects
of the display are very welcome.
My primary use case for this is to be able to FileCheck test masks
passed to vector shuffle instructions in-register. It isn't fantastic
for that (no decoding special zeroing semantics or other tricks), but it
at least puts the mask onto an instruction line that could reasonably be
checked. I've updated many of the new vector shuffle lowering tests to
leverage this in their test cases so that we're actually checking the
shuffle masks remain as expected.
Before implementing this, I tried a *bunch* of different approaches.
I looked into teaching the MCInstLower code to scan up the basic block
and find a definition of a register used in a shuffle instruction and
then decode that, but this seems incredibly brittle and complex.
I talked to Hal a lot about the "right" way to do this: attach the raw
shuffle mask to the instruction itself in some form of unencoded
operands, and then use that to emit the comments. I still think that's
the optimal solution here, but it proved to be beyond what I'm up for
here. In particular, it seems likely best done by completing the
plumbing of metadata through these layers and attaching the shuffle mask
in metadata which could have fully automatic dropping when encoding an
actual instruction.
llvm-svn: 218377
2014-09-24 17:39:41 +08:00
|
|
|
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
|
2018-10-23 06:14:05 +08:00
|
|
|
unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
|
2016-11-25 10:29:21 +08:00
|
|
|
SmallVector<int, 64> Mask;
|
2018-10-23 06:14:05 +08:00
|
|
|
DecodePSHUFBMask(C, Width, Mask);
|
2014-09-24 11:06:37 +08:00
|
|
|
if (!Mask.empty())
|
2020-05-30 15:31:21 +08:00
|
|
|
OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
|
2014-09-24 11:06:37 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2016-07-13 23:45:36 +08:00
|
|
|
|
2016-10-18 11:36:52 +08:00
|
|
|
case X86::VPERMILPSrm:
|
|
|
|
case X86::VPERMILPSYrm:
|
|
|
|
case X86::VPERMILPSZ128rm:
|
|
|
|
case X86::VPERMILPSZ128rmk:
|
|
|
|
case X86::VPERMILPSZ128rmkz:
|
|
|
|
case X86::VPERMILPSZ256rm:
|
|
|
|
case X86::VPERMILPSZ256rmk:
|
|
|
|
case X86::VPERMILPSZ256rmkz:
|
|
|
|
case X86::VPERMILPSZrm:
|
|
|
|
case X86::VPERMILPSZrmk:
|
|
|
|
case X86::VPERMILPSZrmkz:
|
2014-09-23 18:08:29 +08:00
|
|
|
case X86::VPERMILPDrm:
|
2016-07-13 23:45:36 +08:00
|
|
|
case X86::VPERMILPDYrm:
|
|
|
|
case X86::VPERMILPDZ128rm:
|
2016-10-18 11:36:52 +08:00
|
|
|
case X86::VPERMILPDZ128rmk:
|
|
|
|
case X86::VPERMILPDZ128rmkz:
|
2016-07-13 23:45:36 +08:00
|
|
|
case X86::VPERMILPDZ256rm:
|
2016-10-18 11:36:52 +08:00
|
|
|
case X86::VPERMILPDZ256rmk:
|
|
|
|
case X86::VPERMILPDZ256rmkz:
|
|
|
|
case X86::VPERMILPDZrm:
|
|
|
|
case X86::VPERMILPDZrmk:
|
|
|
|
case X86::VPERMILPDZrmkz: {
|
|
|
|
unsigned ElSize;
|
|
|
|
switch (MI->getOpcode()) {
|
2018-07-02 14:42:42 +08:00
|
|
|
default: llvm_unreachable("Invalid opcode");
|
2016-10-18 11:36:52 +08:00
|
|
|
case X86::VPERMILPSrm:
|
|
|
|
case X86::VPERMILPSYrm:
|
|
|
|
case X86::VPERMILPSZ128rm:
|
|
|
|
case X86::VPERMILPSZ256rm:
|
|
|
|
case X86::VPERMILPSZrm:
|
|
|
|
case X86::VPERMILPSZ128rmkz:
|
|
|
|
case X86::VPERMILPSZ256rmkz:
|
|
|
|
case X86::VPERMILPSZrmkz:
|
|
|
|
case X86::VPERMILPSZ128rmk:
|
|
|
|
case X86::VPERMILPSZ256rmk:
|
|
|
|
case X86::VPERMILPSZrmk:
|
2020-05-31 03:53:56 +08:00
|
|
|
ElSize = 32;
|
|
|
|
break;
|
2016-10-18 11:36:52 +08:00
|
|
|
case X86::VPERMILPDrm:
|
|
|
|
case X86::VPERMILPDYrm:
|
|
|
|
case X86::VPERMILPDZ128rm:
|
|
|
|
case X86::VPERMILPDZ256rm:
|
|
|
|
case X86::VPERMILPDZrm:
|
|
|
|
case X86::VPERMILPDZ128rmkz:
|
|
|
|
case X86::VPERMILPDZ256rmkz:
|
|
|
|
case X86::VPERMILPDZrmkz:
|
|
|
|
case X86::VPERMILPDZ128rmk:
|
|
|
|
case X86::VPERMILPDZ256rmk:
|
|
|
|
case X86::VPERMILPDZrmk:
|
2020-05-31 03:53:56 +08:00
|
|
|
ElSize = 64;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned SrcIdx = 1;
|
|
|
|
if (X86II::isKMasked(MI->getDesc().TSFlags)) {
|
|
|
|
// Skip mask operand.
|
|
|
|
++SrcIdx;
|
|
|
|
if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
|
|
|
|
// Skip passthru operand.
|
|
|
|
++SrcIdx;
|
|
|
|
}
|
2015-12-26 12:50:07 +08:00
|
|
|
}
|
2020-05-31 03:53:56 +08:00
|
|
|
unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
|
2016-07-13 23:45:36 +08:00
|
|
|
|
2020-05-31 03:53:56 +08:00
|
|
|
assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
|
|
|
|
"Unexpected number of operands!");
|
2015-12-26 12:50:07 +08:00
|
|
|
|
2016-10-18 23:45:37 +08:00
|
|
|
const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
|
[x86] Teach the instruction lowering to add comments describing constant
pool data being loaded into a vector register.
The comments take the form of:
# ymm0 = [a,b,c,d,...]
# xmm1 = <x,y,z...>
The []s are used for generic sequential data and the <>s are used for
specifically ConstantVector loads. Undef elements are printed as the
letter 'u', integers in decimal, and floating point values as floating
point values. Suggestions on improving the formatting or other aspects
of the display are very welcome.
My primary use case for this is to be able to FileCheck test masks
passed to vector shuffle instructions in-register. It isn't fantastic
for that (no decoding special zeroing semantics or other tricks), but it
at least puts the mask onto an instruction line that could reasonably be
checked. I've updated many of the new vector shuffle lowering tests to
leverage this in their test cases so that we're actually checking the
shuffle masks remain as expected.
Before implementing this, I tried a *bunch* of different approaches.
I looked into teaching the MCInstLower code to scan up the basic block
and find a definition of a register used in a shuffle instruction and
then decode that, but this seems incredibly brittle and complex.
I talked to Hal a lot about the "right" way to do this: attach the raw
shuffle mask to the instruction itself in some form of unencoded
operands, and then use that to emit the comments. I still think that's
the optimal solution here, but it proved to be beyond what I'm up for
here. In particular, it seems likely best done by completing the
plumbing of metadata through these layers and attaching the shuffle mask
in metadata which could have fully automatic dropping when encoding an
actual instruction.
llvm-svn: 218377
2014-09-24 17:39:41 +08:00
|
|
|
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
|
2018-10-23 06:14:05 +08:00
|
|
|
unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
|
2014-09-24 11:06:37 +08:00
|
|
|
SmallVector<int, 16> Mask;
|
2018-10-23 06:14:05 +08:00
|
|
|
DecodeVPERMILPMask(C, ElSize, Width, Mask);
|
2014-09-24 11:06:37 +08:00
|
|
|
if (!Mask.empty())
|
2020-05-30 15:31:21 +08:00
|
|
|
OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
|
2016-04-09 22:51:26 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2016-06-05 05:44:28 +08:00
|
|
|
|
|
|
|
case X86::VPERMIL2PDrm:
|
|
|
|
case X86::VPERMIL2PSrm:
|
2017-02-19 06:53:43 +08:00
|
|
|
case X86::VPERMIL2PDYrm:
|
|
|
|
case X86::VPERMIL2PSYrm: {
|
2020-05-31 03:53:56 +08:00
|
|
|
assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
|
|
|
|
"Unexpected number of operands!");
|
2016-06-05 05:44:28 +08:00
|
|
|
|
2016-10-18 23:45:37 +08:00
|
|
|
const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
|
2016-06-05 05:44:28 +08:00
|
|
|
if (!CtrlOp.isImm())
|
|
|
|
break;
|
|
|
|
|
|
|
|
unsigned ElSize;
|
|
|
|
switch (MI->getOpcode()) {
|
2018-07-02 14:42:42 +08:00
|
|
|
default: llvm_unreachable("Invalid opcode");
|
|
|
|
case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
|
|
|
|
case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
|
2016-06-05 05:44:28 +08:00
|
|
|
}
|
|
|
|
|
2020-05-31 03:53:56 +08:00
|
|
|
const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
|
2016-06-05 05:44:28 +08:00
|
|
|
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
|
2018-10-23 06:14:05 +08:00
|
|
|
unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
|
2016-06-05 05:44:28 +08:00
|
|
|
SmallVector<int, 16> Mask;
|
2018-10-23 06:14:05 +08:00
|
|
|
DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
|
2016-06-05 05:44:28 +08:00
|
|
|
if (!Mask.empty())
|
2020-05-30 15:31:21 +08:00
|
|
|
OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
|
2016-06-05 05:44:28 +08:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2016-04-09 22:51:26 +08:00
|
|
|
case X86::VPPERMrrm: {
|
2020-05-31 03:53:56 +08:00
|
|
|
assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands) &&
|
|
|
|
"Unexpected number of operands!");
|
2016-04-09 22:51:26 +08:00
|
|
|
|
2020-05-31 03:53:56 +08:00
|
|
|
const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
|
2016-04-09 22:51:26 +08:00
|
|
|
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
|
2018-10-23 06:14:05 +08:00
|
|
|
unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
|
2016-04-09 22:51:26 +08:00
|
|
|
SmallVector<int, 16> Mask;
|
2018-10-23 06:14:05 +08:00
|
|
|
DecodeVPPERMMask(C, Width, Mask);
|
2016-04-09 22:51:26 +08:00
|
|
|
if (!Mask.empty())
|
2020-05-30 15:31:21 +08:00
|
|
|
OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
|
2014-09-24 11:06:37 +08:00
|
|
|
}
|
2014-07-26 07:47:11 +08:00
|
|
|
break;
|
2009-09-13 04:34:57 +08:00
|
|
|
}
|
[x86] Teach the instruction lowering to add comments describing constant
pool data being loaded into a vector register.
The comments take the form of:
# ymm0 = [a,b,c,d,...]
# xmm1 = <x,y,z...>
The []s are used for generic sequential data and the <>s are used for
specifically ConstantVector loads. Undef elements are printed as the
letter 'u', integers in decimal, and floating point values as floating
point values. Suggestions on improving the formatting or other aspects
of the display are very welcome.
My primary use case for this is to be able to FileCheck test masks
passed to vector shuffle instructions in-register. It isn't fantastic
for that (no decoding special zeroing semantics or other tricks), but it
at least puts the mask onto an instruction line that could reasonably be
checked. I've updated many of the new vector shuffle lowering tests to
leverage this in their test cases so that we're actually checking the
shuffle masks remain as expected.
Before implementing this, I tried a *bunch* of different approaches.
I looked into teaching the MCInstLower code to scan up the basic block
and find a definition of a register used in a shuffle instruction and
then decode that, but this seems incredibly brittle and complex.
I talked to Hal a lot about the "right" way to do this: attach the raw
shuffle mask to the instruction itself in some form of unencoded
operands, and then use that to emit the comments. I still think that's
the optimal solution here, but it proved to be beyond what I'm up for
here. In particular, it seems likely best done by completing the
plumbing of metadata through these layers and attaching the shuffle mask
in metadata which could have fully automatic dropping when encoding an
actual instruction.
llvm-svn: 218377
2014-09-24 17:39:41 +08:00
|
|
|
|
2018-03-02 06:22:31 +08:00
|
|
|
case X86::MMX_MOVQ64rm: {
|
2020-05-31 03:53:56 +08:00
|
|
|
assert(MI->getNumOperands() == (1 + X86::AddrNumOperands) &&
|
|
|
|
"Unexpected number of operands!");
|
|
|
|
if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
|
2018-03-02 06:22:31 +08:00
|
|
|
std::string Comment;
|
|
|
|
raw_string_ostream CS(Comment);
|
|
|
|
const MachineOperand &DstOp = MI->getOperand(0);
|
|
|
|
CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
|
|
|
|
if (auto *CF = dyn_cast<ConstantFP>(C)) {
|
|
|
|
CS << "0x" << CF->getValueAPF().bitcastToAPInt().toString(16, false);
|
2020-05-30 15:31:21 +08:00
|
|
|
OutStreamer.AddComment(CS.str());
|
2018-03-02 06:22:31 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2018-04-18 05:30:29 +08:00
|
|
|
#define MOV_CASE(Prefix, Suffix) \
|
|
|
|
case X86::Prefix##MOVAPD##Suffix##rm: \
|
|
|
|
case X86::Prefix##MOVAPS##Suffix##rm: \
|
|
|
|
case X86::Prefix##MOVUPD##Suffix##rm: \
|
|
|
|
case X86::Prefix##MOVUPS##Suffix##rm: \
|
|
|
|
case X86::Prefix##MOVDQA##Suffix##rm: \
|
2015-09-08 14:38:21 +08:00
|
|
|
case X86::Prefix##MOVDQU##Suffix##rm:
|
|
|
|
|
2018-04-18 05:30:29 +08:00
|
|
|
#define MOV_AVX512_CASE(Suffix) \
|
|
|
|
case X86::VMOVDQA64##Suffix##rm: \
|
|
|
|
case X86::VMOVDQA32##Suffix##rm: \
|
|
|
|
case X86::VMOVDQU64##Suffix##rm: \
|
|
|
|
case X86::VMOVDQU32##Suffix##rm: \
|
|
|
|
case X86::VMOVDQU16##Suffix##rm: \
|
|
|
|
case X86::VMOVDQU8##Suffix##rm: \
|
|
|
|
case X86::VMOVAPS##Suffix##rm: \
|
|
|
|
case X86::VMOVAPD##Suffix##rm: \
|
|
|
|
case X86::VMOVUPS##Suffix##rm: \
|
2015-09-08 14:38:21 +08:00
|
|
|
case X86::VMOVUPD##Suffix##rm:
|
|
|
|
|
2018-04-18 05:30:29 +08:00
|
|
|
#define CASE_ALL_MOV_RM() \
|
|
|
|
MOV_CASE(, ) /* SSE */ \
|
|
|
|
MOV_CASE(V, ) /* AVX-128 */ \
|
|
|
|
MOV_CASE(V, Y) /* AVX-256 */ \
|
|
|
|
MOV_AVX512_CASE(Z) \
|
|
|
|
MOV_AVX512_CASE(Z256) \
|
2015-09-08 14:38:21 +08:00
|
|
|
MOV_AVX512_CASE(Z128)
|
|
|
|
|
2018-04-18 05:30:29 +08:00
|
|
|
// For loads from a constant pool to a vector register, print the constant
|
|
|
|
// loaded.
|
|
|
|
CASE_ALL_MOV_RM()
|
2017-07-04 13:46:11 +08:00
|
|
|
case X86::VBROADCASTF128:
|
|
|
|
case X86::VBROADCASTI128:
|
|
|
|
case X86::VBROADCASTF32X4Z256rm:
|
|
|
|
case X86::VBROADCASTF32X4rm:
|
|
|
|
case X86::VBROADCASTF32X8rm:
|
|
|
|
case X86::VBROADCASTF64X2Z128rm:
|
|
|
|
case X86::VBROADCASTF64X2rm:
|
|
|
|
case X86::VBROADCASTF64X4rm:
|
|
|
|
case X86::VBROADCASTI32X4Z256rm:
|
|
|
|
case X86::VBROADCASTI32X4rm:
|
|
|
|
case X86::VBROADCASTI32X8rm:
|
|
|
|
case X86::VBROADCASTI64X2Z128rm:
|
|
|
|
case X86::VBROADCASTI64X2rm:
|
|
|
|
case X86::VBROADCASTI64X4rm:
|
2020-05-31 03:53:56 +08:00
|
|
|
assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
|
|
|
|
"Unexpected number of operands!");
|
|
|
|
if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
|
2017-07-04 13:46:11 +08:00
|
|
|
int NumLanes = 1;
|
|
|
|
// Override NumLanes for the broadcast instructions.
|
|
|
|
switch (MI->getOpcode()) {
|
2018-07-02 14:42:42 +08:00
|
|
|
case X86::VBROADCASTF128: NumLanes = 2; break;
|
|
|
|
case X86::VBROADCASTI128: NumLanes = 2; break;
|
|
|
|
case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
|
|
|
|
case X86::VBROADCASTF32X4rm: NumLanes = 4; break;
|
|
|
|
case X86::VBROADCASTF32X8rm: NumLanes = 2; break;
|
|
|
|
case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
|
|
|
|
case X86::VBROADCASTF64X2rm: NumLanes = 4; break;
|
|
|
|
case X86::VBROADCASTF64X4rm: NumLanes = 2; break;
|
|
|
|
case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
|
|
|
|
case X86::VBROADCASTI32X4rm: NumLanes = 4; break;
|
|
|
|
case X86::VBROADCASTI32X8rm: NumLanes = 2; break;
|
|
|
|
case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
|
|
|
|
case X86::VBROADCASTI64X2rm: NumLanes = 4; break;
|
|
|
|
case X86::VBROADCASTI64X4rm: NumLanes = 2; break;
|
2017-07-04 13:46:11 +08:00
|
|
|
}
|
|
|
|
|
[x86] Teach the instruction lowering to add comments describing constant
pool data being loaded into a vector register.
The comments take the form of:
# ymm0 = [a,b,c,d,...]
# xmm1 = <x,y,z...>
The []s are used for generic sequential data and the <>s are used for
specifically ConstantVector loads. Undef elements are printed as the
letter 'u', integers in decimal, and floating point values as floating
point values. Suggestions on improving the formatting or other aspects
of the display are very welcome.
My primary use case for this is to be able to FileCheck test masks
passed to vector shuffle instructions in-register. It isn't fantastic
for that (no decoding special zeroing semantics or other tricks), but it
at least puts the mask onto an instruction line that could reasonably be
checked. I've updated many of the new vector shuffle lowering tests to
leverage this in their test cases so that we're actually checking the
shuffle masks remain as expected.
Before implementing this, I tried a *bunch* of different approaches.
I looked into teaching the MCInstLower code to scan up the basic block
and find a definition of a register used in a shuffle instruction and
then decode that, but this seems incredibly brittle and complex.
I talked to Hal a lot about the "right" way to do this: attach the raw
shuffle mask to the instruction itself in some form of unencoded
operands, and then use that to emit the comments. I still think that's
the optimal solution here, but it proved to be beyond what I'm up for
here. In particular, it seems likely best done by completing the
plumbing of metadata through these layers and attaching the shuffle mask
in metadata which could have fully automatic dropping when encoding an
actual instruction.
llvm-svn: 218377
2014-09-24 17:39:41 +08:00
|
|
|
std::string Comment;
|
|
|
|
raw_string_ostream CS(Comment);
|
|
|
|
const MachineOperand &DstOp = MI->getOperand(0);
|
|
|
|
CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
|
|
|
|
if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
|
|
|
|
CS << "[";
|
2017-07-04 13:46:11 +08:00
|
|
|
for (int l = 0; l != NumLanes; ++l) {
|
2018-04-18 05:30:29 +08:00
|
|
|
for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
|
|
|
|
++i) {
|
2017-07-04 13:46:11 +08:00
|
|
|
if (i != 0 || l != 0)
|
|
|
|
CS << ",";
|
|
|
|
if (CDS->getElementType()->isIntegerTy())
|
2018-10-02 19:32:33 +08:00
|
|
|
printConstant(CDS->getElementAsAPInt(i), CS);
|
2018-10-02 17:08:51 +08:00
|
|
|
else if (CDS->getElementType()->isHalfTy() ||
|
|
|
|
CDS->getElementType()->isFloatTy() ||
|
|
|
|
CDS->getElementType()->isDoubleTy())
|
|
|
|
printConstant(CDS->getElementAsAPFloat(i), CS);
|
2017-07-04 13:46:11 +08:00
|
|
|
else
|
|
|
|
CS << "?";
|
|
|
|
}
|
[x86] Teach the instruction lowering to add comments describing constant
pool data being loaded into a vector register.
The comments take the form of:
# ymm0 = [a,b,c,d,...]
# xmm1 = <x,y,z...>
The []s are used for generic sequential data and the <>s are used for
specifically ConstantVector loads. Undef elements are printed as the
letter 'u', integers in decimal, and floating point values as floating
point values. Suggestions on improving the formatting or other aspects
of the display are very welcome.
My primary use case for this is to be able to FileCheck test masks
passed to vector shuffle instructions in-register. It isn't fantastic
for that (no decoding special zeroing semantics or other tricks), but it
at least puts the mask onto an instruction line that could reasonably be
checked. I've updated many of the new vector shuffle lowering tests to
leverage this in their test cases so that we're actually checking the
shuffle masks remain as expected.
Before implementing this, I tried a *bunch* of different approaches.
I looked into teaching the MCInstLower code to scan up the basic block
and find a definition of a register used in a shuffle instruction and
then decode that, but this seems incredibly brittle and complex.
I talked to Hal a lot about the "right" way to do this: attach the raw
shuffle mask to the instruction itself in some form of unencoded
operands, and then use that to emit the comments. I still think that's
the optimal solution here, but it proved to be beyond what I'm up for
here. In particular, it seems likely best done by completing the
plumbing of metadata through these layers and attaching the shuffle mask
in metadata which could have fully automatic dropping when encoding an
actual instruction.
llvm-svn: 218377
2014-09-24 17:39:41 +08:00
|
|
|
}
|
|
|
|
CS << "]";
|
2020-05-30 15:31:21 +08:00
|
|
|
OutStreamer.AddComment(CS.str());
|
[x86] Teach the instruction lowering to add comments describing constant
pool data being loaded into a vector register.
The comments take the form of:
# ymm0 = [a,b,c,d,...]
# xmm1 = <x,y,z...>
The []s are used for generic sequential data and the <>s are used for
specifically ConstantVector loads. Undef elements are printed as the
letter 'u', integers in decimal, and floating point values as floating
point values. Suggestions on improving the formatting or other aspects
of the display are very welcome.
My primary use case for this is to be able to FileCheck test masks
passed to vector shuffle instructions in-register. It isn't fantastic
for that (no decoding special zeroing semantics or other tricks), but it
at least puts the mask onto an instruction line that could reasonably be
checked. I've updated many of the new vector shuffle lowering tests to
leverage this in their test cases so that we're actually checking the
shuffle masks remain as expected.
Before implementing this, I tried a *bunch* of different approaches.
I looked into teaching the MCInstLower code to scan up the basic block
and find a definition of a register used in a shuffle instruction and
then decode that, but this seems incredibly brittle and complex.
I talked to Hal a lot about the "right" way to do this: attach the raw
shuffle mask to the instruction itself in some form of unencoded
operands, and then use that to emit the comments. I still think that's
the optimal solution here, but it proved to be beyond what I'm up for
here. In particular, it seems likely best done by completing the
plumbing of metadata through these layers and attaching the shuffle mask
in metadata which could have fully automatic dropping when encoding an
actual instruction.
llvm-svn: 218377
2014-09-24 17:39:41 +08:00
|
|
|
} else if (auto *CV = dyn_cast<ConstantVector>(C)) {
|
|
|
|
CS << "<";
|
2017-07-04 13:46:11 +08:00
|
|
|
for (int l = 0; l != NumLanes; ++l) {
|
2018-04-18 05:30:29 +08:00
|
|
|
for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
|
|
|
|
++i) {
|
2017-07-04 13:46:11 +08:00
|
|
|
if (i != 0 || l != 0)
|
|
|
|
CS << ",";
|
|
|
|
printConstant(CV->getOperand(i), CS);
|
[x86] Teach the instruction lowering to add comments describing constant
pool data being loaded into a vector register.
The comments take the form of:
# ymm0 = [a,b,c,d,...]
# xmm1 = <x,y,z...>
The []s are used for generic sequential data and the <>s are used for
specifically ConstantVector loads. Undef elements are printed as the
letter 'u', integers in decimal, and floating point values as floating
point values. Suggestions on improving the formatting or other aspects
of the display are very welcome.
My primary use case for this is to be able to FileCheck test masks
passed to vector shuffle instructions in-register. It isn't fantastic
for that (no decoding special zeroing semantics or other tricks), but it
at least puts the mask onto an instruction line that could reasonably be
checked. I've updated many of the new vector shuffle lowering tests to
leverage this in their test cases so that we're actually checking the
shuffle masks remain as expected.
Before implementing this, I tried a *bunch* of different approaches.
I looked into teaching the MCInstLower code to scan up the basic block
and find a definition of a register used in a shuffle instruction and
then decode that, but this seems incredibly brittle and complex.
I talked to Hal a lot about the "right" way to do this: attach the raw
shuffle mask to the instruction itself in some form of unencoded
operands, and then use that to emit the comments. I still think that's
the optimal solution here, but it proved to be beyond what I'm up for
here. In particular, it seems likely best done by completing the
plumbing of metadata through these layers and attaching the shuffle mask
in metadata which could have fully automatic dropping when encoding an
actual instruction.
llvm-svn: 218377
2014-09-24 17:39:41 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
CS << ">";
|
2020-05-30 15:31:21 +08:00
|
|
|
OutStreamer.AddComment(CS.str());
|
[x86] Teach the instruction lowering to add comments describing constant
pool data being loaded into a vector register.
The comments take the form of:
# ymm0 = [a,b,c,d,...]
# xmm1 = <x,y,z...>
The []s are used for generic sequential data and the <>s are used for
specifically ConstantVector loads. Undef elements are printed as the
letter 'u', integers in decimal, and floating point values as floating
point values. Suggestions on improving the formatting or other aspects
of the display are very welcome.
My primary use case for this is to be able to FileCheck test masks
passed to vector shuffle instructions in-register. It isn't fantastic
for that (no decoding special zeroing semantics or other tricks), but it
at least puts the mask onto an instruction line that could reasonably be
checked. I've updated many of the new vector shuffle lowering tests to
leverage this in their test cases so that we're actually checking the
shuffle masks remain as expected.
Before implementing this, I tried a *bunch* of different approaches.
I looked into teaching the MCInstLower code to scan up the basic block
and find a definition of a register used in a shuffle instruction and
then decode that, but this seems incredibly brittle and complex.
I talked to Hal a lot about the "right" way to do this: attach the raw
shuffle mask to the instruction itself in some form of unencoded
operands, and then use that to emit the comments. I still think that's
the optimal solution here, but it proved to be beyond what I'm up for
here. In particular, it seems likely best done by completing the
plumbing of metadata through these layers and attaching the shuffle mask
in metadata which could have fully automatic dropping when encoding an
actual instruction.
llvm-svn: 218377
2014-09-24 17:39:41 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
2020-05-30 15:31:21 +08:00
|
|
|
|
2018-10-15 09:51:53 +08:00
|
|
|
case X86::MOVDDUPrm:
|
|
|
|
case X86::VMOVDDUPrm:
|
|
|
|
case X86::VMOVDDUPZ128rm:
|
2017-07-04 13:46:11 +08:00
|
|
|
case X86::VBROADCASTSSrm:
|
|
|
|
case X86::VBROADCASTSSYrm:
|
2020-02-09 00:54:46 +08:00
|
|
|
case X86::VBROADCASTSSZ128rm:
|
|
|
|
case X86::VBROADCASTSSZ256rm:
|
|
|
|
case X86::VBROADCASTSSZrm:
|
2017-07-04 13:46:11 +08:00
|
|
|
case X86::VBROADCASTSDYrm:
|
2020-02-09 00:54:46 +08:00
|
|
|
case X86::VBROADCASTSDZ256rm:
|
|
|
|
case X86::VBROADCASTSDZrm:
|
2017-07-04 13:46:11 +08:00
|
|
|
case X86::VPBROADCASTBrm:
|
|
|
|
case X86::VPBROADCASTBYrm:
|
2020-02-09 00:54:46 +08:00
|
|
|
case X86::VPBROADCASTBZ128rm:
|
|
|
|
case X86::VPBROADCASTBZ256rm:
|
|
|
|
case X86::VPBROADCASTBZrm:
|
2017-07-04 13:46:11 +08:00
|
|
|
case X86::VPBROADCASTDrm:
|
|
|
|
case X86::VPBROADCASTDYrm:
|
2020-02-09 00:54:46 +08:00
|
|
|
case X86::VPBROADCASTDZ128rm:
|
|
|
|
case X86::VPBROADCASTDZ256rm:
|
|
|
|
case X86::VPBROADCASTDZrm:
|
2017-07-04 13:46:11 +08:00
|
|
|
case X86::VPBROADCASTQrm:
|
|
|
|
case X86::VPBROADCASTQYrm:
|
2020-02-09 00:54:46 +08:00
|
|
|
case X86::VPBROADCASTQZ128rm:
|
|
|
|
case X86::VPBROADCASTQZ256rm:
|
|
|
|
case X86::VPBROADCASTQZrm:
|
2017-07-04 13:46:11 +08:00
|
|
|
case X86::VPBROADCASTWrm:
|
|
|
|
case X86::VPBROADCASTWYrm:
|
2020-02-09 00:54:46 +08:00
|
|
|
case X86::VPBROADCASTWZ128rm:
|
|
|
|
case X86::VPBROADCASTWZ256rm:
|
|
|
|
case X86::VPBROADCASTWZrm:
|
2020-05-31 03:53:56 +08:00
|
|
|
assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
|
|
|
|
"Unexpected number of operands!");
|
|
|
|
if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
|
2017-07-04 13:46:11 +08:00
|
|
|
int NumElts;
|
|
|
|
switch (MI->getOpcode()) {
|
2018-07-02 14:42:42 +08:00
|
|
|
default: llvm_unreachable("Invalid opcode");
|
2020-02-09 00:54:46 +08:00
|
|
|
case X86::MOVDDUPrm: NumElts = 2; break;
|
|
|
|
case X86::VMOVDDUPrm: NumElts = 2; break;
|
|
|
|
case X86::VMOVDDUPZ128rm: NumElts = 2; break;
|
|
|
|
case X86::VBROADCASTSSrm: NumElts = 4; break;
|
|
|
|
case X86::VBROADCASTSSYrm: NumElts = 8; break;
|
|
|
|
case X86::VBROADCASTSSZ128rm: NumElts = 4; break;
|
|
|
|
case X86::VBROADCASTSSZ256rm: NumElts = 8; break;
|
|
|
|
case X86::VBROADCASTSSZrm: NumElts = 16; break;
|
|
|
|
case X86::VBROADCASTSDYrm: NumElts = 4; break;
|
|
|
|
case X86::VBROADCASTSDZ256rm: NumElts = 4; break;
|
|
|
|
case X86::VBROADCASTSDZrm: NumElts = 8; break;
|
|
|
|
case X86::VPBROADCASTBrm: NumElts = 16; break;
|
|
|
|
case X86::VPBROADCASTBYrm: NumElts = 32; break;
|
|
|
|
case X86::VPBROADCASTBZ128rm: NumElts = 16; break;
|
|
|
|
case X86::VPBROADCASTBZ256rm: NumElts = 32; break;
|
|
|
|
case X86::VPBROADCASTBZrm: NumElts = 64; break;
|
|
|
|
case X86::VPBROADCASTDrm: NumElts = 4; break;
|
|
|
|
case X86::VPBROADCASTDYrm: NumElts = 8; break;
|
|
|
|
case X86::VPBROADCASTDZ128rm: NumElts = 4; break;
|
|
|
|
case X86::VPBROADCASTDZ256rm: NumElts = 8; break;
|
|
|
|
case X86::VPBROADCASTDZrm: NumElts = 16; break;
|
|
|
|
case X86::VPBROADCASTQrm: NumElts = 2; break;
|
|
|
|
case X86::VPBROADCASTQYrm: NumElts = 4; break;
|
|
|
|
case X86::VPBROADCASTQZ128rm: NumElts = 2; break;
|
|
|
|
case X86::VPBROADCASTQZ256rm: NumElts = 4; break;
|
|
|
|
case X86::VPBROADCASTQZrm: NumElts = 8; break;
|
|
|
|
case X86::VPBROADCASTWrm: NumElts = 8; break;
|
|
|
|
case X86::VPBROADCASTWYrm: NumElts = 16; break;
|
|
|
|
case X86::VPBROADCASTWZ128rm: NumElts = 8; break;
|
|
|
|
case X86::VPBROADCASTWZ256rm: NumElts = 16; break;
|
|
|
|
case X86::VPBROADCASTWZrm: NumElts = 32; break;
|
2017-07-04 13:46:11 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
std::string Comment;
|
|
|
|
raw_string_ostream CS(Comment);
|
|
|
|
const MachineOperand &DstOp = MI->getOperand(0);
|
|
|
|
CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
|
|
|
|
CS << "[";
|
|
|
|
for (int i = 0; i != NumElts; ++i) {
|
|
|
|
if (i != 0)
|
|
|
|
CS << ",";
|
|
|
|
printConstant(C, CS);
|
|
|
|
}
|
|
|
|
CS << "]";
|
2020-05-30 15:31:21 +08:00
|
|
|
OutStreamer.AddComment(CS.str());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
|
|
|
X86MCInstLower MCInstLowering(*MF, *this);
|
|
|
|
const X86RegisterInfo *RI =
|
|
|
|
MF->getSubtarget<X86Subtarget>().getRegisterInfo();
|
|
|
|
|
|
|
|
// Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
|
|
|
|
// are compressed from EVEX encoding to VEX encoding.
|
|
|
|
if (TM.Options.MCOptions.ShowMCEncoding) {
|
|
|
|
if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
|
|
|
|
OutStreamer->AddComment("EVEX TO VEX Compression ", false);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add comments for values loaded from constant pool.
|
|
|
|
if (OutStreamer->isVerboseAsm())
|
|
|
|
addConstantComments(MI, *OutStreamer);
|
|
|
|
|
|
|
|
switch (MI->getOpcode()) {
|
|
|
|
case TargetOpcode::DBG_VALUE:
|
|
|
|
llvm_unreachable("Should be handled target independently");
|
|
|
|
|
|
|
|
// Emit nothing here but a comment if we can.
|
|
|
|
case X86::Int_MemBarrier:
|
|
|
|
OutStreamer->emitRawComment("MEMBARRIER");
|
|
|
|
return;
|
|
|
|
|
|
|
|
case X86::EH_RETURN:
|
|
|
|
case X86::EH_RETURN64: {
|
|
|
|
// Lower these as normal, but add some comments.
|
|
|
|
Register Reg = MI->getOperand(0).getReg();
|
|
|
|
OutStreamer->AddComment(StringRef("eh_return, addr: %") +
|
|
|
|
X86ATTInstPrinter::getRegisterName(Reg));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case X86::CLEANUPRET: {
|
|
|
|
// Lower these as normal, but add some comments.
|
|
|
|
OutStreamer->AddComment("CLEANUPRET");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case X86::CATCHRET: {
|
|
|
|
// Lower these as normal, but add some comments.
|
|
|
|
OutStreamer->AddComment("CATCHRET");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case X86::ENDBR32:
|
|
|
|
case X86::ENDBR64: {
|
|
|
|
// CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
|
|
|
|
// -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
|
|
|
|
// non-empty. If MI is the initial ENDBR, place the
|
|
|
|
// __patchable_function_entries label after ENDBR.
|
|
|
|
if (CurrentPatchableFunctionEntrySym &&
|
|
|
|
CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
|
|
|
|
MI == &MF->front().front()) {
|
|
|
|
MCInst Inst;
|
|
|
|
MCInstLowering.Lower(MI, Inst);
|
|
|
|
EmitAndCountInstruction(Inst);
|
|
|
|
CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
|
|
|
|
OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case X86::TAILJMPr:
|
|
|
|
case X86::TAILJMPm:
|
|
|
|
case X86::TAILJMPd:
|
|
|
|
case X86::TAILJMPd_CC:
|
|
|
|
case X86::TAILJMPr64:
|
|
|
|
case X86::TAILJMPm64:
|
|
|
|
case X86::TAILJMPd64:
|
|
|
|
case X86::TAILJMPd64_CC:
|
|
|
|
case X86::TAILJMPr64_REX:
|
|
|
|
case X86::TAILJMPm64_REX:
|
|
|
|
// Lower these as normal, but add some comments.
|
|
|
|
OutStreamer->AddComment("TAILCALL");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case X86::TLS_addr32:
|
|
|
|
case X86::TLS_addr64:
|
2020-12-03 06:20:36 +08:00
|
|
|
case X86::TLS_addrX32:
|
2020-05-30 15:31:21 +08:00
|
|
|
case X86::TLS_base_addr32:
|
|
|
|
case X86::TLS_base_addr64:
|
2020-12-03 06:20:36 +08:00
|
|
|
case X86::TLS_base_addrX32:
|
2020-05-30 15:31:21 +08:00
|
|
|
return LowerTlsAddr(MCInstLowering, *MI);
|
|
|
|
|
|
|
|
case X86::MOVPC32r: {
|
|
|
|
// This is a pseudo op for a two instruction sequence with a label, which
|
|
|
|
// looks like:
|
|
|
|
// call "L1$pb"
|
|
|
|
// "L1$pb":
|
|
|
|
// popl %esi
|
|
|
|
|
|
|
|
// Emit the call.
|
|
|
|
MCSymbol *PICBase = MF->getPICBaseSymbol();
|
|
|
|
// FIXME: We would like an efficient form for this, so we don't have to do a
|
|
|
|
// lot of extra uniquing.
|
|
|
|
EmitAndCountInstruction(
|
|
|
|
MCInstBuilder(X86::CALLpcrel32)
|
|
|
|
.addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
|
|
|
|
|
|
|
|
const X86FrameLowering *FrameLowering =
|
|
|
|
MF->getSubtarget<X86Subtarget>().getFrameLowering();
|
|
|
|
bool hasFP = FrameLowering->hasFP(*MF);
|
|
|
|
|
|
|
|
// TODO: This is needed only if we require precise CFA.
|
|
|
|
bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
|
|
|
|
!OutStreamer->getDwarfFrameInfos().back().End;
|
|
|
|
|
|
|
|
int stackGrowth = -RI->getSlotSize();
|
|
|
|
|
|
|
|
if (HasActiveDwarfFrame && !hasFP) {
|
|
|
|
OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Emit the label.
|
|
|
|
OutStreamer->emitLabel(PICBase);
|
|
|
|
|
|
|
|
// popl $reg
|
|
|
|
EmitAndCountInstruction(
|
|
|
|
MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
|
|
|
|
|
|
|
|
if (HasActiveDwarfFrame && !hasFP) {
|
|
|
|
OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
case X86::ADD32ri: {
|
|
|
|
// Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
|
|
|
|
if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
|
|
|
|
break;
|
|
|
|
|
|
|
|
// Okay, we have something like:
|
|
|
|
// EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
|
|
|
|
|
|
|
|
// For this, we want to print something like:
|
|
|
|
// MYGLOBAL + (. - PICBASE)
|
|
|
|
// However, we can't generate a ".", so just emit a new label here and refer
|
|
|
|
// to it.
|
|
|
|
MCSymbol *DotSym = OutContext.createTempSymbol();
|
|
|
|
OutStreamer->emitLabel(DotSym);
|
|
|
|
|
|
|
|
// Now that we have emitted the label, lower the complex operand expression.
|
|
|
|
MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
|
|
|
|
|
|
|
|
const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
|
|
|
|
const MCExpr *PICBase =
|
|
|
|
MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
|
|
|
|
DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
|
|
|
|
|
|
|
|
DotExpr = MCBinaryExpr::createAdd(
|
|
|
|
MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
|
|
|
|
|
|
|
|
EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
|
|
|
|
.addReg(MI->getOperand(0).getReg())
|
|
|
|
.addReg(MI->getOperand(1).getReg())
|
|
|
|
.addExpr(DotExpr));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
case TargetOpcode::STATEPOINT:
|
|
|
|
return LowerSTATEPOINT(*MI, MCInstLowering);
|
|
|
|
|
|
|
|
case TargetOpcode::FAULTING_OP:
|
|
|
|
return LowerFAULTING_OP(*MI, MCInstLowering);
|
|
|
|
|
|
|
|
case TargetOpcode::FENTRY_CALL:
|
|
|
|
return LowerFENTRY_CALL(*MI, MCInstLowering);
|
|
|
|
|
|
|
|
case TargetOpcode::PATCHABLE_OP:
|
|
|
|
return LowerPATCHABLE_OP(*MI, MCInstLowering);
|
|
|
|
|
|
|
|
case TargetOpcode::STACKMAP:
|
|
|
|
return LowerSTACKMAP(*MI);
|
|
|
|
|
|
|
|
case TargetOpcode::PATCHPOINT:
|
|
|
|
return LowerPATCHPOINT(*MI, MCInstLowering);
|
|
|
|
|
|
|
|
case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
|
|
|
|
return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
|
|
|
|
|
|
|
|
case TargetOpcode::PATCHABLE_RET:
|
|
|
|
return LowerPATCHABLE_RET(*MI, MCInstLowering);
|
|
|
|
|
|
|
|
case TargetOpcode::PATCHABLE_TAIL_CALL:
|
|
|
|
return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
|
|
|
|
|
|
|
|
case TargetOpcode::PATCHABLE_EVENT_CALL:
|
|
|
|
return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
|
|
|
|
|
|
|
|
case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
|
|
|
|
return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
|
|
|
|
|
|
|
|
case X86::MORESTACK_RET:
|
|
|
|
EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
|
|
|
|
return;
|
|
|
|
|
|
|
|
case X86::MORESTACK_RET_RESTORE_R10:
|
|
|
|
// Return, then restore R10.
|
|
|
|
EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
|
|
|
|
EmitAndCountInstruction(
|
|
|
|
MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
|
|
|
|
return;
|
|
|
|
|
|
|
|
case X86::SEH_PushReg:
|
|
|
|
case X86::SEH_SaveReg:
|
|
|
|
case X86::SEH_SaveXMM:
|
|
|
|
case X86::SEH_StackAlloc:
|
|
|
|
case X86::SEH_StackAlign:
|
|
|
|
case X86::SEH_SetFrame:
|
|
|
|
case X86::SEH_PushFrame:
|
|
|
|
case X86::SEH_EndPrologue:
|
|
|
|
EmitSEHInstruction(MI);
|
|
|
|
return;
|
|
|
|
|
|
|
|
case X86::SEH_Epilogue: {
|
|
|
|
assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
|
|
|
|
MachineBasicBlock::const_iterator MBBI(MI);
|
|
|
|
// Check if preceded by a call and emit nop if so.
|
|
|
|
for (MBBI = PrevCrossBBInst(MBBI);
|
|
|
|
MBBI != MachineBasicBlock::const_iterator();
|
|
|
|
MBBI = PrevCrossBBInst(MBBI)) {
|
|
|
|
// Conservatively assume that pseudo instructions don't emit code and keep
|
|
|
|
// looking for a call. We may emit an unnecessary nop in some cases.
|
|
|
|
if (!MBBI->isPseudo()) {
|
|
|
|
if (MBBI->isCall())
|
|
|
|
EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
|
|
|
|
break;
|
|
|
|
}
|
2017-07-04 13:46:11 +08:00
|
|
|
}
|
2020-05-30 15:31:21 +08:00
|
|
|
return;
|
|
|
|
}
|
2020-10-21 17:11:25 +08:00
|
|
|
case X86::UBSAN_UD1:
|
|
|
|
EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
|
|
|
|
.addReg(X86::EAX)
|
|
|
|
.addReg(X86::EAX)
|
|
|
|
.addImm(1)
|
|
|
|
.addReg(X86::NoRegister)
|
|
|
|
.addImm(MI->getOperand(0).getImm())
|
|
|
|
.addReg(X86::NoRegister));
|
|
|
|
return;
|
2014-09-24 10:16:12 +08:00
|
|
|
}
|
2012-08-02 02:39:17 +08:00
|
|
|
|
2009-09-13 04:34:57 +08:00
|
|
|
MCInst TmpInst;
|
|
|
|
MCInstLowering.Lower(MI, TmpInst);
|
2014-10-28 03:40:35 +08:00
|
|
|
|
|
|
|
// Stackmap shadows cannot include branch targets, so we can count the bytes
|
2014-10-28 06:38:45 +08:00
|
|
|
// in a call towards the shadow, but must ensure that the no thread returns
|
|
|
|
// in to the stackmap shadow. The only way to achieve this is if the call
|
|
|
|
// is at the end of the shadow.
|
|
|
|
if (MI->isCall()) {
|
|
|
|
// Count then size of the call towards the shadow
|
2016-04-19 13:24:47 +08:00
|
|
|
SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
|
2014-10-28 06:38:45 +08:00
|
|
|
// Then flush the shadow so that we fill with nops before the call, not
|
|
|
|
// after it.
|
2015-04-25 03:11:51 +08:00
|
|
|
SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
|
2014-10-28 06:38:45 +08:00
|
|
|
// Then emit the call
|
2020-02-14 13:58:16 +08:00
|
|
|
OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
|
2014-10-28 06:38:45 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
EmitAndCountInstruction(TmpInst);
|
2009-09-03 01:35:12 +08:00
|
|
|
}
|