//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "../Target.h"
|
|
|
|
|
2018-06-26 16:49:30 +08:00
|
|
|
#include "../Latency.h"
|
|
|
|
#include "../Uops.h"
|
2018-06-28 15:41:16 +08:00
|
|
|
#include "MCTargetDesc/X86BaseInfo.h"
|
2018-06-25 21:12:02 +08:00
|
|
|
#include "MCTargetDesc/X86MCTargetDesc.h"
|
2018-06-20 19:54:35 +08:00
|
|
|
#include "X86.h"
|
2018-06-25 21:12:02 +08:00
|
|
|
#include "X86RegisterInfo.h"
|
2018-07-03 14:17:05 +08:00
|
|
|
#include "X86Subtarget.h"
|
2018-06-25 21:12:02 +08:00
|
|
|
#include "llvm/MC/MCInstBuilder.h"
|
2018-06-20 19:54:35 +08:00
|
|
|
|
2018-06-19 19:28:59 +08:00
|
|
|
namespace exegesis {
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
2018-06-28 15:41:16 +08:00
|
|
|
// Common code for X86 Uops and Latency runners.
|
|
|
|
template <typename Impl> class X86BenchmarkRunner : public Impl {
|
|
|
|
using Impl::Impl;
|
2018-06-26 16:49:30 +08:00
|
|
|
|
2018-08-03 17:29:38 +08:00
|
|
|
llvm::Expected<CodeTemplate>
|
|
|
|
generateCodeTemplate(unsigned Opcode) const override {
|
2018-06-28 15:41:16 +08:00
|
|
|
// Test whether we can generate a snippet for this instruction.
|
|
|
|
const auto &InstrInfo = this->State.getInstrInfo();
|
|
|
|
const auto OpcodeName = InstrInfo.getName(Opcode);
|
|
|
|
if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") ||
|
|
|
|
OpcodeName.startswith("ADJCALLSTACK")) {
|
|
|
|
return llvm::make_error<BenchmarkFailure>(
|
|
|
|
"Unsupported opcode: Push/Pop/AdjCallStack");
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle X87.
|
|
|
|
const auto &InstrDesc = InstrInfo.get(Opcode);
|
|
|
|
const unsigned FPInstClass = InstrDesc.TSFlags & llvm::X86II::FPTypeMask;
|
|
|
|
const Instruction Instr(InstrDesc, this->RATC);
|
|
|
|
switch (FPInstClass) {
|
|
|
|
case llvm::X86II::NotFP:
|
|
|
|
break;
|
|
|
|
case llvm::X86II::ZeroArgFP:
|
2018-07-05 21:54:51 +08:00
|
|
|
return llvm::make_error<BenchmarkFailure>("Unsupported x87 ZeroArgFP");
|
2018-06-28 15:41:16 +08:00
|
|
|
case llvm::X86II::OneArgFP:
|
2018-07-05 21:54:51 +08:00
|
|
|
return llvm::make_error<BenchmarkFailure>("Unsupported x87 OneArgFP");
|
2018-06-28 15:41:16 +08:00
|
|
|
case llvm::X86II::OneArgFPRW:
|
|
|
|
case llvm::X86II::TwoArgFP: {
|
|
|
|
// These are instructions like
|
|
|
|
// - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
|
|
|
|
// - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
|
|
|
|
// They are intrinsically serial and do not modify the state of the stack.
|
|
|
|
// We generate the same code for latency and uops.
|
2018-08-03 17:29:38 +08:00
|
|
|
return this->generateSelfAliasingCodeTemplate(Instr);
|
2018-06-26 16:49:30 +08:00
|
|
|
}
|
2018-06-28 15:41:16 +08:00
|
|
|
case llvm::X86II::CompareFP:
|
|
|
|
return Impl::handleCompareFP(Instr);
|
|
|
|
case llvm::X86II::CondMovFP:
|
|
|
|
return Impl::handleCondMovFP(Instr);
|
|
|
|
case llvm::X86II::SpecialFP:
|
2018-07-05 21:54:51 +08:00
|
|
|
return llvm::make_error<BenchmarkFailure>("Unsupported x87 SpecialFP");
|
2018-06-28 15:41:16 +08:00
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown FP Type!");
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fallback to generic implementation.
|
2018-08-03 17:29:38 +08:00
|
|
|
return Impl::Base::generateCodeTemplate(Opcode);
|
2018-06-26 16:49:30 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2018-06-28 15:41:16 +08:00
|
|
|
class X86LatencyImpl : public LatencyBenchmarkRunner {
|
|
|
|
protected:
|
|
|
|
using Base = LatencyBenchmarkRunner;
|
|
|
|
using Base::Base;
|
2018-08-03 17:29:38 +08:00
|
|
|
llvm::Expected<CodeTemplate> handleCompareFP(const Instruction &Instr) const {
|
2018-06-28 15:41:16 +08:00
|
|
|
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CompareFP");
|
|
|
|
}
|
2018-08-03 17:29:38 +08:00
|
|
|
llvm::Expected<CodeTemplate> handleCondMovFP(const Instruction &Instr) const {
|
2018-06-28 15:41:16 +08:00
|
|
|
return llvm::make_error<BenchmarkFailure>("Unsupported x87 CondMovFP");
|
|
|
|
}
|
|
|
|
};
|
2018-06-26 16:49:30 +08:00
|
|
|
|
2018-06-28 15:41:16 +08:00
|
|
|
class X86UopsImpl : public UopsBenchmarkRunner {
|
|
|
|
protected:
|
|
|
|
using Base = UopsBenchmarkRunner;
|
|
|
|
using Base::Base;
|
2018-07-05 21:54:51 +08:00
|
|
|
// We can compute uops for any FP instruction that does not grow or shrink the
|
|
|
|
// stack (either do not touch the stack or push as much as they pop).
|
2018-08-03 17:29:38 +08:00
|
|
|
llvm::Expected<CodeTemplate> handleCompareFP(const Instruction &Instr) const {
|
|
|
|
return generateUnconstrainedCodeTemplate(
|
2018-07-05 21:54:51 +08:00
|
|
|
Instr, "instruction does not grow/shrink the FP stack");
|
2018-06-28 15:41:16 +08:00
|
|
|
}
|
2018-08-03 17:29:38 +08:00
|
|
|
llvm::Expected<CodeTemplate> handleCondMovFP(const Instruction &Instr) const {
|
|
|
|
return generateUnconstrainedCodeTemplate(
|
2018-07-05 21:54:51 +08:00
|
|
|
Instr, "instruction does not grow/shrink the FP stack");
|
2018-06-26 16:49:30 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2018-06-19 19:28:59 +08:00
|
|
|
// ExegesisTarget implementation for x86 (both 32- and 64-bit, see
// matchesArch); snippet setup code (register initialization, scratch memory
// addressing) is emitted as MCInst sequences.
class ExegesisX86Target : public ExegesisTarget {
  // Registers the passes that must run before snippet assembly.
  void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override {
    // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
    PM.add(llvm::createX86FloatingPointStackifierPass());
  }

  // Returns the register holding the base address of the scratch memory
  // region, or 0 when scratch memory is unsupported (32-bit mode).
  unsigned getScratchMemoryRegister(const llvm::Triple &TT) const override {
    if (!TT.isArch64Bit()) {
      // FIXME: This would require popping from the stack, so we would have to
      // add some additional setup code.
      return 0;
    }
    return TT.isOSWindows() ? llvm::X86::RCX : llvm::X86::RDI;
  }

  // 64 bytes covers the widest access generated here (the 64-byte vector
  // load used for VR512 registers in setRegToConstant below).
  unsigned getMaxMemoryAccessSize() const override { return 64; }

  // Points every explicit memory operand of IB's instruction at
  // [Reg + Offset] by filling the 5-operand x86 memory reference
  // (BaseReg, ScaleAmt, IndexReg, Disp, Segment).
  void fillMemoryOperands(InstructionBuilder &IB, unsigned Reg,
                          unsigned Offset) const override {
    // FIXME: For instructions that read AND write to memory, we use the same
    // value for input and output.
    for (size_t I = 0, E = IB.Instr.Operands.size(); I < E; ++I) {
      const Operand *Op = &IB.Instr.Operands[I];
      if (Op->IsExplicit && Op->IsMem) {
        // Case 1: 5-op memory.
        assert((I + 5 <= E) && "x86 memory references are always 5 ops");
        IB.getValueFor(*Op) = llvm::MCOperand::createReg(Reg); // BaseReg
        Op = &IB.Instr.Operands[++I];
        assert(Op->IsMem);
        assert(Op->IsExplicit);
        IB.getValueFor(*Op) = llvm::MCOperand::createImm(1); // ScaleAmt
        Op = &IB.Instr.Operands[++I];
        assert(Op->IsMem);
        assert(Op->IsExplicit);
        IB.getValueFor(*Op) = llvm::MCOperand::createReg(0); // IndexReg
        Op = &IB.Instr.Operands[++I];
        assert(Op->IsMem);
        assert(Op->IsExplicit);
        IB.getValueFor(*Op) = llvm::MCOperand::createImm(Offset); // Disp
        Op = &IB.Instr.Operands[++I];
        assert(Op->IsMem);
        assert(Op->IsExplicit);
        IB.getValueFor(*Op) = llvm::MCOperand::createReg(0); // Segment
        // Case2: segment:index addressing. We assume that ES is 0.
      }
    }
  }

  // Returns the instruction sequence that sets `Reg` to a well-defined
  // constant: GPRs via mov-immediate, vector/x87 registers via a scratch
  // stack slot (see setVectorRegToConstant), EFLAGS via POPF64. Returns an
  // empty sequence for register classes not handled below.
  std::vector<llvm::MCInst> setRegToConstant(const llvm::MCSubtargetInfo &STI,
                                             unsigned Reg) const override {
    // GPR.
    if (llvm::X86::GR8RegClass.contains(Reg))
      return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)};
    if (llvm::X86::GR16RegClass.contains(Reg))
      return {llvm::MCInstBuilder(llvm::X86::MOV16ri).addReg(Reg).addImm(1)};
    if (llvm::X86::GR32RegClass.contains(Reg))
      return {llvm::MCInstBuilder(llvm::X86::MOV32ri).addReg(Reg).addImm(1)};
    if (llvm::X86::GR64RegClass.contains(Reg))
      return {llvm::MCInstBuilder(llvm::X86::MOV64ri32).addReg(Reg).addImm(1)};
    // MMX.
    if (llvm::X86::VR64RegClass.contains(Reg))
      return setVectorRegToConstant(Reg, 8, llvm::X86::MMX_MOVQ64rm);
    // {X,Y,Z}MM: pick the widest load opcode available on this subtarget.
    if (llvm::X86::VR128XRegClass.contains(Reg)) {
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQU32Z128rm);
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX])
        return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQUrm);
      return setVectorRegToConstant(Reg, 16, llvm::X86::MOVDQUrm);
    }
    if (llvm::X86::VR256XRegClass.contains(Reg)) {
      if (STI.getFeatureBits()[llvm::X86::FeatureAVX512])
        return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQU32Z256rm);
      return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQUYrm);
    }
    if (llvm::X86::VR512RegClass.contains(Reg))
      return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU32Zrm);
    // X87.
    if (llvm::X86::RFP32RegClass.contains(Reg) ||
        llvm::X86::RFP64RegClass.contains(Reg) ||
        llvm::X86::RFP80RegClass.contains(Reg))
      return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m);
    if (Reg == llvm::X86::EFLAGS) {
      // Set all flags to 0 but the bits that are "reserved and set to 1".
      constexpr const uint32_t kImmValue = 0x00007002u;
      std::vector<llvm::MCInst> Result;
      // Write the immediate to a fresh stack slot, then pop it into EFLAGS.
      Result.push_back(allocateStackSpace(8));
      Result.push_back(fillStackSpace(llvm::X86::MOV64mi32, 0, kImmValue));
      Result.push_back(llvm::MCInstBuilder(llvm::X86::POPF64)); // Also pops.
      return Result;
    }
    // Unknown register class: emit no setup code.
    return {};
  }

  std::unique_ptr<BenchmarkRunner>
  createLatencyBenchmarkRunner(const LLVMState &State) const override {
    return llvm::make_unique<X86BenchmarkRunner<X86LatencyImpl>>(State);
  }

  std::unique_ptr<BenchmarkRunner>
  createUopsBenchmarkRunner(const LLVMState &State) const override {
    return llvm::make_unique<X86BenchmarkRunner<X86UopsImpl>>(State);
  }

  bool matchesArch(llvm::Triple::ArchType Arch) const override {
    return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86;
  }

private:
  // setRegToConstant() specialized for a vector register of size
  // `RegSizeBytes`. `RMOpcode` is the opcode used to do a memory -> vector
  // register load.
  static std::vector<llvm::MCInst>
  setVectorRegToConstant(const unsigned Reg, const unsigned RegSizeBytes,
                         const unsigned RMOpcode) {
    // There is no instruction to directly set XMM, go through memory.
    // Since vector values can be interpreted as integers of various sizes (8
    // to 64 bits) as well as floats and double, so we chose an immediate
    // value that has set bits for all byte values and is a normal float/
    // double. 0x40404040 is ~32.5 when interpreted as a double and ~3.0f when
    // interpreted as a float.
    constexpr const uint32_t kImmValue = 0x40404040u;
    std::vector<llvm::MCInst> Result;
    Result.push_back(allocateStackSpace(RegSizeBytes));
    // Fill the slot 4 bytes at a time (MOV32mi), then load it into `Reg`.
    constexpr const unsigned kMov32NumBytes = 4;
    for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += kMov32NumBytes) {
      Result.push_back(fillStackSpace(llvm::X86::MOV32mi, Disp, kImmValue));
    }
    Result.push_back(loadToReg(Reg, RMOpcode));
    Result.push_back(releaseStackSpace(RegSizeBytes));
    return Result;
  }

  // Allocates scratch memory on the stack (SUB RSP, Bytes).
  static llvm::MCInst allocateStackSpace(unsigned Bytes) {
    return llvm::MCInstBuilder(llvm::X86::SUB64ri8)
        .addReg(llvm::X86::RSP)
        .addReg(llvm::X86::RSP)
        .addImm(Bytes);
  }

  // Fills scratch memory at offset `OffsetBytes` with value `Imm`
  // (MOV [RSP + OffsetBytes], Imm using opcode `MovOpcode`).
  static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
                                     uint64_t Imm) {
    return llvm::MCInstBuilder(MovOpcode)
        // Address = ESP
        .addReg(llvm::X86::RSP) // BaseReg
        .addImm(1) // ScaleAmt
        .addReg(0) // IndexReg
        .addImm(OffsetBytes) // Disp
        .addReg(0) // Segment
        // Immediate.
        .addImm(Imm);
  }

  // Loads scratch memory into register `Reg` using opcode `RMOpcode`.
  static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) {
    return llvm::MCInstBuilder(RMOpcode)
        .addReg(Reg)
        // Address = ESP
        .addReg(llvm::X86::RSP) // BaseReg
        .addImm(1) // ScaleAmt
        .addReg(0) // IndexReg
        .addImm(0) // Disp
        .addReg(0); // Segment
  }

  // Releases scratch memory (ADD RSP, Bytes).
  static llvm::MCInst releaseStackSpace(unsigned Bytes) {
    return llvm::MCInstBuilder(llvm::X86::ADD64ri8)
        .addReg(llvm::X86::RSP)
        .addReg(llvm::X86::RSP)
        .addImm(Bytes);
  }
};
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
2018-06-25 19:22:23 +08:00
|
|
|
// Returns the lazily-constructed singleton instance of the x86 exegesis
// target.
static ExegesisTarget *getTheExegesisX86Target() {
  static ExegesisX86Target TheTarget;
  return &TheTarget;
}
|
|
|
|
|
|
|
|
void InitializeX86ExegesisTarget() {
|
|
|
|
ExegesisTarget::registerTarget(getTheExegesisX86Target());
|
|
|
|
}
|
|
|
|
|
2018-06-25 19:22:23 +08:00
|
|
|
} // namespace exegesis
|