forked from OSchip/llvm-project
138 lines
5.1 KiB
C++
138 lines
5.1 KiB
C++
//===-- Latency.cpp ---------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
|
|
|
|
#include "Latency.h"
|
|
|
|
#include "Assembler.h"
|
|
#include "BenchmarkRunner.h"
|
|
#include "MCInstrDescView.h"
|
|
#include "PerfHelper.h"
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/MC/MCInst.h"
|
|
#include "llvm/MC/MCInstBuilder.h"
|
|
#include "llvm/Support/FormatVariadic.h"
|
|
|
|
namespace exegesis {
|
|
|
|
// Predicate: true when the operand described by `OpInfo` has the operand type
// llvm::MCOI::OPERAND_UNKNOWN.
static bool hasUnknownOperand(const llvm::MCOperandInfo &OpInfo) {
  const auto Type = OpInfo.OperandType;
  return llvm::MCOI::OPERAND_UNKNOWN == Type;
}
|
|
|
|
// FIXME: Handle memory, see PR36905.
|
|
static bool hasMemoryOperand(const llvm::MCOperandInfo &OpInfo) {
|
|
return OpInfo.OperandType == llvm::MCOI::OPERAND_MEMORY;
|
|
}
|
|
|
|
// Out-of-line defaulted destructor.
// NOTE(review): presumably defined here so the class's vtable is emitted in
// this translation unit - confirm against the declaration in Latency.h.
LatencySnippetGenerator::~LatencySnippetGenerator() = default;
|
|
|
|
// Checks whether a latency snippet can be generated for `MCInstrDesc`.
//
// Returns a BenchmarkFailure error when any operand has an unknown type or,
// failing that, when any operand is a memory operand. The unknown-operand
// check is performed first, so it takes precedence in the reported message.
// Returns llvm::Error::success() otherwise.
llvm::Error LatencySnippetGenerator::isInfeasible(
    const llvm::MCInstrDesc &MCInstrDesc) const {
  const bool AnyUnknown =
      llvm::any_of(MCInstrDesc.operands(), hasUnknownOperand);
  if (AnyUnknown) {
    return llvm::make_error<BenchmarkFailure>(
        "Infeasible : has unknown operands");
  }

  const bool AnyMemory = llvm::any_of(MCInstrDesc.operands(), hasMemoryOperand);
  if (AnyMemory) {
    return llvm::make_error<BenchmarkFailure>(
        "Infeasible : has memory operands");
  }

  return llvm::Error::success();
}
|
|
|
|
// Tries to build a two-instruction code template in which `Instr` and some
// other instruction alias each other in both directions.
//
// Every opcode known to the instruction info is visited in a random order.
// A candidate is skipped when it is `Instr`'s own opcode, when isInfeasible()
// rejects it, or when either aliasing configuration (Instr -> candidate or
// candidate -> Instr) is empty. For the first remaining candidate, random
// aliasing is set up in each direction that has no implicit aliasing, and a
// template holding both instructions is returned.
//
// Returns a BenchmarkFailure error if no candidate opcode qualifies.
llvm::Expected<CodeTemplate>
LatencySnippetGenerator::generateTwoInstructionPrototype(
    const Instruction &Instr) const {
  const auto &InstrInfo = State.getInstrInfo();

  // All opcode values [0, NumOpcodes), visited in a shuffled order.
  std::vector<unsigned> Candidates(InstrInfo.getNumOpcodes());
  std::iota(Candidates.begin(), Candidates.end(), 0U);
  std::shuffle(Candidates.begin(), Candidates.end(), randomGenerator());

  for (const unsigned Candidate : Candidates) {
    if (Candidate == Instr.Description->Opcode)
      continue;

    const auto &CandidateDesc = InstrInfo.get(Candidate);
    if (auto Err = isInfeasible(CandidateDesc)) {
      // An infeasible candidate is not an error for the caller; drop it.
      llvm::consumeError(std::move(Err));
      continue;
    }

    const Instruction CandidateInstr(CandidateDesc, RATC);
    const AliasingConfigurations InstrToCandidate(Instr, CandidateInstr);
    const AliasingConfigurations CandidateToInstr(CandidateInstr, Instr);
    const bool BothDirectionsAlias =
        !InstrToCandidate.empty() && !CandidateToInstr.empty();
    if (!BothDirectionsAlias)
      continue;

    InstructionBuilder InstrIB(Instr);
    InstructionBuilder CandidateIB(CandidateInstr);
    // Explicit aliasing is only chosen where none exists implicitly.
    if (!InstrToCandidate.hasImplicitAliasing())
      setRandomAliasing(InstrToCandidate, InstrIB, CandidateIB);
    if (!CandidateToInstr.hasImplicitAliasing())
      setRandomAliasing(CandidateToInstr, CandidateIB, InstrIB);

    CodeTemplate CT;
    CT.Info = llvm::formatv("creating cycle through {0}.",
                            InstrInfo.getName(Candidate));
    CT.Instructions.push_back(std::move(InstrIB));
    CT.Instructions.push_back(std::move(CandidateIB));
    return std::move(CT);
  }

  return llvm::make_error<BenchmarkFailure>(
      "Infeasible : Didn't find any scheme to make the instruction serial");
}
|
|
|
|
// Produces a code template for `Opcode`.
//
// Returns the isInfeasible() error if the instruction is rejected. Otherwise
// the self-aliasing template is returned when it can be generated; if that
// fails, its error is discarded and a two-instruction template is attempted
// via generateTwoInstructionPrototype().
llvm::Expected<CodeTemplate>
LatencySnippetGenerator::generateCodeTemplate(unsigned Opcode) const {
  const auto &InstrDesc = State.getInstrInfo().get(Opcode);
  if (auto Err = isInfeasible(InstrDesc))
    return std::move(Err);

  const Instruction Instr(InstrDesc, RATC);

  auto SelfAliasingCT = generateSelfAliasingCodeTemplate(Instr);
  if (SelfAliasingCT)
    return SelfAliasingCT;
  llvm::consumeError(SelfAliasingCT.takeError());

  // No self aliasing, trying to create a dependency through another opcode.
  return generateTwoInstructionPrototype(Instr);
}
|
|
|
|
const char *LatencyBenchmarkRunner::getCounterName() const {
|
|
if (!State.getSubtargetInfo().getSchedModel().hasExtraProcessorInfo())
|
|
llvm::report_fatal_error("sched model is missing extra processor info!");
|
|
const char *CounterName = State.getSubtargetInfo()
|
|
.getSchedModel()
|
|
.getExtraProcessorInfo()
|
|
.PfmCounters.CycleCounter;
|
|
if (!CounterName)
|
|
llvm::report_fatal_error("sched model does not define a cycle counter");
|
|
return CounterName;
|
|
}
|
|
|
|
// Out-of-line defaulted destructor.
// NOTE(review): presumably defined here so the class's vtable is emitted in
// this translation unit - confirm against the declaration in Latency.h.
LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default;
|
|
|
|
std::vector<BenchmarkMeasure>
|
|
LatencyBenchmarkRunner::runMeasurements(const ExecutableFunction &Function,
|
|
ScratchSpace &Scratch,
|
|
const unsigned NumRepetitions) const {
|
|
// Cycle measurements include some overhead from the kernel. Repeat the
|
|
// measure several times and take the minimum value.
|
|
constexpr const int NumMeasurements = 30;
|
|
int64_t MinLatency = std::numeric_limits<int64_t>::max();
|
|
const char *CounterName = getCounterName();
|
|
if (!CounterName)
|
|
llvm::report_fatal_error("could not determine cycle counter name");
|
|
const pfm::PerfEvent CyclesPerfEvent(CounterName);
|
|
if (!CyclesPerfEvent.valid())
|
|
llvm::report_fatal_error("invalid perf event");
|
|
for (size_t I = 0; I < NumMeasurements; ++I) {
|
|
pfm::Counter Counter(CyclesPerfEvent);
|
|
Scratch.clear();
|
|
Counter.start();
|
|
Function(Scratch.ptr());
|
|
Counter.stop();
|
|
const int64_t Value = Counter.read();
|
|
if (Value < MinLatency)
|
|
MinLatency = Value;
|
|
}
|
|
return {{"latency", static_cast<double>(MinLatency) / NumRepetitions, ""}};
|
|
}
|
|
|
|
} // namespace exegesis
|