forked from OSchip/llvm-project
Revert "[llvm-exegesis] Add benchmark latency option on X86 that uses LBR for more precise measurements."
From @erichkeane: ``` This patch doesn't seem to build for me: /iusers/ekeane1/workspaces/llvm-project/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp: In function ‘llvm::Error llvm::exegesis::parseDataBuffer(const char*, size_t, const void*, const void*, llvm::SmallVector<long int, 4>*)’: /iusers/ekeane1/workspaces/llvm-project/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp:99:37: error: ‘struct perf_branch_entry’ has no member named ‘cycles’ CycleArray->push_back(Entry.cycles); I'm on RHEL7, so I have kernel 3.10, so it doesn't have 'cycles'. According to this: https://elixir.bootlin.com/linux/v4.3/source/include/uapi/linux/perf_event.h#L963 kernel 4.3 is the first time that 'cycles' appeared in this structure. ```
This commit is contained in:
parent
7ebc6bed84
commit
6bddd099ac
|
@ -192,24 +192,10 @@ OPTIONS
|
|||
|
||||
.. option:: -mode=[latency|uops|inverse_throughput|analysis]
|
||||
|
||||
Specify the run mode. Note that some modes have additional requirements and options.
|
||||
Specify the run mode. Note that if you pick `analysis` mode, you also need
|
||||
to specify at least one of the `-analysis-clusters-output-file=` and
|
||||
`-analysis-inconsistencies-output-file=`.
|
||||
|
||||
`latency` mode can make use of either RDTSC or LBR.
|
||||
`latency[LBR]` is only available on X86 (at least `Skylake`).
|
||||
To run in this mode, a positive value must be specified for `x86-lbr-sample-period`, and `--repetition-mode=loop` must be used.
|
||||
|
||||
In `analysis` mode, you also need to specify at least one of the
|
||||
`-analysis-clusters-output-file=` and `-analysis-inconsistencies-output-file=`.
|
||||
|
||||
.. option:: -x86-lbr-sample-period=<nBranches/sample>
|
||||
|
||||
Specify the LBR sampling period - how many branches before we take a sample.
|
||||
When a positive value is specified for this option and when the mode is `latency`,
|
||||
we will use LBRs for measuring.
|
||||
On choosing the "right" sampling period, a small value is preferred, but throttling
|
||||
could occur if the sampling is too frequent. A prime number should be used to
|
||||
avoid consistently skipping certain blocks.
|
||||
|
||||
.. option:: -repetition-mode=[duplicate|loop|min]
|
||||
|
||||
Specify the repetition mode. `duplicate` will create a large, straight line
|
||||
|
|
|
@ -1,4 +0,0 @@
|
|||
# LLVM-EXEGESIS-LIVEIN RDI
|
||||
# LLVM-EXEGESIS-DEFREG XMM1 42
|
||||
movq $2, %rdi
|
||||
addq $0x10, %rdi
|
|
@ -1,31 +0,0 @@
|
|||
import subprocess
|
||||
import lit.util
|
||||
|
||||
if not ('X86' in config.root.targets):
|
||||
# We need support for X86.
|
||||
config.unsupported = True
|
||||
|
||||
elif not ('x86_64' in config.root.host_triple):
|
||||
# We need to be running on an X86 host.
|
||||
config.unsupported = True
|
||||
|
||||
else:
|
||||
# We need libpfm to be installed and the host to be at least skylake.
|
||||
llvm_exegesis_exe = lit.util.which('llvm-exegesis', config.llvm_tools_dir)
|
||||
if not llvm_exegesis_exe:
|
||||
print('llvm-exegesis not found')
|
||||
config.unsupported = True
|
||||
else:
|
||||
try:
|
||||
with open(os.devnull, 'w') as quiet:
|
||||
check_llvm_exegesis_uops_result = subprocess.call(
|
||||
[llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
|
||||
check_llvm_exegesis_latency_result = subprocess.call(
|
||||
[llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
|
||||
except OSError:
|
||||
print('could not exec llvm-exegesis')
|
||||
config.unsupported = True
|
||||
if not check_llvm_exegesis_uops_result == 0:
|
||||
config.unsupported = True
|
||||
if not check_llvm_exegesis_latency_result == 0:
|
||||
config.unsupported = True
|
|
@ -1,18 +0,0 @@
|
|||
# RUN: llvm-exegesis -mode=latency --repetition-mode=loop --x86-lbr-sample-period=521 --snippets-file=%p/Inputs/mov_add.att
|
||||
|
||||
|
||||
CHECK: ---
|
||||
CHECK-NEXT: mode: latency
|
||||
CHECK-NEXT: key:
|
||||
CHECK-NEXT: instructions:
|
||||
CHECK-NEXT: 'MOV64ri32 RDI i_0x2'
|
||||
CHECK-NEXT: 'ADD64ri8 RDI RDI i_0x10'
|
||||
CHECK-NEXT: config: ''
|
||||
CHECK-NEXT: {{.*}}
|
||||
CHECK-NEXT: {{.*}}
|
||||
CHECK-NEXT: {{.*}}
|
||||
CHECK-NEXT: {{.*}}
|
||||
CHECK-NEXT: num_repetitions: 10000
|
||||
CHECK-NEXT: measurements:
|
||||
CHECK-NEXT: {{.*}} value: 0.0001, per_snippet_value: 0.0002 {{.*}}
|
||||
CHECK-LAST: ...
|
|
@ -55,6 +55,7 @@ private:
|
|||
static void
|
||||
accumulateCounterValues(const llvm::SmallVector<int64_t, 4> &NewValues,
|
||||
llvm::SmallVector<int64_t, 4> *Result) {
|
||||
|
||||
const size_t NumValues = std::max(NewValues.size(), Result->size());
|
||||
if (NumValues > Result->size())
|
||||
Result->resize(NumValues, 0);
|
||||
|
@ -105,10 +106,10 @@ private:
|
|||
if (Crashed)
|
||||
return make_error<SnippetCrash>("snippet crashed while running");
|
||||
}
|
||||
|
||||
auto ValueOrError = Counter->readOrError(Function.getFunctionBytes());
|
||||
auto ValueOrError = Counter->readOrError();
|
||||
if (!ValueOrError)
|
||||
return ValueOrError.takeError();
|
||||
|
||||
accumulateCounterValues(ValueOrError.get(), &CounterValues);
|
||||
}
|
||||
return CounterValues;
|
||||
|
|
|
@ -128,8 +128,7 @@ int64_t Counter::read() const {
|
|||
return -1;
|
||||
}
|
||||
|
||||
llvm::Expected<llvm::SmallVector<int64_t, 4>>
|
||||
Counter::readOrError(StringRef /*unused*/) const {
|
||||
llvm::Expected<llvm::SmallVector<int64_t, 4>> Counter::readOrError() const {
|
||||
int64_t Count = 0;
|
||||
ssize_t ReadSize = ::read(FileDescriptor, &Count, sizeof(Count));
|
||||
if (ReadSize != sizeof(Count))
|
||||
|
@ -153,8 +152,7 @@ void Counter::stop() {}
|
|||
|
||||
int64_t Counter::read() const { return 42; }
|
||||
|
||||
llvm::Expected<llvm::SmallVector<int64_t, 4>>
|
||||
Counter::readOrError(StringRef /*unused*/) const {
|
||||
llvm::Expected<llvm::SmallVector<int64_t, 4>> Counter::readOrError() const {
|
||||
return llvm::make_error<llvm::StringError>("Not implemented",
|
||||
llvm::errc::io_error);
|
||||
}
|
||||
|
|
|
@ -59,9 +59,8 @@ public:
|
|||
// e.g. "snb_ep::INSTRUCTION_RETIRED:e=0:i=0:c=0:t=0:u=1:k=0:mg=0:mh=1"
|
||||
StringRef getPfmEventString() const;
|
||||
|
||||
protected:
|
||||
PerfEvent() = default;
|
||||
std::string EventString;
|
||||
private:
|
||||
const std::string EventString;
|
||||
std::string FullQualifiedEventString;
|
||||
perf_event_attr *Attr;
|
||||
};
|
||||
|
@ -88,17 +87,11 @@ public:
|
|||
int64_t read() const;
|
||||
|
||||
/// Returns the current value of the counter or error if it cannot be read.
|
||||
/// FunctionBytes: The benchmark function being executed.
|
||||
/// This is used to filter out the measurements to ensure they are only
|
||||
/// within the benchmarked code.
|
||||
/// If empty (or not specified), then no filtering will be done.
|
||||
/// Not all counters choose to use this.
|
||||
virtual llvm::Expected<llvm::SmallVector<int64_t, 4>>
|
||||
readOrError(StringRef FunctionBytes = StringRef()) const;
|
||||
virtual llvm::Expected<llvm::SmallVector<int64_t, 4>> readOrError() const;
|
||||
|
||||
virtual int numValues() const;
|
||||
|
||||
protected:
|
||||
private:
|
||||
PerfEvent Event;
|
||||
#ifdef HAVE_LIBPFM
|
||||
int FileDescriptor = -1;
|
||||
|
|
|
@ -6,7 +6,6 @@ include_directories(
|
|||
add_library(LLVMExegesisX86
|
||||
STATIC
|
||||
Target.cpp
|
||||
X86Counter.cpp
|
||||
)
|
||||
|
||||
llvm_update_compile_flags(LLVMExegesisX86)
|
||||
|
|
|
@ -14,40 +14,15 @@
|
|||
#include "MCTargetDesc/X86BaseInfo.h"
|
||||
#include "MCTargetDesc/X86MCTargetDesc.h"
|
||||
#include "X86.h"
|
||||
#include "X86Counter.h"
|
||||
#include "X86RegisterInfo.h"
|
||||
#include "X86Subtarget.h"
|
||||
#include "llvm/ADT/Sequence.h"
|
||||
#include "llvm/MC/MCInstBuilder.h"
|
||||
#include "llvm/Support/Errc.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
#include "llvm/Support/FormatVariadic.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
namespace exegesis {
|
||||
|
||||
static cl::OptionCategory
|
||||
BenchmarkOptions("llvm-exegesis benchmark x86-options");
|
||||
|
||||
// If a positive value is specified, we are going to use the LBR in
|
||||
// latency-mode.
|
||||
//
|
||||
// Note:
|
||||
// - A small value is preferred, but too low a value could result in
|
||||
// throttling.
|
||||
// - A prime number is preferred to avoid always skipping certain blocks.
|
||||
//
|
||||
static cl::opt<unsigned> LbrSamplingPeriod(
|
||||
"x86-lbr-sample-period",
|
||||
cl::desc("The sample period (nbranches/sample), used for LBR sampling"),
|
||||
cl::cat(BenchmarkOptions), cl::init(0));
|
||||
|
||||
// FIXME: Validate that repetition-mode is loop if LBR is requested.
|
||||
|
||||
// Returns a non-null reason if we cannot handle the memory references in this
|
||||
// instruction.
|
||||
static const char *isInvalidMemoryInstr(const Instruction &Instr) {
|
||||
|
@ -593,29 +568,10 @@ void ConstantInliner::initStack(unsigned Bytes) {
|
|||
#include "X86GenExegesis.inc"
|
||||
|
||||
namespace {
|
||||
|
||||
class ExegesisX86Target : public ExegesisTarget {
|
||||
public:
|
||||
ExegesisX86Target() : ExegesisTarget(X86CpuPfmCounters) {}
|
||||
|
||||
Expected<std::unique_ptr<pfm::Counter>>
|
||||
createCounter(StringRef CounterName, const LLVMState &State) const override {
|
||||
// If LbrSamplingPeriod was provided, then ignore the
|
||||
// CounterName because we only have one for LBR.
|
||||
if (LbrSamplingPeriod > 0) {
|
||||
// Can't use LBR without HAVE_LIBPFM, or __linux__ (for now)
|
||||
#if defined(HAVE_LIBPFM) && defined(__linux__)
|
||||
return std::make_unique<X86LbrCounter>(
|
||||
X86LbrPerfEvent(LbrSamplingPeriod));
|
||||
#else
|
||||
return llvm::make_error<llvm::StringError>(
|
||||
"LBR counter requested without HAVE_LIBPFM or running on Linux.",
|
||||
llvm::errc::invalid_argument);
|
||||
#endif
|
||||
}
|
||||
return ExegesisTarget::createCounter(CounterName, State);
|
||||
}
|
||||
|
||||
private:
|
||||
void addTargetSpecificPasses(PassManagerBase &PM) const override;
|
||||
|
||||
|
|
|
@ -1,218 +0,0 @@
|
|||
//===-- X86Counter.cpp ------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "X86Counter.h"
|
||||
|
||||
// FIXME: Use appropriate wrappers for poll.h and mman.h
|
||||
// to support Windows and remove this linux-only guard.
|
||||
#ifdef __linux__
|
||||
#include "llvm/Support/Endian.h"
|
||||
#include "llvm/Support/Errc.h"
|
||||
|
||||
#ifdef HAVE_LIBPFM
|
||||
#include "perfmon/perf_event.h"
|
||||
#include "perfmon/pfmlib.h"
|
||||
#include "perfmon/pfmlib_perf_event.h"
|
||||
#endif // HAVE_LIBPFM
|
||||
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <poll.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#ifdef HAVE_LIBPFM
|
||||
namespace llvm {
|
||||
namespace exegesis {
|
||||
|
||||
static constexpr size_t kBufferPages = 8;
|
||||
static const size_t kDataBufferSize = kBufferPages * getpagesize();
|
||||
|
||||
// Waits for the LBR perf events.
|
||||
static int pollLbrPerfEvent(const int FileDescriptor) {
|
||||
struct pollfd PollFd;
|
||||
PollFd.fd = FileDescriptor;
|
||||
PollFd.events = POLLIN;
|
||||
PollFd.revents = 0;
|
||||
return poll(&PollFd, 1 /* num of fds */, 10000 /* timeout in ms */);
|
||||
}
|
||||
|
||||
// Copies the data-buffer into Buf, given the pointer to MMapped.
|
||||
static void copyDataBuffer(void *MMappedBuffer, char *Buf, uint64_t Tail,
|
||||
size_t DataSize) {
|
||||
// First page is reserved for perf_event_mmap_page. Data buffer starts on
|
||||
// the next page.
|
||||
char *Start = reinterpret_cast<char *>(MMappedBuffer) + getpagesize();
|
||||
// The LBR buffer is a cyclic buffer, we copy data to another buffer.
|
||||
uint64_t Offset = Tail % kDataBufferSize;
|
||||
size_t CopySize = kDataBufferSize - Offset;
|
||||
memcpy(Buf, Start + Offset, CopySize);
|
||||
if (CopySize >= DataSize)
|
||||
return;
|
||||
|
||||
memcpy(Buf + CopySize, Start, Offset);
|
||||
return;
|
||||
}
|
||||
|
||||
// Parses the given data-buffer for stats and fill the CycleArray.
|
||||
// If data has been extracted successfully, also modifies the code to jump
|
||||
// out the benchmark loop.
|
||||
static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize,
|
||||
const void *From, const void *To,
|
||||
llvm::SmallVector<int64_t, 4> *CycleArray) {
|
||||
assert(From != nullptr && To != nullptr);
|
||||
const char *DataPtr = DataBuf;
|
||||
while (DataPtr < DataBuf + DataSize) {
|
||||
struct perf_event_header Header;
|
||||
memcpy(&Header, DataPtr, sizeof(struct perf_event_header));
|
||||
if (Header.type != PERF_RECORD_SAMPLE) {
|
||||
// Ignores non-sample records.
|
||||
DataPtr += Header.size;
|
||||
continue;
|
||||
}
|
||||
DataPtr += sizeof(Header);
|
||||
uint64_t Count = llvm::support::endian::read64(DataPtr, support::native);
|
||||
DataPtr += sizeof(Count);
|
||||
|
||||
struct perf_branch_entry Entry;
|
||||
memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry));
|
||||
// Read the perf_branch_entry array.
|
||||
for (uint64_t i = 0; i < Count; ++i) {
|
||||
const uint64_t BlockStart = From == nullptr
|
||||
? std::numeric_limits<uint64_t>::min()
|
||||
: reinterpret_cast<uint64_t>(From);
|
||||
const uint64_t BlockEnd = To == nullptr
|
||||
? std::numeric_limits<uint64_t>::max()
|
||||
: reinterpret_cast<uint64_t>(To);
|
||||
|
||||
if (BlockStart <= Entry.from && BlockEnd >= Entry.to)
|
||||
CycleArray->push_back(Entry.cycles);
|
||||
|
||||
if (i == Count - 1)
|
||||
// We've reached the last entry.
|
||||
return llvm::Error::success();
|
||||
|
||||
// Advance to next entry
|
||||
DataPtr += sizeof(Entry);
|
||||
memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry));
|
||||
}
|
||||
}
|
||||
return llvm::make_error<llvm::StringError>("Unable to parse databuffer.",
|
||||
llvm::errc::io_error);
|
||||
}
|
||||
|
||||
#ifdef HAVE_LIBPFM
|
||||
X86LbrPerfEvent::X86LbrPerfEvent(unsigned SamplingPeriod) {
|
||||
assert(SamplingPeriod > 0 && "SamplingPeriod must be positive");
|
||||
EventString = "BR_INST_RETIRED.NEAR_TAKEN";
|
||||
Attr = new perf_event_attr();
|
||||
Attr->size = sizeof(*Attr);
|
||||
Attr->type = PERF_TYPE_RAW;
|
||||
// FIXME This is SKL's encoding. Not sure if it'll change.
|
||||
Attr->config = 0x20c4; // BR_INST_RETIRED.NEAR_TAKEN
|
||||
Attr->sample_type = PERF_SAMPLE_BRANCH_STACK;
|
||||
// Don't need to specify "USER" because we've already excluded HV and Kernel.
|
||||
Attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY;
|
||||
Attr->sample_period = SamplingPeriod;
|
||||
Attr->wakeup_events = 1; // We need this even when using ioctl REFRESH.
|
||||
Attr->disabled = 1;
|
||||
Attr->exclude_kernel = 1;
|
||||
Attr->exclude_hv = 1;
|
||||
Attr->read_format = PERF_FORMAT_GROUP;
|
||||
|
||||
FullQualifiedEventString = EventString;
|
||||
}
|
||||
#else
|
||||
X86LbrPerfEvent::X86LbrPerfEvent(unsigned SamplingPeriod) {
|
||||
EventString = "";
|
||||
Attr = nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
X86LbrCounter::X86LbrCounter(pfm::PerfEvent &&NewEvent)
|
||||
: Counter(std::move(NewEvent)) {
|
||||
// First page is reserved for perf_event_mmap_page. Data buffer starts on
|
||||
// the next page, so we allocate one more page.
|
||||
MMappedBuffer = mmap(nullptr, (kBufferPages + 1) * getpagesize(),
|
||||
PROT_READ | PROT_WRITE, MAP_SHARED, FileDescriptor, 0);
|
||||
if (MMappedBuffer == MAP_FAILED)
|
||||
llvm::errs() << "Failed to mmap buffer.";
|
||||
}
|
||||
|
||||
X86LbrCounter::~X86LbrCounter() { close(FileDescriptor); }
|
||||
|
||||
void X86LbrCounter::start() {
|
||||
ioctl(FileDescriptor, PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */);
|
||||
}
|
||||
|
||||
llvm::Expected<llvm::SmallVector<int64_t, 4>>
|
||||
X86LbrCounter::readOrError(StringRef FunctionBytes) const {
|
||||
// The max number of time-outs/retries before we give up.
|
||||
static constexpr int kMaxTimeouts = 160;
|
||||
|
||||
// Disable the event before reading
|
||||
ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0);
|
||||
|
||||
// Parses the LBR buffer and fills CycleArray with the sequence of cycle
|
||||
// counts from the buffer.
|
||||
llvm::SmallVector<int64_t, 4> CycleArray;
|
||||
std::unique_ptr<char[]> DataBuf(new char[kDataBufferSize]);
|
||||
int NumTimeouts = 0;
|
||||
int PollResult = 0;
|
||||
|
||||
// Find the boundary of the function so that we could filter the LBRs
|
||||
// to keep only the relevant records.
|
||||
if (FunctionBytes.empty())
|
||||
return llvm::make_error<llvm::StringError>("Empty function bytes",
|
||||
llvm::errc::invalid_argument);
|
||||
const void *From = reinterpret_cast<const void *>(FunctionBytes.data());
|
||||
const void *To = reinterpret_cast<const void *>(FunctionBytes.data() +
|
||||
FunctionBytes.size());
|
||||
while (PollResult <= 0) {
|
||||
PollResult = pollLbrPerfEvent(FileDescriptor);
|
||||
if (PollResult > 0)
|
||||
break;
|
||||
if (PollResult == -1)
|
||||
return llvm::make_error<llvm::StringError>("Cannot poll LBR perf event.",
|
||||
llvm::errc::io_error);
|
||||
if (NumTimeouts++ >= kMaxTimeouts)
|
||||
return llvm::make_error<llvm::StringError>(
|
||||
"LBR polling still timed out after max number of attempts.",
|
||||
llvm::errc::device_or_resource_busy);
|
||||
}
|
||||
|
||||
struct perf_event_mmap_page Page;
|
||||
memcpy(&Page, MMappedBuffer, sizeof(struct perf_event_mmap_page));
|
||||
|
||||
const uint64_t DataTail = Page.data_tail;
|
||||
const uint64_t DataHead = Page.data_head;
|
||||
// We're supposed to use a barrier after reading data_head.
|
||||
std::atomic_thread_fence(std::memory_order_acq_rel);
|
||||
const size_t DataSize = DataHead - DataTail;
|
||||
if (DataSize > kDataBufferSize)
|
||||
return llvm::make_error<llvm::StringError>(
|
||||
"DataSize larger than buffer size.", llvm::errc::invalid_argument);
|
||||
|
||||
copyDataBuffer(MMappedBuffer, DataBuf.get(), DataTail, DataSize);
|
||||
llvm::Error error =
|
||||
parseDataBuffer(DataBuf.get(), DataSize, From, To, &CycleArray);
|
||||
if (!error)
|
||||
return CycleArray;
|
||||
return std::move(error);
|
||||
}
|
||||
|
||||
} // namespace exegesis
|
||||
} // namespace llvm
|
||||
|
||||
#endif // HAVE_LIBPFM
|
||||
#endif // __linux__
|
|
@ -1,53 +0,0 @@
|
|||
//===-- X86Counter.h --------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// Perf counter that reads the LBRs for measuring the benchmarked block's
|
||||
/// throughput.
|
||||
///
|
||||
/// More info at: https://lwn.net/Articles/680985
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef LLVM_TOOLS_LLVM_EXEGESIS_LIB_X86_X86COUNTER_H
|
||||
#define LLVM_TOOLS_LLVM_EXEGESIS_LIB_X86_X86COUNTER_H
|
||||
|
||||
#include "../PerfHelper.h"
|
||||
#include "llvm/Support/Error.h"
|
||||
|
||||
// FIXME: Use appropriate wrappers for poll.h and mman.h
|
||||
// to support Windows and remove this linux-only guard.
|
||||
#if defined(__linux__) && defined(HAVE_LIBPFM)
|
||||
|
||||
namespace llvm {
|
||||
namespace exegesis {
|
||||
|
||||
class X86LbrPerfEvent : public pfm::PerfEvent {
|
||||
public:
|
||||
X86LbrPerfEvent(unsigned SamplingPeriod);
|
||||
};
|
||||
|
||||
class X86LbrCounter : public pfm::Counter {
|
||||
public:
|
||||
explicit X86LbrCounter(pfm::PerfEvent &&Event);
|
||||
|
||||
virtual ~X86LbrCounter();
|
||||
|
||||
void start() override;
|
||||
|
||||
llvm::Expected<llvm::SmallVector<int64_t, 4>>
|
||||
readOrError(StringRef FunctionBytes) const override;
|
||||
|
||||
private:
|
||||
void *MMappedBuffer = nullptr;
|
||||
};
|
||||
|
||||
} // namespace exegesis
|
||||
} // namespace llvm
|
||||
|
||||
#endif // defined(__linux__) && defined(HAVE_LIBPFM)
|
||||
|
||||
#endif // LLVM_TOOLS_LLVM_EXEGESIS_LIB_X86_X86COUNTER_H
|
|
@ -160,12 +160,6 @@ static cl::opt<std::string>
|
|||
cl::desc(""), cl::cat(AnalysisOptions),
|
||||
cl::init(""));
|
||||
|
||||
static cl::list<std::string>
|
||||
AllowedHostCpus("allowed-host-cpu",
|
||||
cl::desc("If specified, only run the benchmark if the host "
|
||||
"CPU matches the names"),
|
||||
cl::cat(Options), cl::ZeroOrMore);
|
||||
|
||||
static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
|
||||
"analysis-display-unstable-clusters",
|
||||
cl::desc("if there is more than one benchmark for an opcode, said "
|
||||
|
@ -302,13 +296,6 @@ void benchmarkMain() {
|
|||
|
||||
const LLVMState State(CpuName);
|
||||
|
||||
llvm::StringRef ActualCpu = State.getTargetMachine().getTargetCPU();
|
||||
for (auto Begin = AllowedHostCpus.begin(); Begin != AllowedHostCpus.end();
|
||||
++Begin) {
|
||||
if (ActualCpu != *Begin)
|
||||
ExitWithError(llvm::Twine("Unexpected host CPU ").concat(ActualCpu));
|
||||
}
|
||||
|
||||
const std::unique_ptr<BenchmarkRunner> Runner =
|
||||
ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
|
||||
BenchmarkMode, State, ResultAggMode));
|
||||
|
|
Loading…
Reference in New Issue