[llvm-exegesis] Explore LEA addressing modes.
Summary:
This will help for PR32326.

This shows the well-known issue with `RBP` and `R13` as base registers:
x86 cannot encode either of them as a base with no displacement, so even the
`0(...)` configurations carry an explicit zero displacement byte in the
encoding.

Reviewers: gchatelet

Subscribers: tschuett, llvm-commits, RKSimon, andreadb

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D68646

llvm-svn: 374146
parent 4e969da33e
commit c3a7fb7599
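For orientation before the diff: the new snippet generator enumerates every combination of base register, index register, scale, and displacement, and renders each one as a config string such as `42(%RSI, %RDI, 2)`. A minimal standalone sketch of that enumeration (not part of the patch; the register list is illustrative, and the fixed {0, 42} displacements mirror the loops added below):

```cpp
// Illustrative only: prints the LEA addressing-mode space explored by the
// patch, e.g. "42(%RSI, %RDI, 2)". The register list is a made-up subset.
#include <cstdio>
#include <initializer_list>

int main() {
  const char *Regs[] = {"RAX", "RSI", "RBP", "R13"};
  for (const char *Base : Regs)
    for (const char *Index : Regs)
      for (int LogScale = 0; LogScale <= 3; ++LogScale) // scale = 1, 2, 4, 8
        for (int Disp : {0, 42})
          std::printf("%d(%%%s, %%%s, %d)\n", Disp, Base, Index, 1 << LogScale);
  return 0;
}
```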
@@ -0,0 +1,16 @@
+# RUN: llvm-exegesis -mode=latency -opcode-name=LEA64r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s
+# RUN: llvm-exegesis -mode=latency -opcode-name=LEA64r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s
+
+CHECK: ---
+CHECK-NEXT: mode: latency
+CHECK-NEXT: key:
+CHECK-NEXT: instructions:
+CHECK-NEXT: LEA64r
+CHECK-NEXT: config: '0(%[[REG1:[A-Z0-9]+]], %[[REG1]], 1)'
+
+CHECK: ---
+CHECK-NEXT: mode: latency
+CHECK-NEXT: key:
+CHECK-NEXT: instructions:
+CHECK-NEXT: LEA64r
+CHECK-NEXT: config: '42(%[[REG2:[A-Z0-9]+]], %[[REG2]], 1)'
@@ -0,0 +1,16 @@
+# RUN: llvm-exegesis -mode=uops -opcode-name=LEA64r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s
+# RUN: llvm-exegesis -mode=uops -opcode-name=LEA64r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s
+
+CHECK: ---
+CHECK-NEXT: mode: uops
+CHECK-NEXT: key:
+CHECK-NEXT: instructions:
+CHECK-NEXT: LEA64r
+CHECK-NEXT: config: '0(%[[REG1:[A-Z0-9]+]], %[[REG2:[A-Z0-9]+]], 1)'
+
+CHECK: ---
+CHECK-NEXT: mode: uops
+CHECK-NEXT: key:
+CHECK-NEXT: instructions:
+CHECK-NEXT: LEA64r
+CHECK-NEXT: config: '42(%[[REG3:[A-Z0-9]+]], %[[REG4:[A-Z0-9]+]], 1)'
@@ -103,6 +103,13 @@ private:
       RegisterClasses;
 };
 
+// `a = a & ~b`, optimized for few bit sets in B and no allocation.
+inline void remove(llvm::BitVector &A, const llvm::BitVector &B) {
+  assert(A.size() == B.size());
+  for (auto I : B.set_bits())
+    A.reset(I);
+}
+
 } // namespace exegesis
 } // namespace llvm
 
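The new helper is the `a &= ~b` operation spelled out over B's set bits only. A hypothetical usage sketch (the bit values are arbitrary, not from the patch):

```cpp
// Sketch of what remove(A, B) computes: clear in A every bit that is set in B.
#include "llvm/ADT/BitVector.h"
#include <cassert>

void removeExample() {
  llvm::BitVector A(8), B(8);
  A.set(1); A.set(3); A.set(5); // A = {1, 3, 5}
  B.set(3); B.set(7);           // B = {3, 7}
  for (auto I : B.set_bits())   // same loop as remove(A, B) above
    A.reset(I);
  assert(A.test(1) && !A.test(3) && A.test(5) && !A.test(7)); // A = {1, 5}
}
```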
@@ -89,12 +89,6 @@ getVariablesWithTiedOperands(const Instruction &Instr) {
   return Result;
 }
 
-static void remove(llvm::BitVector &a, const llvm::BitVector &b) {
-  assert(a.size() == b.size());
-  for (auto I : b.set_bits())
-    a.reset(I);
-}
-
 UopsBenchmarkRunner::~UopsBenchmarkRunner() = default;
 
 UopsSnippetGenerator::~UopsSnippetGenerator() = default;
@@ -17,6 +17,7 @@
 #include "X86RegisterInfo.h"
 #include "X86Subtarget.h"
 #include "llvm/MC/MCInstBuilder.h"
+#include "llvm/Support/FormatVariadic.h"
 
 namespace llvm {
 namespace exegesis {
@@ -177,6 +178,72 @@ static unsigned getX86FPFlags(const Instruction &Instr) {
   return Instr.Description->TSFlags & llvm::X86II::FPTypeMask;
 }
 
+// Helper to fill a memory operand with a value.
+static void setMemOp(InstructionTemplate &IT, int OpIdx,
+                     const MCOperand &OpVal) {
+  const auto Op = IT.Instr.Operands[OpIdx];
+  assert(Op.isExplicit() && "invalid memory pattern");
+  IT.getValueFor(Op) = OpVal;
+};
+
+// Common (latency, uops) code for LEA templates. `GetDestReg` takes the
+// addressing base and index registers and returns the LEA destination register.
+static llvm::Expected<std::vector<CodeTemplate>> generateLEATemplatesCommon(
+    const Instruction &Instr, const BitVector &ForbiddenRegisters,
+    const LLVMState &State, const SnippetGenerator::Options &Opts,
+    std::function<unsigned(unsigned, unsigned)> GetDestReg) {
+  assert(Instr.Operands.size() == 6 && "invalid LEA");
+  assert(X86II::getMemoryOperandNo(Instr.Description->TSFlags) == 1 &&
+         "invalid LEA");
+
+  constexpr const int kDestOp = 0;
+  constexpr const int kBaseOp = 1;
+  constexpr const int kIndexOp = 3;
+  auto PossibleDestRegs =
+      Instr.Operands[kDestOp].getRegisterAliasing().sourceBits();
+  remove(PossibleDestRegs, ForbiddenRegisters);
+  auto PossibleBaseRegs =
+      Instr.Operands[kBaseOp].getRegisterAliasing().sourceBits();
+  remove(PossibleBaseRegs, ForbiddenRegisters);
+  auto PossibleIndexRegs =
+      Instr.Operands[kIndexOp].getRegisterAliasing().sourceBits();
+  remove(PossibleIndexRegs, ForbiddenRegisters);
+
+  const auto &RegInfo = State.getRegInfo();
+  std::vector<CodeTemplate> Result;
+  for (const unsigned BaseReg : PossibleBaseRegs.set_bits()) {
+    for (const unsigned IndexReg : PossibleIndexRegs.set_bits()) {
+      for (int LogScale = 0; LogScale <= 3; ++LogScale) {
+        // FIXME: Add an option for controlling how we explore immediates.
+        for (const int Disp : {0, 42}) {
+          InstructionTemplate IT(Instr);
+          const int64_t Scale = 1ull << LogScale;
+          setMemOp(IT, 1, MCOperand::createReg(BaseReg));
+          setMemOp(IT, 2, MCOperand::createImm(Scale));
+          setMemOp(IT, 3, MCOperand::createReg(IndexReg));
+          setMemOp(IT, 4, MCOperand::createImm(Disp));
+          // SegmentReg must be 0 for LEA.
+          setMemOp(IT, 5, MCOperand::createReg(0));
+
+          // Output reg is selected by the caller.
+          setMemOp(IT, 0, MCOperand::createReg(GetDestReg(BaseReg, IndexReg)));
+
+          CodeTemplate CT;
+          CT.Instructions.push_back(std::move(IT));
+          CT.Config = formatv("{3}(%{0}, %{1}, {2})", RegInfo.getName(BaseReg),
+                              RegInfo.getName(IndexReg), Scale, Disp)
+                          .str();
+          Result.push_back(std::move(CT));
+          if (Result.size() >= Opts.MaxConfigsPerOpcode)
+            return Result;
+        }
+      }
+    }
+  }
+
+  return Result;
+}
+
 namespace {
 class X86LatencySnippetGenerator : public LatencySnippetGenerator {
 public:
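The config string built above uses `llvm::formatv` with positional arguments, which is why `FormatVariadic.h` is now included. A minimal sketch of that formatting call in isolation (register names and values are illustrative):

```cpp
#include "llvm/Support/FormatVariadic.h"
#include <string>

std::string exampleConfig() {
  // {3} = displacement, {0} = base, {1} = index, {2} = scale.
  return llvm::formatv("{3}(%{0}, %{1}, {2})", "RSI", "RDI", 2, 42).str();
  // -> "42(%RSI, %RDI, 2)"
}
```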
@@ -194,6 +261,17 @@ X86LatencySnippetGenerator::generateCodeTemplates(
   if (auto E = IsInvalidOpcode(Instr))
     return std::move(E);
 
+  // LEA gets special attention.
+  const auto Opcode = Instr.Description->getOpcode();
+  if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) {
+    return generateLEATemplatesCommon(Instr, ForbiddenRegisters, State, Opts,
+                                      [](unsigned BaseReg, unsigned IndexReg) {
+                                        // We just select the same base and
+                                        // output register.
+                                        return BaseReg;
+                                      });
+  }
+
   switch (getX86FPFlags(Instr)) {
   case llvm::X86II::NotFP:
     return LatencySnippetGenerator::generateCodeTemplates(Instr,
@@ -225,6 +303,7 @@ public:
   generateCodeTemplates(const Instruction &Instr,
                         const BitVector &ForbiddenRegisters) const override;
 };
 
 } // namespace
 
 llvm::Expected<std::vector<CodeTemplate>>
@@ -233,6 +312,28 @@ X86UopsSnippetGenerator::generateCodeTemplates(
   if (auto E = IsInvalidOpcode(Instr))
     return std::move(E);
 
+  // LEA gets special attention.
+  const auto Opcode = Instr.Description->getOpcode();
+  if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) {
+    // Any destination register that is not used for addressing is fine.
+    auto PossibleDestRegs =
+        Instr.Operands[0].getRegisterAliasing().sourceBits();
+    remove(PossibleDestRegs, ForbiddenRegisters);
+    return generateLEATemplatesCommon(
+        Instr, ForbiddenRegisters, State, Opts,
+        [this, &PossibleDestRegs](unsigned BaseReg, unsigned IndexReg) {
+          auto PossibleDestRegsNow = PossibleDestRegs;
+          remove(PossibleDestRegsNow,
+                 State.getRATC().getRegister(BaseReg).aliasedBits());
+          remove(PossibleDestRegsNow,
+                 State.getRATC().getRegister(IndexReg).aliasedBits());
+          assert(PossibleDestRegsNow.set_bits().begin() !=
+                     PossibleDestRegsNow.set_bits().end() &&
+                 "no remaining registers");
+          return *PossibleDestRegsNow.set_bits().begin();
+        });
+  }
+
   switch (getX86FPFlags(Instr)) {
   case llvm::X86II::NotFP:
     return UopsSnippetGenerator::generateCodeTemplates(Instr,
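The two callers differ only in the `GetDestReg` policy passed to `generateLEATemplatesCommon`: the latency generator reuses the base register so consecutive LEAs form a dependency chain, while the uops generator picks a destination that does not alias the base or index so the LEAs stay independent. A simplified sketch of the two policies, using a plain set instead of the real `BitVector`/register-aliasing machinery (not the patch's implementation):

```cpp
#include <set>

// Latency mode: the output feeds the next LEA's base, serializing the chain.
unsigned pickLatencyDest(unsigned BaseReg, unsigned /*IndexReg*/) {
  return BaseReg;
}

// Uops mode: any allowed register not used for addressing keeps LEAs independent.
unsigned pickUopsDest(const std::set<unsigned> &AllowedRegs, unsigned BaseReg,
                      unsigned IndexReg) {
  for (unsigned Reg : AllowedRegs)
    if (Reg != BaseReg && Reg != IndexReg)
      return Reg;
  return BaseReg; // fallback; the real code asserts a register remains
}
```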
@@ -548,17 +649,11 @@ void ExegesisX86Target::fillMemoryOperands(InstructionTemplate &IT,
       ++MemOpIdx;
     }
   }
-  // Now fill in the memory operands.
-  const auto SetOp = [&IT](int OpIdx, const MCOperand &OpVal) {
-    const auto Op = IT.Instr.Operands[OpIdx];
-    assert(Op.isMemory() && Op.isExplicit() && "invalid memory pattern");
-    IT.getValueFor(Op) = OpVal;
-  };
-  SetOp(MemOpIdx + 0, MCOperand::createReg(Reg));        // BaseReg
-  SetOp(MemOpIdx + 1, MCOperand::createImm(1));          // ScaleAmt
-  SetOp(MemOpIdx + 2, MCOperand::createReg(0));          // IndexReg
-  SetOp(MemOpIdx + 3, MCOperand::createImm(Offset));     // Disp
-  SetOp(MemOpIdx + 4, MCOperand::createReg(0));          // Segment
+  setMemOp(IT, MemOpIdx + 0, MCOperand::createReg(Reg));    // BaseReg
+  setMemOp(IT, MemOpIdx + 1, MCOperand::createImm(1));      // ScaleAmt
+  setMemOp(IT, MemOpIdx + 2, MCOperand::createReg(0));      // IndexReg
+  setMemOp(IT, MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp
+  setMemOp(IT, MemOpIdx + 4, MCOperand::createReg(0));      // Segment
 }
 
 void ExegesisX86Target::decrementLoopCounterAndJump(