[llvm-exegesis] Explore LEA addressing modes.

Summary:
This will help with PR32326.

This shows the well-known issue with `RBP` and `R13` as base registers: the ModRM/SIB encoding cannot express these bases with no displacement, so even a zero-displacement LEA costs an explicit disp8 and becomes the longer three-component form.
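
As an aside (not part of the patch), the encoding rule behind that quirk can be sketched as follows; the helper and its name are purely illustrative:

// Simplified sketch of the x86 rule; ignores REX and full SIB handling.
// With mod == 0b00, a base register whose low three bits are 0b101
// (RBP, R13) does not mean "no displacement": that pattern instead selects
// disp32 or RIP-relative addressing. Using RBP or R13 as a base therefore
// requires mod == 0b01 with an explicit disp8, even when the displacement
// is zero.
bool baseNeedsExplicitDisp(unsigned BaseRegLow3Bits) {
  return BaseRegLow3Bits == 0b101; // RBP and R13 both encode as 101
}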

Reviewers: gchatelet

Subscribers: tschuett, llvm-commits, RKSimon, andreadb

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D68646

llvm-svn: 374146
Clement Courbet 2019-10-09 08:49:13 +00:00
parent 4e969da33e
commit c3a7fb7599
5 changed files with 145 additions and 17 deletions


@@ -0,0 +1,16 @@
# RUN: llvm-exegesis -mode=latency -opcode-name=LEA64r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s
# RUN: llvm-exegesis -mode=latency -opcode-name=LEA64r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s
CHECK: ---
CHECK-NEXT: mode: latency
CHECK-NEXT: key:
CHECK-NEXT: instructions:
CHECK-NEXT: LEA64r
CHECK-NEXT: config: '0(%[[REG1:[A-Z0-9]+]], %[[REG1]], 1)'
CHECK: ---
CHECK-NEXT: mode: latency
CHECK-NEXT: key:
CHECK-NEXT: instructions:
CHECK-NEXT: LEA64r
CHECK-NEXT: config: '42(%[[REG2:[A-Z0-9]+]], %[[REG2]], 1)'


@@ -0,0 +1,16 @@
# RUN: llvm-exegesis -mode=uops -opcode-name=LEA64r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s
# RUN: llvm-exegesis -mode=uops -opcode-name=LEA64r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s
CHECK: ---
CHECK-NEXT: mode: uops
CHECK-NEXT: key:
CHECK-NEXT: instructions:
CHECK-NEXT: LEA64r
CHECK-NEXT: config: '0(%[[REG1:[A-Z0-9]+]], %[[REG2:[A-Z0-9]+]], 1)'
CHECK: ---
CHECK-NEXT: mode: uops
CHECK-NEXT: key:
CHECK-NEXT: instructions:
CHECK-NEXT: LEA64r
CHECK-NEXT: config: '42(%[[REG3:[A-Z0-9]+]], %[[REG4:[A-Z0-9]+]], 1)'


@@ -103,6 +103,13 @@ private:
  RegisterClasses;
};
// `A = A & ~B`, optimized for a `B` with few set bits; performs no
// allocation.
inline void remove(llvm::BitVector &A, const llvm::BitVector &B) {
  assert(A.size() == B.size());
  for (auto I : B.set_bits())
    A.reset(I);
}
} // namespace exegesis
} // namespace llvm
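
For context, a minimal usage sketch of this helper (standalone and hypothetical, assuming the function above is in scope):

#include "llvm/ADT/BitVector.h"
#include <cassert>

void removeExample() {
  llvm::BitVector Candidates(16); // e.g. one bit per candidate register
  Candidates.set(3);
  Candidates.set(5);
  llvm::BitVector Forbidden(16);
  Forbidden.set(5);
  remove(Candidates, Forbidden); // Candidates &= ~Forbidden
  assert(Candidates.count() == 1 && Candidates.test(3));
}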


@@ -89,12 +89,6 @@ getVariablesWithTiedOperands(const Instruction &Instr) {
  return Result;
}
-static void remove(llvm::BitVector &a, const llvm::BitVector &b) {
-  assert(a.size() == b.size());
-  for (auto I : b.set_bits())
-    a.reset(I);
-}
UopsBenchmarkRunner::~UopsBenchmarkRunner() = default;

UopsSnippetGenerator::~UopsSnippetGenerator() = default;


@@ -17,6 +17,7 @@
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/FormatVariadic.h"

namespace llvm {
namespace exegesis {
@@ -177,6 +178,72 @@ static unsigned getX86FPFlags(const Instruction &Instr) {
  return Instr.Description->TSFlags & llvm::X86II::FPTypeMask;
}
// Helper to fill a memory operand with a value.
static void setMemOp(InstructionTemplate &IT, int OpIdx,
                     const MCOperand &OpVal) {
  const auto Op = IT.Instr.Operands[OpIdx];
  assert(Op.isExplicit() && "invalid memory pattern");
  IT.getValueFor(Op) = OpVal;
}
// Common (latency, uops) code for LEA templates. `GetDestReg` takes the
// addressing base and index registers and returns the LEA destination
// register.
static llvm::Expected<std::vector<CodeTemplate>> generateLEATemplatesCommon(
    const Instruction &Instr, const BitVector &ForbiddenRegisters,
    const LLVMState &State, const SnippetGenerator::Options &Opts,
    std::function<unsigned(unsigned, unsigned)> GetDestReg) {
  assert(Instr.Operands.size() == 6 && "invalid LEA");
  assert(X86II::getMemoryOperandNo(Instr.Description->TSFlags) == 1 &&
         "invalid LEA");

  constexpr const int kDestOp = 0;
  constexpr const int kBaseOp = 1;
  constexpr const int kIndexOp = 3;
  auto PossibleDestRegs =
      Instr.Operands[kDestOp].getRegisterAliasing().sourceBits();
  remove(PossibleDestRegs, ForbiddenRegisters);
  auto PossibleBaseRegs =
      Instr.Operands[kBaseOp].getRegisterAliasing().sourceBits();
  remove(PossibleBaseRegs, ForbiddenRegisters);
  auto PossibleIndexRegs =
      Instr.Operands[kIndexOp].getRegisterAliasing().sourceBits();
  remove(PossibleIndexRegs, ForbiddenRegisters);

  const auto &RegInfo = State.getRegInfo();
  std::vector<CodeTemplate> Result;
  for (const unsigned BaseReg : PossibleBaseRegs.set_bits()) {
    for (const unsigned IndexReg : PossibleIndexRegs.set_bits()) {
      for (int LogScale = 0; LogScale <= 3; ++LogScale) {
        // FIXME: Add an option for controlling how we explore immediates.
        for (const int Disp : {0, 42}) {
          InstructionTemplate IT(Instr);
          const int64_t Scale = 1ull << LogScale;
          setMemOp(IT, 1, MCOperand::createReg(BaseReg));
          setMemOp(IT, 2, MCOperand::createImm(Scale));
          setMemOp(IT, 3, MCOperand::createReg(IndexReg));
          setMemOp(IT, 4, MCOperand::createImm(Disp));
          // SegmentReg must be 0 for LEA.
          setMemOp(IT, 5, MCOperand::createReg(0));
          // Output reg is selected by the caller.
          setMemOp(IT, 0, MCOperand::createReg(GetDestReg(BaseReg, IndexReg)));
          CodeTemplate CT;
          CT.Instructions.push_back(std::move(IT));
          CT.Config = formatv("{3}(%{0}, %{1}, {2})", RegInfo.getName(BaseReg),
                              RegInfo.getName(IndexReg), Scale, Disp)
                          .str();
          Result.push_back(std::move(CT));
          if (Result.size() >= Opts.MaxConfigsPerOpcode)
            return Result;
        }
      }
    }
  }
  return Result;
}
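
For reference, the resulting `Config` string uses AT&T memory-operand syntax, `Disp(%Base, %Index, Scale)`. A standalone sketch of the `formatv` call above, with hypothetical register names:

#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"

void printExampleConfig() {
  // Same format string as in generateLEATemplatesCommon.
  const std::string Config =
      llvm::formatv("{3}(%{0}, %{1}, {2})", "RDI", "RSI", 4, 42).str();
  llvm::outs() << Config << "\n"; // prints: 42(%RDI, %RSI, 4)
}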
namespace {
class X86LatencySnippetGenerator : public LatencySnippetGenerator {
public:
@@ -194,6 +261,17 @@ X86LatencySnippetGenerator::generateCodeTemplates(
  if (auto E = IsInvalidOpcode(Instr))
    return std::move(E);
  // LEA gets special attention.
  const auto Opcode = Instr.Description->getOpcode();
  if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) {
    return generateLEATemplatesCommon(
        Instr, ForbiddenRegisters, State, Opts,
        [](unsigned BaseReg, unsigned IndexReg) {
          // Select the base register as the output register as well: this
          // makes each LEA depend on the previous one, which is the
          // dependency chain that latency measurement needs.
          return BaseReg;
        });
  }
  switch (getX86FPFlags(Instr)) {
  case llvm::X86II::NotFP:
    return LatencySnippetGenerator::generateCodeTemplates(Instr,
@@ -225,6 +303,7 @@ public:
  generateCodeTemplates(const Instruction &Instr,
                        const BitVector &ForbiddenRegisters) const override;
};
} // namespace

llvm::Expected<std::vector<CodeTemplate>>
@@ -233,6 +312,28 @@ X86UopsSnippetGenerator::generateCodeTemplates(
  if (auto E = IsInvalidOpcode(Instr))
    return std::move(E);
  // LEA gets special attention.
  const auto Opcode = Instr.Description->getOpcode();
  if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) {
    // Any destination register that is not used for addressing is fine.
    auto PossibleDestRegs =
        Instr.Operands[0].getRegisterAliasing().sourceBits();
    remove(PossibleDestRegs, ForbiddenRegisters);
    return generateLEATemplatesCommon(
        Instr, ForbiddenRegisters, State, Opts,
        [this, &PossibleDestRegs](unsigned BaseReg, unsigned IndexReg) {
          auto PossibleDestRegsNow = PossibleDestRegs;
          remove(PossibleDestRegsNow,
                 State.getRATC().getRegister(BaseReg).aliasedBits());
          remove(PossibleDestRegsNow,
                 State.getRATC().getRegister(IndexReg).aliasedBits());
          assert(PossibleDestRegsNow.set_bits().begin() !=
                     PossibleDestRegsNow.set_bits().end() &&
                 "no remaining registers");
          return *PossibleDestRegsNow.set_bits().begin();
        });
  }
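
A standalone sketch of that destination choice (hypothetical helper; note that `llvm::BitVector::reset(const BitVector &)` computes the same `A &= ~B` as `remove`):

#include "llvm/ADT/BitVector.h"
#include <cassert>

// Pick the first candidate that aliases neither the base nor the index.
unsigned pickDestReg(llvm::BitVector Candidates,
                     const llvm::BitVector &BaseAliases,
                     const llvm::BitVector &IndexAliases) {
  Candidates.reset(BaseAliases);  // drop registers aliasing the base
  Candidates.reset(IndexAliases); // drop registers aliasing the index
  assert(Candidates.any() && "no remaining registers");
  return *Candidates.set_bits().begin();
}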
  switch (getX86FPFlags(Instr)) {
  case llvm::X86II::NotFP:
    return UopsSnippetGenerator::generateCodeTemplates(Instr,
@@ -548,17 +649,11 @@ void ExegesisX86Target::fillMemoryOperands(InstructionTemplate &IT,
      ++MemOpIdx;
    }
  }
-  // Now fill in the memory operands.
-  const auto SetOp = [&IT](int OpIdx, const MCOperand &OpVal) {
-    const auto Op = IT.Instr.Operands[OpIdx];
-    assert(Op.isMemory() && Op.isExplicit() && "invalid memory pattern");
-    IT.getValueFor(Op) = OpVal;
-  };
-  SetOp(MemOpIdx + 0, MCOperand::createReg(Reg));    // BaseReg
-  SetOp(MemOpIdx + 1, MCOperand::createImm(1));      // ScaleAmt
-  SetOp(MemOpIdx + 2, MCOperand::createReg(0));      // IndexReg
-  SetOp(MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp
-  SetOp(MemOpIdx + 4, MCOperand::createReg(0));      // Segment
+  setMemOp(IT, MemOpIdx + 0, MCOperand::createReg(Reg));    // BaseReg
+  setMemOp(IT, MemOpIdx + 1, MCOperand::createImm(1));      // ScaleAmt
+  setMemOp(IT, MemOpIdx + 2, MCOperand::createReg(0));      // IndexReg
+  setMemOp(IT, MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp
+  setMemOp(IT, MemOpIdx + 4, MCOperand::createReg(0));      // Segment
}
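
For orientation, an x86 memory reference in MC is five consecutive operands in the fixed order above. A target-free sketch using MCInstBuilder (the opcode and registers are placeholders; a real load or store would also need its register operands):

#include "llvm/MC/MCInstBuilder.h"

llvm::MCInst buildMemRef(unsigned Opcode, unsigned BaseReg, int64_t Disp) {
  // Operand order mirrors what fillMemoryOperands writes:
  // BaseReg, ScaleAmt, IndexReg, Disp, Segment.
  return llvm::MCInstBuilder(Opcode)
      .addReg(BaseReg) // BaseReg
      .addImm(1)       // ScaleAmt
      .addReg(0)       // IndexReg (none)
      .addImm(Disp)    // Disp
      .addReg(0);      // Segment (none)
}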
void ExegesisX86Target::decrementLoopCounterAndJump(