//===---- X86FixupSetCC.cpp - fix zero-extension of setcc patterns -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a pass that fixes zero-extension of setcc patterns.
//
// X86 setcc instructions are modeled to have no input arguments, and a single
// GR8 output argument. This is consistent with other similar instructions
// (e.g. movb), but means it is impossible to directly generate a setcc into
// the lower GR8 of a specified GR32.
// This means that ISel must select (zext (setcc)) into something like
//   seta %al; movzbl %al, %eax.
// Unfortunately, this can cause a stall due to the partial register write
// performed by the setcc. Instead, we can use:
//   xor %eax, %eax; seta %al
// This both avoids the stall and gives a shorter encoding.
//===----------------------------------------------------------------------===//
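// In machine-IR terms, the rewrite performed below is roughly the following
// (a sketch only; the register names are illustrative):
//
//   Before:                              After:
//     <instruction defining eflags>        %zero:gr32 = MOV32r0
//     %b:gr8  = SETAr                      <instruction defining eflags>
//     %z:gr32 = MOVZX32rr8 %b              %b:gr8  = SETAr
//                                          %r:gr32 = INSERT_SUBREG %zero, %b, sub_8bit
//
// All users of %z are then rewritten to use %r, and the MOVZX is erased. The
// MOV32r0 is placed before the eflags-defining instruction because it itself
// clobbers eflags.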

#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

#define DEBUG_TYPE "x86-fixup-setcc"

STATISTIC(NumSubstZexts, "Number of setcc + zext pairs substituted");

namespace {
class X86FixupSetCCPass : public MachineFunctionPass {
public:
  X86FixupSetCCPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 Fixup SetCC"; }

  bool runOnMachineFunction(MachineFunction &MF) override;

private:
  // Find the preceding instruction that imp-defs eflags.
  MachineInstr *findFlagsImpDef(MachineBasicBlock *MBB,
                                MachineBasicBlock::reverse_iterator MI);

  // Return true if MI imp-uses eflags.
  bool impUsesFlags(MachineInstr *MI);

  // Return true if this is the opcode of a SetCC instruction with a register
  // output.
  bool isSetCCr(unsigned Opcode);

  MachineRegisterInfo *MRI;
  const X86InstrInfo *TII;
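
  // Upper bound on how many instructions findFlagsImpDef scans backwards for
  // the eflags definition (see the comment above that function).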
  enum { SearchBound = 16 };

  static char ID;
};

char X86FixupSetCCPass::ID = 0;
}

FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); }

bool X86FixupSetCCPass::isSetCCr(unsigned Opcode) {
  switch (Opcode) {
  default:
    return false;
  case X86::SETOr:
  case X86::SETNOr:
  case X86::SETBr:
  case X86::SETAEr:
  case X86::SETEr:
  case X86::SETNEr:
  case X86::SETBEr:
  case X86::SETAr:
  case X86::SETSr:
  case X86::SETNSr:
  case X86::SETPr:
  case X86::SETNPr:
  case X86::SETLr:
  case X86::SETGEr:
  case X86::SETLEr:
  case X86::SETGr:
    return true;
  }
}

// We expect the instruction *immediately* before the setcc to imp-def
// EFLAGS (because of scheduling glue). To make this less brittle w.r.t
// scheduling, look backwards until we hit the beginning of the
// basic-block, or a small bound (to avoid quadratic behavior).
MachineInstr *
X86FixupSetCCPass::findFlagsImpDef(MachineBasicBlock *MBB,
                                   MachineBasicBlock::reverse_iterator MI) {
  // FIXME: Should this be instr_rend(), and MI be reverse_instr_iterator?
  auto MBBStart = MBB->rend();
  for (int i = 0; (i < SearchBound) && (MI != MBBStart); ++i, ++MI)
    for (auto &Op : MI->implicit_operands())
      if ((Op.getReg() == X86::EFLAGS) && (Op.isDef()))
        return &*MI;

  return nullptr;
}

bool X86FixupSetCCPass::impUsesFlags(MachineInstr *MI) {
  for (auto &Op : MI->implicit_operands())
    if ((Op.getReg() == X86::EFLAGS) && (Op.isUse()))
      return true;

  return false;
}

bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  MRI = &MF.getRegInfo();
  TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
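
  // Zexts made dead by the rewrite are collected here and erased only after
  // the walk over the function, so erasing never invalidates the iteration.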
  SmallVector<MachineInstr*, 4> ToErase;

  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      // Find a setcc that is used by a zext.
      // This doesn't have to be the only use; the transformation is safe
      // regardless.
      if (!isSetCCr(MI.getOpcode()))
        continue;

      MachineInstr *ZExt = nullptr;
      for (auto &Use : MRI->use_instructions(MI.getOperand(0).getReg()))
        if (Use.getOpcode() == X86::MOVZX32rr8)
          ZExt = &Use;

      if (!ZExt)
        continue;

      // Find the preceding instruction that imp-defs eflags.
      MachineInstr *FlagsDefMI = findFlagsImpDef(
          MI.getParent(), MachineBasicBlock::reverse_iterator(&MI));
      if (!FlagsDefMI)
        continue;

      // We'd like to put something that clobbers eflags directly before
      // FlagsDefMI. This can't hurt anything after FlagsDefMI, because
      // FlagsDefMI, by definition, clobbers eflags itself. But it may happen
      // that FlagsDefMI also *uses* eflags, in which case the transformation
      // is invalid.
      if (impUsesFlags(FlagsDefMI))
        continue;

      ++NumSubstZexts;
      Changed = true;

      // On 32-bit, we need to be careful to force an ABCD register: only
      // EAX, EBX, ECX, and EDX have an addressable low-byte subregister.
      const TargetRegisterClass *RC = MF.getSubtarget<X86Subtarget>().is64Bit()
                                          ? &X86::GR32RegClass
                                          : &X86::GR32_ABCDRegClass;
      unsigned ZeroReg = MRI->createVirtualRegister(RC);
      unsigned InsertReg = MRI->createVirtualRegister(RC);
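      // ZeroReg will hold the all-zero GR32; InsertReg will hold that value
      // with the setcc result placed in its low byte.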

      // Initialize a register with 0. This must go before the eflags def.
      BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0),
              ZeroReg);

      // X86 setcc only produces a GR8, so build the GR32 value the zext's
      // users expect by inserting the setcc result into the low byte of the
      // zeroed register.
      BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(),
              TII->get(X86::INSERT_SUBREG), InsertReg)
          .addReg(ZeroReg)
          .addReg(MI.getOperand(0).getReg())
          .addImm(X86::sub_8bit);
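      // Redirect every user of the zext's result to InsertReg; the zext
      // itself becomes dead and is erased after the walk.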
      MRI->replaceRegWith(ZExt->getOperand(0).getReg(), InsertReg);
      ToErase.push_back(ZExt);
    }
  }

  for (auto &I : ToErase)
    I->eraseFromParent();

  return Changed;
}