//===------- X86ExpandPseudo.cpp - Expand pseudo instructions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling, if-conversion, other late
// optimizations, or simply the encoding of the instructions.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
|
|
|
|
#include "X86FrameLowering.h"
|
|
|
|
#include "X86InstrBuilder.h"
|
|
|
|
#include "X86InstrInfo.h"
|
|
|
|
#include "X86MachineFunctionInfo.h"
|
|
|
|
#include "X86Subtarget.h"
|
|
|
|
#include "llvm/Analysis/EHPersonalities.h"
|
|
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
|
|
#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
|
|
|
|
#include "llvm/IR/GlobalValue.h"
|
|
|
|
using namespace llvm;
|
|
|
|
|
|
|
|
#define DEBUG_TYPE "x86-pseudo"
|
2019-04-06 04:18:21 +08:00
|
|
|
#define X86_EXPAND_PSEUDO_NAME "X86 pseudo instruction expansion pass"
|
2015-12-22 07:04:27 +08:00
|
|
|
|
|
|
|
namespace {
class X86ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  X86ExpandPseudo() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addPreservedID(MachineDominatorsID);
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  const X86Subtarget *STI;
  const X86InstrInfo *TII;
  const X86RegisterInfo *TRI;
  const X86MachineFunctionInfo *X86FI;
  const X86FrameLowering *X86FL;

  bool runOnMachineFunction(MachineFunction &Fn) override;

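  // Pseudo expansion runs on fully allocated machine code, so require that
  // all virtual registers have already been eliminated.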
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override {
    return "X86 pseudo instruction expansion pass";
  }

private:
  void ExpandICallBranchFunnel(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator MBBI);

  bool ExpandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool ExpandMBB(MachineBasicBlock &MBB);
};
char X86ExpandPseudo::ID = 0;

} // End anonymous namespace.

INITIALIZE_PASS(X86ExpandPseudo, DEBUG_TYPE, X86_EXPAND_PSEUDO_NAME, false,
                false)

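// Expand the ICALL_BRANCH_FUNNEL pseudo (typically generated from the
// llvm.icall.branch.funnel intrinsic used by whole-program devirtualization):
// lower it into a binary search of CMP/JCC blocks over the jump-table slots,
// each leaf ending in a direct tail call.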
void X86ExpandPseudo::ExpandICallBranchFunnel(
    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) {
  MachineBasicBlock *JTMBB = MBB;
  MachineInstr *JTInst = &*MBBI;
  MachineFunction *MF = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();
  auto InsPt = MachineFunction::iterator(MBB);
  ++InsPt;

  std::vector<std::pair<MachineBasicBlock *, unsigned>> TargetMBBs;
  DebugLoc DL = JTInst->getDebugLoc();
  MachineOperand Selector = JTInst->getOperand(0);
  const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal();

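  // Materialize the address of jump-table slot 'Target' into R11 and compare
  // the selector against it, leaving EFLAGS set for the branches below.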
  auto CmpTarget = [&](unsigned Target) {
    if (Selector.isReg())
      MBB->addLiveIn(Selector.getReg());
    BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11)
        .addReg(X86::RIP)
        .addImm(1)
        .addReg(0)
        .addGlobalAddress(CombinedGlobal,
                          JTInst->getOperand(2 + 2 * Target).getImm())
        .addReg(0);
    BuildMI(*MBB, MBBI, DL, TII->get(X86::CMP64rr))
        .add(Selector)
        .addReg(X86::R11);
  };

  auto CreateMBB = [&]() {
    auto *NewMBB = MF->CreateMachineBasicBlock(BB);
    MBB->addSuccessor(NewMBB);
    if (!MBB->isLiveIn(X86::EFLAGS))
      MBB->addLiveIn(X86::EFLAGS);
    return NewMBB;
  };

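  // Emit a conditional jump to ThenMBB, then continue emission in a fresh
  // fall-through block that becomes the new current MBB.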
  auto EmitCondJump = [&](unsigned CC, MachineBasicBlock *ThenMBB) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::JCC_1)).addMBB(ThenMBB).addImm(CC);

    auto *ElseMBB = CreateMBB();
    MF->insert(InsPt, ElseMBB);
    MBB = ElseMBB;
    MBBI = MBB->end();
  };

  auto EmitCondJumpTarget = [&](unsigned CC, unsigned Target) {
    auto *ThenMBB = CreateMBB();
    TargetMBBs.push_back({ThenMBB, Target});
    EmitCondJump(CC, ThenMBB);
  };

  auto EmitTailCall = [&](unsigned Target) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * Target));
  };

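  // Recursively emit the funnel: one target is a plain tail call, two targets
  // need a single compare and branch, up to five targets are handled as a
  // short linear chain, and larger ranges are split in half around the
  // middle slot.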
  std::function<void(unsigned, unsigned)> EmitBranchFunnel =
      [&](unsigned FirstTarget, unsigned NumTargets) {
    if (NumTargets == 1) {
      EmitTailCall(FirstTarget);
      return;
    }

    if (NumTargets == 2) {
      CmpTarget(FirstTarget + 1);
      EmitCondJumpTarget(X86::COND_B, FirstTarget);
      EmitTailCall(FirstTarget + 1);
      return;
    }

    if (NumTargets < 6) {
      CmpTarget(FirstTarget + 1);
      EmitCondJumpTarget(X86::COND_B, FirstTarget);
      EmitCondJumpTarget(X86::COND_E, FirstTarget + 1);
      EmitBranchFunnel(FirstTarget + 2, NumTargets - 2);
      return;
    }

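    // Six or more targets: compare against the middle slot. Below goes to
    // ThenMBB (filled in recursively afterwards), equal tail-calls the middle
    // target, and the upper half continues in the fall-through block.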
    auto *ThenMBB = CreateMBB();
    CmpTarget(FirstTarget + (NumTargets / 2));
    EmitCondJump(X86::COND_B, ThenMBB);
    EmitCondJumpTarget(X86::COND_E, FirstTarget + (NumTargets / 2));
    EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1,
                     NumTargets - (NumTargets / 2) - 1);

    MF->insert(InsPt, ThenMBB);
    MBB = ThenMBB;
    MBBI = MBB->end();
    EmitBranchFunnel(FirstTarget, NumTargets / 2);
  };

  EmitBranchFunnel(0, (JTInst->getNumOperands() - 2) / 2);
  for (auto P : TargetMBBs) {
    MF->insert(InsPt, P.first);
    BuildMI(P.first, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * P.second));
  }
  JTMBB->erase(JTInst);
}

/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();
  switch (Opcode) {
  default:
    return false;
  case X86::TCRETURNdi:
  case X86::TCRETURNdicc:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNdi64cc:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64: {
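    // Operand 0 is the jump target. The memory forms use the first
    // X86::AddrNumOperands operands for the address, so their stack
    // adjustment immediate follows them; all other forms keep it in
    // operand 1.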
    bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? X86::AddrNumOperands
                                                         : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
      assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
    }

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
      X86FL->emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue=*/true);
    }

    // Jump to label or value in register.
    bool IsWin64 = STI->isTargetWin64();
    if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
        Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
      unsigned Op;
      switch (Opcode) {
      case X86::TCRETURNdi:
        Op = X86::TAILJMPd;
        break;
      case X86::TCRETURNdicc:
        Op = X86::TAILJMPd_CC;
        break;
      case X86::TCRETURNdi64cc:
        assert(!MBB.getParent()->hasWinCFI() &&
               "Conditional tail calls confuse "
               "the Win64 unwinder.");
        Op = X86::TAILJMPd64_CC;
        break;
      default:
        // Note: Win64 uses REX prefixes for indirect jumps out of functions,
        // but not for direct ones.
        Op = X86::TAILJMPd64;
        break;
      }
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
      if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
        MIB.addImm(MBBI->getOperand(2).getImm());
      }

    } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
      unsigned Op = (Opcode == X86::TCRETURNmi)
                        ? X86::TAILJMPm
                        : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      for (unsigned i = 0; i != X86::AddrNumOperands; ++i)
        MIB.add(MBBI->getOperand(i));
    } else if (Opcode == X86::TCRETURNri64) {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL,
              TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .add(JumpTarget);
    } else {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
          .add(JumpTarget);
    }

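    // The replacement tail call was inserted just before the pseudo; copy the
    // pseudo's implicit operands onto it and transfer the call-site info
    // before the pseudo is erased.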
    MachineInstr &NewMI = *std::prev(MBBI);
    NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);
    MBB.getParent()->updateCallSiteInfo(&*MBBI, &NewMI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);

    return true;
  }
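  // EH_RETURN{,64} carries the desired stack address in operand 0; all that
  // is needed here is to move it into the stack pointer register.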
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    const bool Uses64BitFramePtr =
        STI->isTarget64BitLP64() || STI->isTargetNaCl64();
    Register StackPtr = TRI->getStackRegister();
    BuildMI(MBB, MBBI, DL,
            TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), StackPtr)
        .addReg(DestAddr.getReg());
    // The EH_RETURN pseudo is really removed during the MC Lowering.
    return true;
  }
  case X86::IRET: {
    // Adjust stack to erase error code
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, true);
    // Replace pseudo with machine iret
    BuildMI(MBB, MBBI, DL,
            TII->get(STI->is64Bit() ? X86::IRET64 : X86::IRET32));
    MBB.erase(MBBI);
    return true;
  }
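  // The RET pseudo carries the number of bytes to pop in operand 0. Small
  // amounts fold into a RETI* immediate; anything wider than 16 bits (32-bit
  // only) is popped manually below by juggling the return address through
  // ECX.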
  case X86::RET: {
    // Adjust stack to erase error code
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    MachineInstrBuilder MIB;
    if (StackAdj == 0) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETQ : X86::RETL));
    } else if (isUInt<16>(StackAdj)) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETIQ : X86::RETIL))
                .addImm(StackAdj);
    } else {
      assert(!STI->is64Bit() &&
             "shouldn't need to do this for x86_64 targets!");
      // A ret can only handle immediates as big as 2**16-1. If we need to pop
      // off bytes before the return address, we must do it manually.
      BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r)).addReg(X86::ECX, RegState::Define);
      X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, /*InEpilogue=*/true);
      BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX);
      MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RETL));
    }
    for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
      MIB.add(MBBI->getOperand(I));
    MBB.erase(MBBI);
    return true;
  }
  case X86::EH_RESTORE: {
    // Restore ESP and EBP, and optionally ESI if required.
    bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(
        MBB.getParent()->getFunction().getPersonalityFn()));
    X86FL->restoreWin32EHStackPointers(MBB, MBBI, DL, /*RestoreSP=*/IsSEH);
    MBBI->eraseFromParent();
    return true;
  }
  case X86::LCMPXCHG8B_SAVE_EBX:
  case X86::LCMPXCHG16B_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx
    // =>
    // [E|R]BX = InArg
    // actualcmpxchg Addr
    // [E|R]BX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(6);
    Register SaveRbx = MBBI->getOperand(7).getReg();

    unsigned ActualInArg =
        Opcode == X86::LCMPXCHG8B_SAVE_EBX ? X86::EBX : X86::RBX;
    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    TII->copyPhysReg(MBB, MBBI, DL, ActualInArg, InArg.getReg(),
                     InArg.isKill());
    // Create the actual instruction.
    unsigned ActualOpc =
        Opcode == X86::LCMPXCHG8B_SAVE_EBX ? X86::LCMPXCHG8B : X86::LCMPXCHG16B;
    MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(ActualOpc));
    // Copy the operands related to the address.
    for (unsigned Idx = 1; Idx < 6; ++Idx)
      NewInstr->addOperand(MBBI->getOperand(Idx));
    // Finally, restore the value of RBX.
    TII->copyPhysReg(MBB, MBBI, DL, ActualInArg, SaveRbx,
                     /*SrcIsKill*/ true);

    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  case TargetOpcode::ICALL_BRANCH_FUNNEL:
    ExpandICallBranchFunnel(&MBB, MBBI);
    return true;
  }
  llvm_unreachable("Previous switch has a fallthrough?");
}

/// Expand all pseudo instructions contained in \p MBB.
/// \returns true if any expansion occurred for \p MBB.
bool X86ExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // MBBI may be invalidated by the expansion.
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= ExpandMI(MBB, MBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

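// Cache the subtarget objects used by the expansions above, then walk every
// basic block in the function.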
bool X86ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &static_cast<const X86Subtarget &>(MF.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  X86FI = MF.getInfo<X86MachineFunctionInfo>();
  X86FL = STI->getFrameLowering();

  bool Modified = false;
  for (MachineBasicBlock &MBB : MF)
    Modified |= ExpandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createX86ExpandPseudoPass() {
  return new X86ExpandPseudo();
}