forked from OSchip/llvm-project
170 lines
4.8 KiB
C++
170 lines
4.8 KiB
C++
//===-- SIPreEmitPeephole.cpp ------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
/// \file
|
|
/// This pass performs the peephole optimizations before code emission.
|
|
///
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "AMDGPU.h"
|
|
#include "AMDGPUSubtarget.h"
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
#include "SIInstrInfo.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "si-pre-emit-peephole"
|
|
|
|
namespace {
|
|
|
|
class SIPreEmitPeephole : public MachineFunctionPass {
|
|
private:
|
|
const SIInstrInfo *TII = nullptr;
|
|
const SIRegisterInfo *TRI = nullptr;
|
|
|
|
bool optimizeVccBranch(MachineInstr &MI) const;
|
|
|
|
public:
|
|
static char ID;
|
|
|
|
SIPreEmitPeephole() : MachineFunctionPass(ID) {
|
|
initializeSIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
|
|
}
|
|
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
|
};
|
|
|
|
} // End anonymous namespace.
|
|
|
|
INITIALIZE_PASS(SIPreEmitPeephole, DEBUG_TYPE,
|
|
"SI peephole optimizations", false, false)
|
|
|
|
char SIPreEmitPeephole::ID = 0;
|
|
|
|
char &llvm::SIPreEmitPeepholeID = SIPreEmitPeephole::ID;
|
|
|
|
bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
|
|
// Match:
|
|
// sreg = -1
|
|
// vcc = S_AND_B64 exec, sreg
|
|
// S_CBRANCH_VCC[N]Z
|
|
// =>
|
|
// S_CBRANCH_EXEC[N]Z
|
|
// We end up with this pattern sometimes after basic block placement.
|
|
// It happens while combining a block which assigns -1 to a saved mask and
|
|
// another block which consumes that saved mask and then a branch.
|
|
bool Changed = false;
|
|
MachineBasicBlock &MBB = *MI.getParent();
|
|
const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
|
|
const bool IsWave32 = ST.isWave32();
|
|
const unsigned CondReg = TRI->getVCC();
|
|
const unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
|
|
const unsigned And = IsWave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
|
|
|
|
MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
|
|
E = MBB.rend();
|
|
bool ReadsCond = false;
|
|
unsigned Threshold = 5;
|
|
for (++A; A != E; ++A) {
|
|
if (!--Threshold)
|
|
return false;
|
|
if (A->modifiesRegister(ExecReg, TRI))
|
|
return false;
|
|
if (A->modifiesRegister(CondReg, TRI)) {
|
|
if (!A->definesRegister(CondReg, TRI) || A->getOpcode() != And)
|
|
return false;
|
|
break;
|
|
}
|
|
ReadsCond |= A->readsRegister(CondReg, TRI);
|
|
}
|
|
if (A == E)
|
|
return false;
|
|
|
|
MachineOperand &Op1 = A->getOperand(1);
|
|
MachineOperand &Op2 = A->getOperand(2);
|
|
if (Op1.getReg() != ExecReg && Op2.isReg() && Op2.getReg() == ExecReg) {
|
|
TII->commuteInstruction(*A);
|
|
Changed = true;
|
|
}
|
|
if (Op1.getReg() != ExecReg)
|
|
return Changed;
|
|
if (Op2.isImm() && Op2.getImm() != -1)
|
|
return Changed;
|
|
|
|
Register SReg;
|
|
if (Op2.isReg()) {
|
|
SReg = Op2.getReg();
|
|
auto M = std::next(A);
|
|
bool ReadsSreg = false;
|
|
for (; M != E; ++M) {
|
|
if (M->definesRegister(SReg, TRI))
|
|
break;
|
|
if (M->modifiesRegister(SReg, TRI))
|
|
return Changed;
|
|
ReadsSreg |= M->readsRegister(SReg, TRI);
|
|
}
|
|
if (M == E || !M->isMoveImmediate() || !M->getOperand(1).isImm() ||
|
|
M->getOperand(1).getImm() != -1)
|
|
return Changed;
|
|
// First if sreg is only used in and instruction fold the immediate
|
|
// into that and.
|
|
if (!ReadsSreg && Op2.isKill()) {
|
|
A->getOperand(2).ChangeToImmediate(-1);
|
|
M->eraseFromParent();
|
|
}
|
|
}
|
|
|
|
if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC) &&
|
|
MI.killsRegister(CondReg, TRI))
|
|
A->eraseFromParent();
|
|
|
|
bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
|
|
if (SReg == ExecReg) {
|
|
if (IsVCCZ) {
|
|
MI.eraseFromParent();
|
|
return true;
|
|
}
|
|
MI.setDesc(TII->get(AMDGPU::S_BRANCH));
|
|
} else {
|
|
MI.setDesc(
|
|
TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ : AMDGPU::S_CBRANCH_EXECNZ));
|
|
}
|
|
|
|
MI.RemoveOperand(MI.findRegisterUseOperandIdx(CondReg, false /*Kill*/, TRI));
|
|
MI.addImplicitDefUseOperands(*MBB.getParent());
|
|
|
|
return true;
|
|
}
|
|
|
|
bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
|
TII = ST.getInstrInfo();
|
|
TRI = &TII->getRegisterInfo();
|
|
bool Changed = false;
|
|
|
|
for (MachineBasicBlock &MBB : MF) {
|
|
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
|
|
if (MBBI == MBB.end())
|
|
continue;
|
|
|
|
MachineInstr &MI = *MBBI;
|
|
switch (MI.getOpcode()) {
|
|
case AMDGPU::S_CBRANCH_VCCZ:
|
|
case AMDGPU::S_CBRANCH_VCCNZ:
|
|
Changed |= optimizeVccBranch(MI);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
return Changed;
|
|
}
|