2017-08-10 08:46:15 +08:00
|
|
|
//===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
|
2013-04-02 05:48:05 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2013-04-02 05:48:05 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
/// \file
|
|
|
|
/// This pass turns all control flow pseudo instructions into native ones,
|
2017-08-10 08:46:15 +08:00
|
|
|
/// computing their address on the fly; it also sets STACK_SIZE info.
|
|
|
|
//
|
2013-04-02 05:48:05 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "AMDGPU.h"
|
2014-06-13 09:32:00 +08:00
|
|
|
#include "AMDGPUSubtarget.h"
|
2013-04-02 05:48:05 +08:00
|
|
|
#include "R600Defines.h"
|
|
|
|
#include "R600InstrInfo.h"
|
|
|
|
#include "R600MachineFunctionInfo.h"
|
|
|
|
#include "R600RegisterInfo.h"
|
AMDGPU: Remove #include "MCTargetDesc/AMDGPUMCTargetDesc.h" from common headers
Summary:
MCTargetDesc/AMDGPUMCTargetDesc.h contains enums for all the instuction
and register defintions, which are huge so we only want to include
them where needed.
This will also make it easier if we want to split the R600 and GCN
definitions into separate tablegenerated files.
I was unable to remove AMDGPUMCTargetDesc.h from SIMachineFunctionInfo.h
because it uses some enums from the header to initialize default values
for the SIMachineFunction class, so I ended up having to remove includes of
SIMachineFunctionInfo.h from headers too.
Reviewers: arsenm, nhaehnle
Reviewed By: nhaehnle
Subscribers: MatzeB, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46272
llvm-svn: 332930
2018-05-22 10:03:23 +08:00
|
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
2017-01-21 01:52:16 +08:00
|
|
|
#include "llvm/ADT/STLExtras.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/ADT/SmallVector.h"
|
2017-01-21 01:52:16 +08:00
|
|
|
#include "llvm/ADT/StringRef.h"
|
|
|
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
2013-04-02 05:48:05 +08:00
|
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
2017-01-21 01:52:16 +08:00
|
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
2013-04-02 05:48:05 +08:00
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
2017-01-21 01:52:16 +08:00
|
|
|
#include "llvm/CodeGen/MachineOperand.h"
|
|
|
|
#include "llvm/IR/CallingConv.h"
|
|
|
|
#include "llvm/IR/DebugLoc.h"
|
2017-08-10 08:46:15 +08:00
|
|
|
#include "llvm/IR/Function.h"
|
|
|
|
#include "llvm/Pass.h"
|
|
|
|
#include "llvm/Support/Compiler.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
2017-01-21 01:52:16 +08:00
|
|
|
#include "llvm/Support/MathExtras.h"
|
2013-05-24 01:10:37 +08:00
|
|
|
#include "llvm/Support/raw_ostream.h"
|
2017-01-21 01:52:16 +08:00
|
|
|
#include <algorithm>
|
|
|
|
#include <cassert>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <set>
|
|
|
|
#include <utility>
|
|
|
|
#include <vector>
|
2013-04-02 05:48:05 +08:00
|
|
|
|
2013-05-24 01:10:37 +08:00
|
|
|
using namespace llvm;
|
|
|
|
|
2014-04-22 10:41:26 +08:00
|
|
|
#define DEBUG_TYPE "r600cf"
|
|
|
|
|
2013-05-24 01:10:37 +08:00
|
|
|
namespace {
|
2013-04-02 05:48:05 +08:00
|
|
|
|
2014-01-23 05:55:43 +08:00
|
|
|
struct CFStack {
|
|
|
|
enum StackItem {
|
|
|
|
ENTRY = 0,
|
|
|
|
SUB_ENTRY = 1,
|
|
|
|
FIRST_NON_WQM_PUSH = 2,
|
|
|
|
FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
|
|
|
|
};
|
|
|
|
|
2016-06-24 14:30:11 +08:00
|
|
|
const R600Subtarget *ST;
|
2014-01-23 05:55:43 +08:00
|
|
|
std::vector<StackItem> BranchStack;
|
|
|
|
std::vector<StackItem> LoopStack;
|
|
|
|
unsigned MaxStackSize;
|
2017-01-21 01:52:16 +08:00
|
|
|
unsigned CurrentEntries = 0;
|
|
|
|
unsigned CurrentSubEntries = 0;
|
2014-01-23 05:55:43 +08:00
|
|
|
|
2016-06-24 14:30:11 +08:00
|
|
|
CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
|
2014-01-23 05:55:43 +08:00
|
|
|
// We need to reserve a stack entry for CALL_FS in vertex shaders.
|
2017-01-21 01:52:16 +08:00
|
|
|
MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}
|
2014-01-23 05:55:43 +08:00
|
|
|
|
|
|
|
unsigned getLoopDepth();
|
|
|
|
bool branchStackContains(CFStack::StackItem);
|
|
|
|
bool requiresWorkAroundForInst(unsigned Opcode);
|
|
|
|
unsigned getSubEntrySize(CFStack::StackItem Item);
|
|
|
|
void updateMaxStackSize();
|
|
|
|
void pushBranch(unsigned Opcode, bool isWQM = false);
|
|
|
|
void pushLoop();
|
|
|
|
void popBranch();
|
|
|
|
void popLoop();
|
|
|
|
};
|
|
|
|
|
|
|
|
unsigned CFStack::getLoopDepth() {
|
|
|
|
return LoopStack.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if any item currently on the branch stack equals \p Item.
bool CFStack::branchStackContains(CFStack::StackItem Item) {
  return std::find(BranchStack.begin(), BranchStack.end(), Item) !=
         BranchStack.end();
}
|
|
|
|
|
2014-01-24 00:18:02 +08:00
|
|
|
bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
|
2014-01-24 00:18:02 +08:00
|
|
|
getLoopDepth() > 1)
|
|
|
|
return true;
|
|
|
|
|
2015-01-31 07:24:40 +08:00
|
|
|
if (!ST->hasCFAluBug())
|
2014-01-24 00:18:02 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
switch(Opcode) {
|
|
|
|
default: return false;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::CF_ALU_PUSH_BEFORE:
|
|
|
|
case R600::CF_ALU_ELSE_AFTER:
|
|
|
|
case R600::CF_ALU_BREAK:
|
|
|
|
case R600::CF_ALU_CONTINUE:
|
2014-01-24 00:18:02 +08:00
|
|
|
if (CurrentSubEntries == 0)
|
|
|
|
return false;
|
2015-01-31 07:24:40 +08:00
|
|
|
if (ST->getWavefrontSize() == 64) {
|
2014-01-24 00:18:02 +08:00
|
|
|
// We are being conservative here. We only require this work-around if
|
|
|
|
// CurrentSubEntries > 3 &&
|
|
|
|
// (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
|
|
|
|
//
|
|
|
|
// We have to be conservative, because we don't know for certain that
|
|
|
|
// our stack allocation algorithm for Evergreen/NI is correct. Applying this
|
|
|
|
// work-around when CurrentSubEntries > 3 allows us to over-allocate stack
|
|
|
|
// resources without any problems.
|
|
|
|
return CurrentSubEntries > 3;
|
|
|
|
} else {
|
2015-01-31 07:24:40 +08:00
|
|
|
assert(ST->getWavefrontSize() == 32);
|
2014-01-24 00:18:02 +08:00
|
|
|
// We are being conservative here. We only require the work-around if
|
|
|
|
// CurrentSubEntries > 7 &&
|
|
|
|
// (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
|
|
|
|
// See the comment on the wavefront size == 64 case for why we are
|
|
|
|
// being conservative.
|
|
|
|
return CurrentSubEntries > 7;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-23 05:55:43 +08:00
|
|
|
/// Returns how many sub-entries \p Item accounts for. Items without an
/// explicit case below (i.e. ENTRY) account for none.
unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
  switch (Item) {
  case CFStack::SUB_ENTRY:
    return 1;
  case CFStack::FIRST_NON_WQM_PUSH:
    assert(!ST->hasCaymanISA());
    // R700 and older:
    //   +1 For the push operation.
    //   +2 Extra space required.
    // Evergreen and newer:
    //   Some documentation says that this is not necessary on Evergreen,
    //   but experimentation has shown that we need to allocate 1 extra
    //   sub-entry for the first non-WQM push.
    //   +1 For the push operation.
    //   +1 Extra space required.
    return ST->getGeneration() <= AMDGPUSubtarget::R700 ? 3 : 2;
  case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
    assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    // +1 For the push operation.
    // +1 Extra space required.
    return 2;
  default:
    return 0;
  }
}
|
|
|
|
|
|
|
|
void CFStack::updateMaxStackSize() {
|
2016-01-15 05:06:47 +08:00
|
|
|
unsigned CurrentStackSize =
|
|
|
|
CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4);
|
2014-01-23 05:55:43 +08:00
|
|
|
MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
|
|
|
|
}
|
|
|
|
|
|
|
|
void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
|
|
|
|
CFStack::StackItem Item = CFStack::ENTRY;
|
|
|
|
switch(Opcode) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::CF_PUSH_EG:
|
|
|
|
case R600::CF_ALU_PUSH_BEFORE:
|
2014-01-23 05:55:43 +08:00
|
|
|
if (!isWQM) {
|
2015-01-31 07:24:40 +08:00
|
|
|
if (!ST->hasCaymanISA() &&
|
|
|
|
!branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
|
2014-01-23 05:55:43 +08:00
|
|
|
Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI
|
|
|
|
// See comment in
|
|
|
|
// CFStack::getSubEntrySize()
|
|
|
|
else if (CurrentEntries > 0 &&
|
2018-07-12 04:59:01 +08:00
|
|
|
ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
|
2015-01-31 07:24:40 +08:00
|
|
|
!ST->hasCaymanISA() &&
|
2014-01-23 05:55:43 +08:00
|
|
|
!branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
|
|
|
|
Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
|
|
|
|
else
|
|
|
|
Item = CFStack::SUB_ENTRY;
|
|
|
|
} else
|
|
|
|
Item = CFStack::ENTRY;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
BranchStack.push_back(Item);
|
|
|
|
if (Item == CFStack::ENTRY)
|
|
|
|
CurrentEntries++;
|
|
|
|
else
|
|
|
|
CurrentSubEntries += getSubEntrySize(Item);
|
|
|
|
updateMaxStackSize();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Records entry into a loop: a loop always accounts for one full entry.
void CFStack::pushLoop() {
  ++CurrentEntries;
  LoopStack.push_back(CFStack::ENTRY);
  updateMaxStackSize();
}
|
|
|
|
|
|
|
|
void CFStack::popBranch() {
|
|
|
|
CFStack::StackItem Top = BranchStack.back();
|
|
|
|
if (Top == CFStack::ENTRY)
|
|
|
|
CurrentEntries--;
|
|
|
|
else
|
|
|
|
CurrentSubEntries-= getSubEntrySize(Top);
|
|
|
|
BranchStack.pop_back();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Undoes the most recent pushLoop(): releases its full entry and removes it
/// from the loop stack.
///
/// Must only be called while at least one loop push is outstanding.
void CFStack::popLoop() {
  // pop_back() on an empty vector is undefined behaviour and CurrentEntries
  // would wrap around; catch an unbalanced pop in asserts builds instead.
  assert(!LoopStack.empty() && "popLoop without a matching pushLoop");
  --CurrentEntries;
  LoopStack.pop_back();
}
|
|
|
|
|
2013-04-02 05:48:05 +08:00
|
|
|
class R600ControlFlowFinalizer : public MachineFunctionPass {
|
|
|
|
private:
|
2017-08-10 08:46:15 +08:00
|
|
|
using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;
|
2013-04-30 08:13:53 +08:00
|
|
|
|
2013-04-08 21:05:49 +08:00
|
|
|
/// Generation-independent names for the control-flow instructions this pass
/// emits. getHWInstrDesc() maps each one to the concrete opcode for the
/// current subtarget.
enum ControlFlowInstruction {
  CF_TC, ///< Fetch clause for instructions using the texture cache.
  CF_VC, ///< Fetch clause for the remaining (non-texture) fetch instructions.
  CF_CALL_FS,
  CF_WHILE_LOOP,
  CF_END_LOOP,
  CF_LOOP_BREAK,
  CF_LOOP_CONTINUE,
  CF_JUMP,
  CF_ELSE,
  CF_POP,
  CF_END
};
|
2013-04-11 12:16:22 +08:00
|
|
|
|
2017-01-21 01:52:16 +08:00
|
|
|
const R600InstrInfo *TII = nullptr;
|
|
|
|
const R600RegisterInfo *TRI = nullptr;
|
2013-04-02 05:48:05 +08:00
|
|
|
unsigned MaxFetchInst;
|
2017-01-21 01:52:16 +08:00
|
|
|
const R600Subtarget *ST = nullptr;
|
2013-04-02 05:48:05 +08:00
|
|
|
|
2016-07-09 03:16:05 +08:00
|
|
|
/// Returns true for KILL and RETURN, the two opcodes this pass treats as
/// trivial; every other opcode yields false.
bool IsTrivialInst(MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();
  return Opc == R600::KILL || Opc == R600::RETURN;
}
|
|
|
|
|
2013-04-08 21:05:49 +08:00
|
|
|
/// Returns the instruction descriptor of the native opcode implementing
/// \p CFI on the current subtarget.
///
/// Evergreen and newer generations use the *_EG opcodes, older ones the
/// *_R600 opcodes. CF_END additionally has a dedicated opcode on Cayman ISA
/// subtargets.
const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
  const bool IsEvergreenOrLater =
      ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN;
  // Picks between the Evergreen+ and the R600/R700 flavour of an opcode.
  auto Select = [IsEvergreenOrLater](unsigned EgOpc, unsigned R600Opc) {
    return IsEvergreenOrLater ? EgOpc : R600Opc;
  };

  unsigned Opcode = 0;
  switch (CFI) {
  case CF_TC:
    Opcode = Select(R600::CF_TC_EG, R600::CF_TC_R600);
    break;
  case CF_VC:
    Opcode = Select(R600::CF_VC_EG, R600::CF_VC_R600);
    break;
  case CF_CALL_FS:
    Opcode = Select(R600::CF_CALL_FS_EG, R600::CF_CALL_FS_R600);
    break;
  case CF_WHILE_LOOP:
    Opcode = Select(R600::WHILE_LOOP_EG, R600::WHILE_LOOP_R600);
    break;
  case CF_END_LOOP:
    Opcode = Select(R600::END_LOOP_EG, R600::END_LOOP_R600);
    break;
  case CF_LOOP_BREAK:
    Opcode = Select(R600::LOOP_BREAK_EG, R600::LOOP_BREAK_R600);
    break;
  case CF_LOOP_CONTINUE:
    Opcode = Select(R600::CF_CONTINUE_EG, R600::CF_CONTINUE_R600);
    break;
  case CF_JUMP:
    Opcode = Select(R600::CF_JUMP_EG, R600::CF_JUMP_R600);
    break;
  case CF_ELSE:
    Opcode = Select(R600::CF_ELSE_EG, R600::CF_ELSE_R600);
    break;
  case CF_POP:
    Opcode = Select(R600::POP_EG, R600::POP_R600);
    break;
  case CF_END:
    // Cayman has its own CF_END encoding; check it before the generic split.
    if (ST->hasCaymanISA())
      Opcode = R600::CF_END_CM;
    else
      Opcode = Select(R600::CF_END_EG, R600::CF_END_R600);
    break;
  }
  assert (Opcode && "No opcode selected");
  return TII->get(Opcode);
}
|
|
|
|
|
2016-07-09 03:16:05 +08:00
|
|
|
bool isCompatibleWithClause(const MachineInstr &MI,
|
|
|
|
std::set<unsigned> &DstRegs) const {
|
2013-04-30 08:14:00 +08:00
|
|
|
unsigned DstMI, SrcMI;
|
2016-07-09 03:16:05 +08:00
|
|
|
for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
|
|
|
|
E = MI.operands_end();
|
|
|
|
I != E; ++I) {
|
2013-04-30 08:14:00 +08:00
|
|
|
const MachineOperand &MO = *I;
|
|
|
|
if (!MO.isReg())
|
|
|
|
continue;
|
2013-05-24 02:26:42 +08:00
|
|
|
if (MO.isDef()) {
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register Reg = MO.getReg();
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
if (R600::R600_Reg128RegClass.contains(Reg))
|
2013-05-24 02:26:42 +08:00
|
|
|
DstMI = Reg;
|
|
|
|
else
|
2013-06-08 04:28:55 +08:00
|
|
|
DstMI = TRI->getMatchingSuperReg(Reg,
|
2018-05-04 06:38:06 +08:00
|
|
|
AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
&R600::R600_Reg128RegClass);
|
2013-05-24 02:26:42 +08:00
|
|
|
}
|
2013-04-30 08:14:00 +08:00
|
|
|
if (MO.isUse()) {
|
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Partial reverts in:
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister
X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister
HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned&
MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register
PPCFastISel.cpp - No Register::operator-=()
PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned&
MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor
Manual fixups in:
ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned&
HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register
HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register.
PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned&
Depends on D65919
Reviewers: arsenm, bogner, craig.topper, RKSimon
Reviewed By: arsenm
Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65962
llvm-svn: 369041
2019-08-16 03:22:08 +08:00
|
|
|
Register Reg = MO.getReg();
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
if (R600::R600_Reg128RegClass.contains(Reg))
|
2013-04-30 08:14:00 +08:00
|
|
|
SrcMI = Reg;
|
|
|
|
else
|
2013-06-08 04:28:55 +08:00
|
|
|
SrcMI = TRI->getMatchingSuperReg(Reg,
|
2018-05-04 06:38:06 +08:00
|
|
|
AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
&R600::R600_Reg128RegClass);
|
2013-04-30 08:14:00 +08:00
|
|
|
}
|
|
|
|
}
|
2013-06-08 07:30:26 +08:00
|
|
|
if ((DstRegs.find(SrcMI) == DstRegs.end())) {
|
2013-04-30 08:14:00 +08:00
|
|
|
DstRegs.insert(DstMI);
|
|
|
|
return true;
|
|
|
|
} else
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2013-04-30 08:13:53 +08:00
|
|
|
/// Collects a run of fetch instructions starting at \p I into one clause and
/// emits the CF_TC / CF_VC instruction heading it.
///
/// The kind of clause is decided by whether the first instruction uses the
/// texture cache. Collection stops at the end of \p MBB, after MaxFetchInst
/// instructions, at the first instruction using the other cache kind, or at
/// the first instruction rejected by isCompatibleWithClause(). Trivial
/// instructions are skipped without being added to the clause.
///
/// \param MBB block being processed; the clause head is inserted into it.
/// \param I   in: first instruction of the clause; out: first instruction
///            not consumed by the clause.
/// \returns the new clause-head instruction paired with the collected
///          instructions.
ClauseFile
MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
    const {
  const MachineBasicBlock::iterator Head = I;
  const bool UsesTexCache = TII->usesTextureCache(*Head);

  std::vector<MachineInstr *> Members;
  std::set<unsigned> WrittenRegs;
  unsigned Count = 0;

  for (MachineBasicBlock::iterator End = MBB.end(); I != End; ++I) {
    if (IsTrivialInst(*I))
      continue;
    if (Count >= MaxFetchInst)
      break;
    // Every member must use the same cache as the clause head.
    const bool SameCache =
        UsesTexCache ? TII->usesTextureCache(*I) : TII->usesVertexCache(*I);
    if (!SameCache)
      break;
    if (!isCompatibleWithClause(*I, WrittenRegs))
      break;
    ++Count;
    Members.push_back(&*I);
  }

  // NOTE(review): COUNT is encoded as Count - 1, which wraps if no
  // instruction was collected — presumably callers only pass a fetch
  // instruction as the head; confirm.
  MachineInstr *ClauseHeadMI =
      BuildMI(MBB, Head, MBB.findDebugLoc(Head),
              getHWInstrDesc(UsesTexCache ? CF_TC : CF_VC))
          .addImm(0)          // ADDR
          .addImm(Count - 1); // COUNT
  return ClauseFile(ClauseHeadMI, std::move(Members));
}
|
2013-04-30 08:13:53 +08:00
|
|
|
|
2016-07-09 03:16:05 +08:00
|
|
|
/// Assign literal registers to the ALU_LITERAL_X sources of \p MI.
///
/// \p Lits holds the literal operands collected so far; its positions index
/// the table ALU_LITERAL_X/Y/Z/W. A source whose value equals an immediate
/// already present in \p Lits is redirected to that entry's register.
/// Otherwise the literal operand of \p MI is appended to \p Lits and the
/// source is redirected to the register of the new position.
///
/// \param MI   instruction whose sources are inspected and rewritten.
/// \param Lits in/out list of literal operands; asserted to stay below four
///             entries before a new one is appended.
void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
  static const unsigned LiteralRegs[] = {
    R600::ALU_LITERAL_X,
    R600::ALU_LITERAL_Y,
    R600::ALU_LITERAL_Z,
    R600::ALU_LITERAL_W
  };
  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
      TII->getSrcs(MI);
  for (const auto &Src : Srcs) {
    MachineOperand *SrcOp = Src.first;
    // Only sources that still name the placeholder register need work.
    if (SrcOp->getReg() != R600::ALU_LITERAL_X)
      continue;

    const int64_t Wanted = Src.second;
    std::vector<MachineOperand *>::iterator Found =
        llvm::find_if(Lits, [Wanted](MachineOperand *Known) {
          return Known->isImm() && (Known->getImm() == Wanted);
        });

    // Operand of MI that carries the literal value itself.
    MachineOperand &LiteralOp = MI.getOperand(
        TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));

    if (Found != Lits.end()) {
      // Same immediate already has a slot: share its register.
      unsigned Slot = Found - Lits.begin();
      SrcOp->setReg(LiteralRegs[Slot]);
      continue;
    }

    // No match: take the next free slot.
    assert(Lits.size() < 4 && "Too many literals in Instruction Group");
    SrcOp->setReg(LiteralRegs[Lits.size()]);
    Lits.push_back(&LiteralOp);
  }
}
|
|
|
|
|
|
|
|
/// Emit one LITERALS instruction for every pair of values in \p Literals.
///
/// Values are consumed two at a time; when the count is odd the second
/// immediate of the last instruction is 0.
///
/// NOTE(review): the BuildMI overload used here receives only the parent
/// block and a debug location, not \p InsertPos as an insertion point --
/// confirm where the new instructions are meant to land.
///
/// \param InsertPos position whose parent block and debug location are used;
///                  it is reassigned to each newly built instruction.
/// \param Literals  literal values to emit.
/// \returns the last instruction built, or the incoming \p InsertPos when
///          \p Literals is empty.
MachineBasicBlock::iterator insertLiterals(
    MachineBasicBlock::iterator InsertPos,
    const std::vector<unsigned> &Literals) const {
  MachineBasicBlock *Parent = InsertPos->getParent();
  const unsigned Count = Literals.size();
  unsigned Idx = 0;
  while (Idx < Count) {
    unsigned First = Literals[Idx];
    unsigned Second = 0;
    if (Idx + 1 < Count)
      Second = Literals[Idx + 1];
    InsertPos = BuildMI(Parent, InsertPos->getDebugLoc(),
                        TII->get(R600::LITERALS))
                    .addImm(First)
                    .addImm(Second);
    Idx += 2;
  }
  return InsertPos;
}
|
|
|
|
|
|
|
|
/// Gather the instructions following the clause head at \p I into one ALU
/// clause.
///
/// Instructions accepted by IsTrivialInst() are stepped over. Gathering stops
/// at the end of \p MBB or at the first instruction that is neither a bundle
/// nor an ALU instruction according to TII->isALUInstr(). A bundle is
/// dissolved: each bundled instruction is unbundled, has its internal-read
/// flags cleared and is recorded, and the bundle header is erased. After each
/// instruction or bundle, the literals collected by getLiteral() are emitted
/// as LITERALS instructions (two literals each, padded with an immediate 0)
/// and recorded as clause content too.
///
/// Finally operand 7 of the clause head is set to the content size minus one.
///
/// \param MBB block being processed.
/// \param I   in: the clause head; out: first instruction not consumed
///            (possibly the end of \p MBB).
/// \returns the clause head paired with the gathered instructions.
ClauseFile
MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
    const {
  MachineInstr &ClauseHead = *I;
  std::vector<MachineInstr *> ClauseContent;
  I++;
  for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
    if (IsTrivialInst(*I)) {
      ++I;
      continue;
    }
    if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
      break;
    std::vector<MachineOperand *> Literals;
    if (I->isBundle()) {
      MachineInstr &DeleteMI = *I;
      MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
      while (++BI != E && BI->isBundledWithPred()) {
        BI->unbundleFromPred();
        for (MachineOperand &MO : BI->operands()) {
          if (MO.isReg() && MO.isInternalRead())
            MO.setIsInternalRead(false);
        }
        getLiteral(*BI, Literals);
        ClauseContent.push_back(&*BI);
      }
      // Advance past the former bundle before its header is destroyed.
      I = BI;
      DeleteMI.eraseFromParent();
    } else {
      getLiteral(*I, Literals);
      ClauseContent.push_back(&*I);
      I++;
    }
    for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
      // I may have reached the end of the block here, in which case there is
      // no instruction to read a debug location from; use an empty one
      // instead of dereferencing the end iterator.
      MachineInstrBuilder MILit =
          BuildMI(MBB, I, (I != E) ? I->getDebugLoc() : DebugLoc(),
                  TII->get(R600::LITERALS));
      // Append one collected literal, either as an immediate or as a global
      // address with its offset.
      auto AddLiteral = [&MILit](const MachineOperand &Lit) {
        if (Lit.isImm())
          MILit.addImm(Lit.getImm());
        else
          MILit.addGlobalAddress(Lit.getGlobal(), Lit.getOffset());
      };
      AddLiteral(*Literals[i]);
      if (i + 1 < e)
        AddLiteral(*Literals[i + 1]);
      else
        MILit.addImm(0); // Odd literal count: pad the second slot.
      ClauseContent.push_back(MILit);
    }
  }
  assert(ClauseContent.size() < 128 && "ALU clause is too big");
  ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
  return ClauseFile(&ClauseHead, std::move(ClauseContent));
}
|
|
|
|
|
2016-08-17 08:06:43 +08:00
|
|
|
/// Place a previously gathered fetch clause into its block.
///
/// CfCount is added to operand 0 of the clause header, a FETCH_CLAUSE
/// instruction carrying CfCount is built in the header's parent block, and
/// every member of the clause is spliced in front of \p InsertPos.
///
/// NOTE(review): the BuildMI overload used for FETCH_CLAUSE takes no
/// insertion iterator -- confirm its placement relative to \p InsertPos.
///
/// \param InsertPos position the clause members are moved in front of.
/// \param DL        debug location given to the FETCH_CLAUSE instruction.
/// \param Clause    header instruction and member list to place.
/// \param CfCount   in/out counter; advanced by twice the number of members.
void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
                     const DebugLoc &DL, ClauseFile &Clause,
                     unsigned &CfCount) {
  MachineInstr &Header = *Clause.first;
  CounterPropagateAddr(Header, CfCount);

  MachineBasicBlock *Parent = Header.getParent();
  BuildMI(Parent, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);

  for (MachineInstr *Member : Clause.second)
    Parent->splice(InsertPos, Parent, Member);

  CfCount += 2 * Clause.second.size();
}
|
|
|
|
|
2016-08-17 08:06:43 +08:00
|
|
|
/// Place a previously gathered ALU clause into its block.
///
/// Operand 0 of the clause header is reset to 0 and then has CfCount added
/// to it, an ALU_CLAUSE instruction carrying CfCount is built in the header's
/// parent block, and every member of the clause is spliced in front of
/// \p InsertPos.
///
/// NOTE(review): the BuildMI overload used for ALU_CLAUSE takes no insertion
/// iterator -- confirm its placement relative to \p InsertPos.
///
/// \param InsertPos position the clause members are moved in front of.
/// \param DL        debug location given to the ALU_CLAUSE instruction.
/// \param Clause    header instruction and member list to place.
/// \param CfCount   in/out counter; advanced by the number of members.
void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
                   ClauseFile &Clause, unsigned &CfCount) {
  MachineInstr &Header = *Clause.first;
  Header.getOperand(0).setImm(0);
  CounterPropagateAddr(Header, CfCount);

  MachineBasicBlock *Parent = Header.getParent();
  BuildMI(Parent, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);

  for (MachineInstr *Member : Clause.second)
    Parent->splice(InsertPos, Parent, Member);

  CfCount += Clause.second.size();
}
|
|
|
|
|
2016-07-09 03:16:05 +08:00
|
|
|
/// Add \p Addr to the immediate stored in operand 0 of \p MI.
void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
  MachineOperand &Target = MI.getOperand(0);
  Target.setImm(Addr + Target.getImm());
}
|
2014-10-05 00:55:56 +08:00
|
|
|
/// Apply the single-instruction CounterPropagateAddr() with \p Addr to every
/// instruction in \p MIs.
void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
                          unsigned Addr) const {
  for (std::set<MachineInstr *>::const_iterator It = MIs.begin(),
                                                End = MIs.end();
       It != End; ++It)
    CounterPropagateAddr(**It, Addr);
}
|
|
|
|
|
|
|
|
public:
|
2017-08-03 06:19:45 +08:00
|
|
|
/// Static member handed to the MachineFunctionPass base class by the
/// constructor of this pass.
static char ID;
|
|
|
|
|
2017-05-19 01:21:13 +08:00
|
|
|
/// Construct the pass, forwarding the static ID member to the
/// MachineFunctionPass base class.
R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}
|
2013-04-02 05:48:05 +08:00
|
|
|
|
2014-04-29 15:57:24 +08:00
|
|
|
bool runOnMachineFunction(MachineFunction &MF) override {
|
2016-06-24 14:30:11 +08:00
|
|
|
ST = &MF.getSubtarget<R600Subtarget>();
|
2015-01-31 07:24:40 +08:00
|
|
|
MaxFetchInst = ST->getTexVTXClauseSize();
|
2016-06-24 14:30:11 +08:00
|
|
|
TII = ST->getInstrInfo();
|
|
|
|
TRI = ST->getRegisterInfo();
|
|
|
|
|
2014-01-23 05:55:43 +08:00
|
|
|
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
|
2013-06-08 04:28:55 +08:00
|
|
|
|
2017-12-16 06:22:58 +08:00
|
|
|
CFStack CFStack(ST, MF.getFunction().getCallingConv());
|
2013-04-02 05:48:05 +08:00
|
|
|
for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
|
|
|
|
++MB) {
|
|
|
|
MachineBasicBlock &MBB = *MB;
|
|
|
|
unsigned CfCount = 0;
|
2017-01-21 01:52:16 +08:00
|
|
|
std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
|
2013-04-04 00:24:09 +08:00
|
|
|
std::vector<MachineInstr * > IfThenElseStack;
|
2017-12-16 06:22:58 +08:00
|
|
|
if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
|
2013-04-02 05:48:05 +08:00
|
|
|
BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
|
2013-04-08 21:05:49 +08:00
|
|
|
getHWInstrDesc(CF_CALL_FS));
|
2013-04-02 05:48:05 +08:00
|
|
|
CfCount++;
|
|
|
|
}
|
2013-04-30 08:14:38 +08:00
|
|
|
std::vector<ClauseFile> FetchClauses, AluClauses;
|
2013-07-20 05:45:15 +08:00
|
|
|
std::vector<MachineInstr *> LastAlu(1);
|
|
|
|
std::vector<MachineInstr *> ToPopAfter;
|
2016-06-10 10:18:02 +08:00
|
|
|
|
2013-04-02 05:48:05 +08:00
|
|
|
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
|
|
|
I != E;) {
|
2016-06-30 08:01:54 +08:00
|
|
|
if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
|
2013-04-30 08:13:53 +08:00
|
|
|
FetchClauses.push_back(MakeFetchClause(MBB, I));
|
2013-04-02 05:48:05 +08:00
|
|
|
CfCount++;
|
2014-04-25 13:30:21 +08:00
|
|
|
LastAlu.back() = nullptr;
|
2013-04-02 05:48:05 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
MachineBasicBlock::iterator MI = I;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
if (MI->getOpcode() != R600::ENDIF)
|
2014-04-25 13:30:21 +08:00
|
|
|
LastAlu.back() = nullptr;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
if (MI->getOpcode() == R600::CF_ALU)
|
2016-07-09 03:16:05 +08:00
|
|
|
LastAlu.back() = &*MI;
|
2013-04-02 05:48:05 +08:00
|
|
|
I++;
|
2014-01-24 00:18:02 +08:00
|
|
|
bool RequiresWorkAround =
|
|
|
|
CFStack.requiresWorkAroundForInst(MI->getOpcode());
|
2013-04-02 05:48:05 +08:00
|
|
|
switch (MI->getOpcode()) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::CF_ALU_PUSH_BEFORE:
|
2014-01-24 00:18:02 +08:00
|
|
|
if (RequiresWorkAround) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs()
|
|
|
|
<< "Applying bug work-around for ALU_PUSH_BEFORE\n");
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
|
2013-12-03 01:29:37 +08:00
|
|
|
.addImm(CfCount + 1)
|
|
|
|
.addImm(1);
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
MI->setDesc(TII->get(R600::CF_ALU));
|
2013-12-03 01:29:37 +08:00
|
|
|
CfCount++;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
CFStack.pushBranch(R600::CF_PUSH_EG);
|
2014-01-23 05:55:43 +08:00
|
|
|
} else
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
|
2017-07-07 18:18:57 +08:00
|
|
|
LLVM_FALLTHROUGH;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::CF_ALU:
|
2013-04-30 08:14:38 +08:00
|
|
|
I = MI;
|
|
|
|
AluClauses.push_back(MakeALUClause(MBB, I));
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
|
2013-04-02 05:48:05 +08:00
|
|
|
CfCount++;
|
|
|
|
break;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::WHILELOOP: {
|
2014-01-23 05:55:43 +08:00
|
|
|
CFStack.pushLoop();
|
2013-04-02 05:48:05 +08:00
|
|
|
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
|
2013-04-08 21:05:49 +08:00
|
|
|
getHWInstrDesc(CF_WHILE_LOOP))
|
2013-04-10 21:29:20 +08:00
|
|
|
.addImm(1);
|
2017-01-21 01:52:16 +08:00
|
|
|
std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
|
2013-04-02 05:48:05 +08:00
|
|
|
std::set<MachineInstr *>());
|
|
|
|
Pair.second.insert(MIb);
|
2014-10-05 00:55:56 +08:00
|
|
|
LoopStack.push_back(std::move(Pair));
|
2013-04-02 05:48:05 +08:00
|
|
|
MI->eraseFromParent();
|
|
|
|
CfCount++;
|
|
|
|
break;
|
|
|
|
}
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::ENDLOOP: {
|
2014-01-23 05:55:43 +08:00
|
|
|
CFStack.popLoop();
|
2017-01-21 01:52:16 +08:00
|
|
|
std::pair<unsigned, std::set<MachineInstr *>> Pair =
|
2014-10-05 00:55:56 +08:00
|
|
|
std::move(LoopStack.back());
|
2013-04-02 05:48:05 +08:00
|
|
|
LoopStack.pop_back();
|
|
|
|
CounterPropagateAddr(Pair.second, CfCount);
|
2013-04-08 21:05:49 +08:00
|
|
|
BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
|
2013-04-02 05:48:05 +08:00
|
|
|
.addImm(Pair.first + 1);
|
|
|
|
MI->eraseFromParent();
|
|
|
|
CfCount++;
|
|
|
|
break;
|
|
|
|
}
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::IF_PREDICATE_SET: {
|
2014-04-25 13:30:21 +08:00
|
|
|
LastAlu.push_back(nullptr);
|
2013-04-02 05:48:05 +08:00
|
|
|
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
|
2013-04-08 21:05:49 +08:00
|
|
|
getHWInstrDesc(CF_JUMP))
|
2013-04-02 05:48:05 +08:00
|
|
|
.addImm(0)
|
|
|
|
.addImm(0);
|
2013-04-04 00:24:09 +08:00
|
|
|
IfThenElseStack.push_back(MIb);
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
|
2013-04-02 05:48:05 +08:00
|
|
|
MI->eraseFromParent();
|
|
|
|
CfCount++;
|
|
|
|
break;
|
|
|
|
}
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::ELSE: {
|
2013-04-04 00:24:09 +08:00
|
|
|
MachineInstr * JumpInst = IfThenElseStack.back();
|
2013-04-02 05:48:05 +08:00
|
|
|
IfThenElseStack.pop_back();
|
2016-07-09 03:16:05 +08:00
|
|
|
CounterPropagateAddr(*JumpInst, CfCount);
|
2013-04-02 05:48:05 +08:00
|
|
|
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
|
2013-04-08 21:05:49 +08:00
|
|
|
getHWInstrDesc(CF_ELSE))
|
2013-04-02 05:48:05 +08:00
|
|
|
.addImm(0)
|
2013-07-20 05:45:15 +08:00
|
|
|
.addImm(0);
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
|
2013-04-04 00:24:09 +08:00
|
|
|
IfThenElseStack.push_back(MIb);
|
2013-04-02 05:48:05 +08:00
|
|
|
MI->eraseFromParent();
|
|
|
|
CfCount++;
|
|
|
|
break;
|
|
|
|
}
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::ENDIF: {
|
2014-01-23 05:55:43 +08:00
|
|
|
CFStack.popBranch();
|
2013-07-20 05:45:15 +08:00
|
|
|
if (LastAlu.back()) {
|
|
|
|
ToPopAfter.push_back(LastAlu.back());
|
|
|
|
} else {
|
|
|
|
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
|
|
|
|
getHWInstrDesc(CF_POP))
|
|
|
|
.addImm(CfCount + 1)
|
|
|
|
.addImm(1);
|
|
|
|
(void)MIb;
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
|
2013-07-20 05:45:15 +08:00
|
|
|
CfCount++;
|
|
|
|
}
|
2016-06-10 10:18:02 +08:00
|
|
|
|
2013-04-04 00:24:09 +08:00
|
|
|
MachineInstr *IfOrElseInst = IfThenElseStack.back();
|
2013-04-02 05:48:05 +08:00
|
|
|
IfThenElseStack.pop_back();
|
2016-07-09 03:16:05 +08:00
|
|
|
CounterPropagateAddr(*IfOrElseInst, CfCount);
|
2013-07-20 05:45:15 +08:00
|
|
|
IfOrElseInst->getOperand(1).setImm(1);
|
|
|
|
LastAlu.pop_back();
|
2013-04-02 05:48:05 +08:00
|
|
|
MI->eraseFromParent();
|
|
|
|
break;
|
|
|
|
}
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::BREAK: {
|
2013-08-01 03:31:14 +08:00
|
|
|
CfCount ++;
|
2013-04-02 05:48:05 +08:00
|
|
|
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
|
2013-04-08 21:05:49 +08:00
|
|
|
getHWInstrDesc(CF_LOOP_BREAK))
|
2013-04-02 05:48:05 +08:00
|
|
|
.addImm(0);
|
|
|
|
LoopStack.back().second.insert(MIb);
|
|
|
|
MI->eraseFromParent();
|
|
|
|
break;
|
|
|
|
}
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::CONTINUE: {
|
2013-04-02 05:48:05 +08:00
|
|
|
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
|
2013-04-08 21:05:49 +08:00
|
|
|
getHWInstrDesc(CF_LOOP_CONTINUE))
|
2013-04-04 00:24:09 +08:00
|
|
|
.addImm(0);
|
2013-04-02 05:48:05 +08:00
|
|
|
LoopStack.back().second.insert(MIb);
|
|
|
|
MI->eraseFromParent();
|
|
|
|
CfCount++;
|
|
|
|
break;
|
|
|
|
}
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::RETURN: {
|
2016-08-17 08:06:43 +08:00
|
|
|
DebugLoc DL = MBB.findDebugLoc(MI);
|
|
|
|
BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
|
2013-04-24 01:34:00 +08:00
|
|
|
CfCount++;
|
|
|
|
if (CfCount % 2) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
BuildMI(MBB, I, DL, TII->get(R600::PAD));
|
2013-04-24 01:34:00 +08:00
|
|
|
CfCount++;
|
|
|
|
}
|
2016-03-26 02:33:16 +08:00
|
|
|
MI->eraseFromParent();
|
2013-04-30 08:13:53 +08:00
|
|
|
for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
|
2016-08-17 08:06:43 +08:00
|
|
|
EmitFetchClause(I, DL, FetchClauses[i], CfCount);
|
2013-04-30 08:14:38 +08:00
|
|
|
for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
|
2016-08-17 08:06:43 +08:00
|
|
|
EmitALUClause(I, DL, AluClauses[i], CfCount);
|
2016-03-26 02:33:16 +08:00
|
|
|
break;
|
2013-04-24 01:34:00 +08:00
|
|
|
}
|
2013-04-02 05:48:05 +08:00
|
|
|
default:
|
2013-08-16 09:11:51 +08:00
|
|
|
if (TII->isExport(MI->getOpcode())) {
|
2018-05-14 20:53:11 +08:00
|
|
|
LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
|
2013-08-16 09:11:51 +08:00
|
|
|
CfCount++;
|
|
|
|
}
|
2013-04-02 05:48:05 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2013-07-20 05:45:15 +08:00
|
|
|
for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
|
|
|
|
MachineInstr *Alu = ToPopAfter[i];
|
|
|
|
BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
TII->get(R600::CF_ALU_POP_AFTER))
|
2013-07-20 05:45:15 +08:00
|
|
|
.addImm(Alu->getOperand(0).getImm())
|
|
|
|
.addImm(Alu->getOperand(1).getImm())
|
|
|
|
.addImm(Alu->getOperand(2).getImm())
|
|
|
|
.addImm(Alu->getOperand(3).getImm())
|
|
|
|
.addImm(Alu->getOperand(4).getImm())
|
|
|
|
.addImm(Alu->getOperand(5).getImm())
|
|
|
|
.addImm(Alu->getOperand(6).getImm())
|
|
|
|
.addImm(Alu->getOperand(7).getImm())
|
|
|
|
.addImm(Alu->getOperand(8).getImm());
|
|
|
|
Alu->eraseFromParent();
|
|
|
|
}
|
2016-07-23 01:01:25 +08:00
|
|
|
MFI->CFStackSize = CFStack.MaxStackSize;
|
2013-04-02 05:48:05 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-10-01 10:56:57 +08:00
|
|
|
/// Returns the fixed, human-readable name of this pass,
/// "R600 Control Flow Finalizer Pass". Declared `override`, so it replaces a
/// virtual of the same signature inherited from the base class.
StringRef getPassName() const override {
|
2013-04-02 05:48:05 +08:00
|
|
|
return "R600 Control Flow Finalizer Pass";
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2017-08-03 06:19:45 +08:00
|
|
|
} // end anonymous namespace
|
|
|
|
|
|
|
|
// Pass registration: associates R600ControlFlowFinalizer with the DEBUG_TYPE
// argument string and the description "R600 Control Flow Finalizer". Nothing
// is listed between the BEGIN and END macros, so no pass dependencies are
// declared here.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags of the INITIALIZE_PASS macros - confirm
// against llvm/PassSupport.h.
INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE,
|
|
|
|
"R600 Control Flow Finalizer", false, false)
|
|
|
|
INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE,
|
|
|
|
"R600 Control Flow Finalizer", false, false)
|
|
|
|
|
2013-04-02 05:48:05 +08:00
|
|
|
// Out-of-class definition of the pass's static ID member, initialized to 0.
// NOTE(review): presumably the address of ID, not its value, identifies the
// pass - confirm against the LLVM pass infrastructure.
char R600ControlFlowFinalizer::ID = 0;
|
|
|
|
|
2017-08-03 06:19:45 +08:00
|
|
|
// Reference bound to the ID above, defined in namespace llvm so the
// identifier can be named without access to the pass class itself.
char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;
|
2013-04-02 05:48:05 +08:00
|
|
|
|
2017-05-19 01:21:13 +08:00
|
|
|
/// Factory for the pass: heap-allocates a default-constructed
/// R600ControlFlowFinalizer and returns it as a FunctionPass pointer.
/// NOTE(review): nothing here frees the object; presumably ownership passes
/// to the caller (e.g. a pass manager) - confirm at the call sites.
FunctionPass *llvm::createR600ControlFlowFinalizer() {
|
|
|
|
return new R600ControlFlowFinalizer();
|
2013-04-02 05:48:05 +08:00
|
|
|
}
|