2017-08-10 08:46:15 +08:00
|
|
|
//===- R600ExpandSpecialInstrs.cpp - Expand special instructions ----------===//
|
2012-12-12 05:25:42 +08:00
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2012-12-12 05:25:42 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
/// \file
|
|
|
|
/// Vector, Reduction, and Cube instructions need to fill the entire instruction
|
|
|
|
/// group to work correctly. This pass expands these individual instructions
|
|
|
|
/// into several instructions that will completely fill the instruction group.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "AMDGPU.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "AMDGPUSubtarget.h"
|
2012-12-12 05:25:42 +08:00
|
|
|
#include "R600Defines.h"
|
|
|
|
#include "R600InstrInfo.h"
|
2013-01-02 18:22:59 +08:00
|
|
|
#include "R600RegisterInfo.h"
|
2017-08-10 08:46:15 +08:00
|
|
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
AMDGPU: Remove #include "MCTargetDesc/AMDGPUMCTargetDesc.h" from common headers
Summary:
MCTargetDesc/AMDGPUMCTargetDesc.h contains enums for all the instruction
and register definitions, which are huge so we only want to include
them where needed.
This will also make it easier if we want to split the R600 and GCN
definitions into separate tablegenerated files.
I was unable to remove AMDGPUMCTargetDesc.h from SIMachineFunctionInfo.h
because it uses some enums from the header to initialize default values
for the SIMachineFunction class, so I ended up having to remove includes of
SIMachineFunctionInfo.h from headers too.
Reviewers: arsenm, nhaehnle
Reviewed By: nhaehnle
Subscribers: MatzeB, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46272
llvm-svn: 332930
2018-05-22 10:03:23 +08:00
|
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
2012-12-12 05:25:42 +08:00
|
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
2017-08-10 08:46:15 +08:00
|
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
2012-12-12 05:25:42 +08:00
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
2017-08-10 08:46:15 +08:00
|
|
|
#include "llvm/CodeGen/MachineOperand.h"
|
|
|
|
#include "llvm/Pass.h"
|
|
|
|
#include <cassert>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <iterator>
|
2012-12-12 05:25:42 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
2017-08-03 06:19:45 +08:00
|
|
|
#define DEBUG_TYPE "r600-expand-special-instrs"
|
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
namespace {
|
|
|
|
|
|
|
|
class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
|
|
|
|
private:
|
2017-08-10 08:46:15 +08:00
|
|
|
const R600InstrInfo *TII = nullptr;
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2013-12-10 22:43:27 +08:00
|
|
|
void SetFlagInNewMI(MachineInstr *NewMI, const MachineInstr *OldMI,
|
|
|
|
unsigned Op);
|
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
public:
|
2017-08-03 06:19:45 +08:00
|
|
|
static char ID;
|
|
|
|
|
2017-08-10 08:46:15 +08:00
|
|
|
R600ExpandSpecialInstrsPass() : MachineFunctionPass(ID) {}
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2014-04-29 15:57:24 +08:00
|
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2016-10-01 10:56:57 +08:00
|
|
|
StringRef getPassName() const override {
|
2012-12-12 05:25:42 +08:00
|
|
|
return "R600 Expand special instructions pass";
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2017-08-10 08:46:15 +08:00
|
|
|
} // end anonymous namespace
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2017-08-03 06:19:45 +08:00
|
|
|
INITIALIZE_PASS_BEGIN(R600ExpandSpecialInstrsPass, DEBUG_TYPE,
|
|
|
|
"R600 Expand Special Instrs", false, false)
|
|
|
|
INITIALIZE_PASS_END(R600ExpandSpecialInstrsPass, DEBUG_TYPE,
|
|
|
|
"R600ExpandSpecialInstrs", false, false)
|
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
char R600ExpandSpecialInstrsPass::ID = 0;
|
|
|
|
|
2017-08-03 06:19:45 +08:00
|
|
|
char &llvm::R600ExpandSpecialInstrsPassID = R600ExpandSpecialInstrsPass::ID;
|
|
|
|
|
2017-05-19 01:21:13 +08:00
|
|
|
/// Factory for the R600 special-instruction expansion pass. Returns a newly
/// allocated pass object as a FunctionPass pointer.
FunctionPass *llvm::createR600ExpandSpecialInstrsPass() {
  FunctionPass *ExpandPass = new R600ExpandSpecialInstrsPass();
  return ExpandPass;
}
|
|
|
|
|
2013-12-10 22:43:27 +08:00
|
|
|
/// Propagate the immediate operand named \p Op from \p OldMI to \p NewMI.
///
/// The operand is looked up on \p OldMI; when the lookup yields a negative
/// index (the operand is absent) \p NewMI is left untouched.
void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr *NewMI,
    const MachineInstr *OldMI, unsigned Op) {
  const int SrcOperandIdx = TII->getOperandIdx(*OldMI, Op);
  if (SrcOperandIdx < 0)
    return;

  const uint64_t ImmValue = OldMI->getOperand(SrcOperandIdx).getImm();
  TII->setImmOperand(*NewMI, Op, ImmValue);
}
|
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
|
2016-06-24 14:30:11 +08:00
|
|
|
const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
|
|
|
|
TII = ST.getInstrInfo();
|
2012-12-12 05:25:42 +08:00
|
|
|
|
|
|
|
const R600RegisterInfo &TRI = TII->getRegisterInfo();
|
|
|
|
|
|
|
|
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
|
|
|
|
BB != BB_E; ++BB) {
|
|
|
|
MachineBasicBlock &MBB = *BB;
|
|
|
|
MachineBasicBlock::iterator I = MBB.begin();
|
|
|
|
while (I != MBB.end()) {
|
|
|
|
MachineInstr &MI = *I;
|
2014-03-02 20:27:27 +08:00
|
|
|
I = std::next(I);
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2013-11-15 08:12:45 +08:00
|
|
|
// Expand LDS_*_RET instructions
|
|
|
|
if (TII->isLDSRetInstr(MI.getOpcode())) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
|
2013-11-15 08:12:45 +08:00
|
|
|
assert(DstIdx != -1);
|
|
|
|
MachineOperand &DstOp = MI.getOperand(DstIdx);
|
|
|
|
MachineInstr *Mov = TII->buildMovInstr(&MBB, I,
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
DstOp.getReg(), R600::OQAP);
|
|
|
|
DstOp.setReg(R600::OQAP);
|
2013-11-15 08:12:45 +08:00
|
|
|
int LDSPredSelIdx = TII->getOperandIdx(MI.getOpcode(),
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
R600::OpName::pred_sel);
|
2013-11-15 08:12:45 +08:00
|
|
|
int MovPredSelIdx = TII->getOperandIdx(Mov->getOpcode(),
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
R600::OpName::pred_sel);
|
2013-11-15 08:12:45 +08:00
|
|
|
// Copy the pred_sel bit
|
|
|
|
Mov->getOperand(MovPredSelIdx).setReg(
|
|
|
|
MI.getOperand(LDSPredSelIdx).getReg());
|
|
|
|
}
|
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
switch (MI.getOpcode()) {
|
|
|
|
default: break;
|
|
|
|
// Expand PRED_X to one of the PRED_SET instructions.
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::PRED_X: {
|
2012-12-12 05:25:42 +08:00
|
|
|
uint64_t Flags = MI.getOperand(3).getImm();
|
|
|
|
// The native opcode used by PRED_X is stored as an immediate in the
|
|
|
|
// third operand.
|
|
|
|
MachineInstr *PredSet = TII->buildDefaultInstruction(MBB, I,
|
|
|
|
MI.getOperand(2).getImm(), // opcode
|
|
|
|
MI.getOperand(0).getReg(), // dst
|
|
|
|
MI.getOperand(1).getReg(), // src0
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
R600::ZERO); // src1
|
2016-06-30 08:01:54 +08:00
|
|
|
TII->addFlag(*PredSet, 0, MO_FLAG_MASK);
|
2012-12-12 05:25:42 +08:00
|
|
|
if (Flags & MO_FLAG_PUSH) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
TII->setImmOperand(*PredSet, R600::OpName::update_exec_mask, 1);
|
2012-12-12 05:25:42 +08:00
|
|
|
} else {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
TII->setImmOperand(*PredSet, R600::OpName::update_pred, 1);
|
2012-12-12 05:25:42 +08:00
|
|
|
}
|
|
|
|
MI.eraseFromParent();
|
|
|
|
continue;
|
|
|
|
}
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::DOT_4: {
|
|
|
|
|
2013-05-18 00:50:32 +08:00
|
|
|
const R600RegisterInfo &TRI = TII->getRegisterInfo();
|
|
|
|
|
|
|
|
unsigned DstReg = MI.getOperand(0).getReg();
|
|
|
|
unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
|
|
|
|
|
|
|
|
for (unsigned Chan = 0; Chan < 4; ++Chan) {
|
|
|
|
bool Mask = (Chan != TRI.getHWRegChan(DstReg));
|
|
|
|
unsigned SubDstReg =
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
|
2013-05-18 00:50:32 +08:00
|
|
|
MachineInstr *BMI =
|
|
|
|
TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg);
|
|
|
|
if (Chan > 0) {
|
|
|
|
BMI->bundleWithPred();
|
|
|
|
}
|
|
|
|
if (Mask) {
|
2016-06-30 08:01:54 +08:00
|
|
|
TII->addFlag(*BMI, 0, MO_FLAG_MASK);
|
2013-05-18 00:50:32 +08:00
|
|
|
}
|
|
|
|
if (Chan != 3)
|
2016-06-30 08:01:54 +08:00
|
|
|
TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST);
|
2013-05-18 00:50:32 +08:00
|
|
|
unsigned Opcode = BMI->getOpcode();
|
|
|
|
// While not strictly necessary from hw point of view, we force
|
|
|
|
// all src operands of a dot4 inst to belong to the same slot.
|
|
|
|
unsigned Src0 = BMI->getOperand(
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0))
|
2013-05-18 00:50:32 +08:00
|
|
|
.getReg();
|
|
|
|
unsigned Src1 = BMI->getOperand(
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1))
|
2013-05-18 00:50:32 +08:00
|
|
|
.getReg();
|
2013-05-22 09:29:38 +08:00
|
|
|
(void) Src0;
|
|
|
|
(void) Src1;
|
2013-06-05 07:17:15 +08:00
|
|
|
if ((TRI.getEncodingValue(Src0) & 0xff) < 127 &&
|
|
|
|
(TRI.getEncodingValue(Src1) & 0xff) < 127)
|
|
|
|
assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1));
|
2013-05-18 00:50:32 +08:00
|
|
|
}
|
|
|
|
MI.eraseFromParent();
|
|
|
|
continue;
|
|
|
|
}
|
2013-02-06 01:09:14 +08:00
|
|
|
}
|
2012-12-12 05:25:42 +08:00
|
|
|
|
|
|
|
bool IsReduction = TII->isReductionOp(MI.getOpcode());
|
|
|
|
bool IsVector = TII->isVector(MI);
|
|
|
|
bool IsCube = TII->isCubeOp(MI.getOpcode());
|
|
|
|
if (!IsReduction && !IsVector && !IsCube) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Expand the instruction
|
|
|
|
//
|
|
|
|
// Reduction instructions:
|
|
|
|
// T0_X = DP4 T1_XYZW, T2_XYZW
|
|
|
|
// becomes:
|
|
|
|
// TO_X = DP4 T1_X, T2_X
|
|
|
|
// TO_Y (write masked) = DP4 T1_Y, T2_Y
|
|
|
|
// TO_Z (write masked) = DP4 T1_Z, T2_Z
|
|
|
|
// TO_W (write masked) = DP4 T1_W, T2_W
|
|
|
|
//
|
|
|
|
// Vector instructions:
|
|
|
|
// T0_X = MULLO_INT T1_X, T2_X
|
|
|
|
// becomes:
|
|
|
|
// T0_X = MULLO_INT T1_X, T2_X
|
|
|
|
// T0_Y (write masked) = MULLO_INT T1_X, T2_X
|
|
|
|
// T0_Z (write masked) = MULLO_INT T1_X, T2_X
|
|
|
|
// T0_W (write masked) = MULLO_INT T1_X, T2_X
|
|
|
|
//
|
|
|
|
// Cube instructions:
|
|
|
|
// T0_XYZW = CUBE T1_XYZW
|
|
|
|
// becomes:
|
|
|
|
// TO_X = CUBE T1_Z, T1_Y
|
|
|
|
// T0_Y = CUBE T1_Z, T1_X
|
|
|
|
// T0_Z = CUBE T1_X, T1_Z
|
|
|
|
// T0_W = CUBE T1_Y, T1_Z
|
|
|
|
for (unsigned Chan = 0; Chan < 4; Chan++) {
|
|
|
|
unsigned DstReg = MI.getOperand(
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
TII->getOperandIdx(MI, R600::OpName::dst)).getReg();
|
2012-12-12 05:25:42 +08:00
|
|
|
unsigned Src0 = MI.getOperand(
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
TII->getOperandIdx(MI, R600::OpName::src0)).getReg();
|
2012-12-12 05:25:42 +08:00
|
|
|
unsigned Src1 = 0;
|
|
|
|
|
|
|
|
// Determine the correct source registers
|
|
|
|
if (!IsCube) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
int Src1Idx = TII->getOperandIdx(MI, R600::OpName::src1);
|
2012-12-12 05:25:42 +08:00
|
|
|
if (Src1Idx != -1) {
|
|
|
|
Src1 = MI.getOperand(Src1Idx).getReg();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (IsReduction) {
|
2018-05-04 06:38:06 +08:00
|
|
|
unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(Chan);
|
2012-12-12 05:25:42 +08:00
|
|
|
Src0 = TRI.getSubReg(Src0, SubRegIndex);
|
|
|
|
Src1 = TRI.getSubReg(Src1, SubRegIndex);
|
|
|
|
} else if (IsCube) {
|
|
|
|
static const int CubeSrcSwz[] = {2, 2, 0, 1};
|
2018-05-04 06:38:06 +08:00
|
|
|
unsigned SubRegIndex0 = AMDGPURegisterInfo::getSubRegFromChannel(CubeSrcSwz[Chan]);
|
|
|
|
unsigned SubRegIndex1 = AMDGPURegisterInfo::getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
|
2012-12-12 05:25:42 +08:00
|
|
|
Src1 = TRI.getSubReg(Src0, SubRegIndex1);
|
|
|
|
Src0 = TRI.getSubReg(Src0, SubRegIndex0);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Determine the correct destination registers;
|
|
|
|
bool Mask = false;
|
|
|
|
bool NotLast = true;
|
|
|
|
if (IsCube) {
|
2018-05-04 06:38:06 +08:00
|
|
|
unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(Chan);
|
2012-12-12 05:25:42 +08:00
|
|
|
DstReg = TRI.getSubReg(DstReg, SubRegIndex);
|
|
|
|
} else {
|
|
|
|
// Mask the write if the original instruction does not write to
|
|
|
|
// the current Channel.
|
|
|
|
Mask = (Chan != TRI.getHWRegChan(DstReg));
|
|
|
|
unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
DstReg = R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
|
2012-12-12 05:25:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Set the IsLast bit
|
|
|
|
NotLast = (Chan != 3 );
|
|
|
|
|
|
|
|
// Add the new instruction
|
|
|
|
unsigned Opcode = MI.getOpcode();
|
|
|
|
switch (Opcode) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::CUBE_r600_pseudo:
|
|
|
|
Opcode = R600::CUBE_r600_real;
|
2012-12-12 05:25:42 +08:00
|
|
|
break;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::CUBE_eg_pseudo:
|
|
|
|
Opcode = R600::CUBE_eg_real;
|
2012-12-12 05:25:42 +08:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
MachineInstr *NewMI =
|
|
|
|
TII->buildDefaultInstruction(MBB, I, Opcode, DstReg, Src0, Src1);
|
|
|
|
|
2012-12-13 08:59:38 +08:00
|
|
|
if (Chan != 0)
|
|
|
|
NewMI->bundleWithPred();
|
2012-12-12 05:25:42 +08:00
|
|
|
if (Mask) {
|
2016-06-30 08:01:54 +08:00
|
|
|
TII->addFlag(*NewMI, 0, MO_FLAG_MASK);
|
2012-12-12 05:25:42 +08:00
|
|
|
}
|
|
|
|
if (NotLast) {
|
2016-06-30 08:01:54 +08:00
|
|
|
TII->addFlag(*NewMI, 0, MO_FLAG_NOT_LAST);
|
2012-12-12 05:25:42 +08:00
|
|
|
}
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
SetFlagInNewMI(NewMI, &MI, R600::OpName::clamp);
|
|
|
|
SetFlagInNewMI(NewMI, &MI, R600::OpName::literal);
|
|
|
|
SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_abs);
|
|
|
|
SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_abs);
|
|
|
|
SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_neg);
|
|
|
|
SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_neg);
|
2012-12-12 05:25:42 +08:00
|
|
|
}
|
|
|
|
MI.eraseFromParent();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|