forked from OSchip/llvm-project
R600: Factorize code handling Const Read Port limitation
llvm-svn: 177078
This commit is contained in:
parent
d1999a1ccc
commit
0a22bc4156
|
@ -365,17 +365,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
|
|||
SDValue Operand = Ops[OperandIdx[i] - 1];
|
||||
switch (Operand.getOpcode()) {
|
||||
case AMDGPUISD::CONST_ADDRESS: {
|
||||
if (i == 2)
|
||||
break;
|
||||
SDValue CstOffset;
|
||||
if (!Operand.getValueType().isVector() &&
|
||||
SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
|
||||
Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
|
||||
Ops[SelIdx[i] - 1] = CstOffset;
|
||||
return true;
|
||||
if (Operand.getValueType().isVector() ||
|
||||
!SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
|
||||
break;
|
||||
|
||||
// Gather other constant values
|
||||
std::vector<unsigned> Consts;
|
||||
for (unsigned j = 0; j < 3; j++) {
|
||||
int SrcIdx = OperandIdx[j];
|
||||
if (SrcIdx < 0)
|
||||
break;
|
||||
if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
|
||||
if (Reg->getReg() == AMDGPU::ALU_CONST) {
|
||||
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
|
||||
Consts.push_back(Cst->getZExtValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
|
||||
Consts.push_back(Cst->getZExtValue());
|
||||
if (!TII->fitsConstReadLimitations(Consts))
|
||||
break;
|
||||
|
||||
Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
|
||||
Ops[SelIdx[i] - 1] = CstOffset;
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
case ISD::FNEG:
|
||||
if (NegIdx[i] < 0)
|
||||
break;
|
||||
|
|
|
@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
|
|||
(TargetFlags & R600_InstFlag::OP3));
|
||||
}
|
||||
|
||||
bool
|
||||
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
|
||||
const {
|
||||
assert (Consts.size() <= 12 && "Too many operands in instructions group");
|
||||
unsigned Pair1 = 0, Pair2 = 0;
|
||||
for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
|
||||
unsigned ReadConstHalf = Consts[i] & 2;
|
||||
unsigned ReadConstIndex = Consts[i] & (~3);
|
||||
unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
|
||||
if (!Pair1) {
|
||||
Pair1 = ReadHalfConst;
|
||||
continue;
|
||||
}
|
||||
if (Pair1 == ReadHalfConst)
|
||||
continue;
|
||||
if (!Pair2) {
|
||||
Pair2 = ReadHalfConst;
|
||||
continue;
|
||||
}
|
||||
if (Pair2 != ReadHalfConst)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
|
||||
std::vector<unsigned> Consts;
|
||||
for (unsigned i = 0, n = MIs.size(); i < n; i++) {
|
||||
const MachineInstr *MI = MIs[i];
|
||||
|
||||
const R600Operands::Ops OpTable[3][2] = {
|
||||
{R600Operands::SRC0, R600Operands::SRC0_SEL},
|
||||
{R600Operands::SRC1, R600Operands::SRC1_SEL},
|
||||
{R600Operands::SRC2, R600Operands::SRC2_SEL},
|
||||
};
|
||||
|
||||
if (!isALUInstr(MI->getOpcode()))
|
||||
continue;
|
||||
|
||||
for (unsigned j = 0; j < 3; j++) {
|
||||
int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
|
||||
if (SrcIdx < 0)
|
||||
break;
|
||||
if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
|
||||
unsigned Const = MI->getOperand(
|
||||
getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
|
||||
Consts.push_back(Const);
|
||||
}
|
||||
}
|
||||
}
|
||||
return fitsConstReadLimitations(Consts);
|
||||
}
|
||||
|
||||
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
|
||||
const ScheduleDAG *DAG) const {
|
||||
const InstrItineraryData *II = TM->getInstrItineraryData();
|
||||
|
|
|
@ -53,6 +53,9 @@ namespace llvm {
|
|||
/// \returns true if this \p Opcode represents an ALU instruction.
|
||||
bool isALUInstr(unsigned Opcode) const;
|
||||
|
||||
bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
|
||||
bool canBundle(const std::vector<MachineInstr *> &) const;
|
||||
|
||||
/// \brief Vector instructions are instructions that must fill all
|
||||
/// instruction slots within an instruction group.
|
||||
bool isVector(const MachineInstr &MI) const;
|
||||
|
|
|
@ -37,7 +37,6 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
|
|||
CurInstKind = IDOther;
|
||||
CurEmitted = 0;
|
||||
OccupedSlotsMask = 15;
|
||||
memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
|
||||
InstKindLimit[IDAlu] = 120; // 120 minus 8 for security
|
||||
|
||||
|
||||
|
@ -288,79 +287,19 @@ int R600SchedStrategy::getInstKind(SUnit* SU) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Summary of the constant reads of one instruction: the constant index used
/// through channels 0/1 (XY) and the one used through channels 2/3 (ZW).
/// A stored value of 0 means "nothing recorded" for that half.
class ConstPairs {
private:
  unsigned XYPair;
  unsigned ZWPair;
public:
  /// Build the summary from three selector values. The low two bits of a
  /// selector are the channel, the remaining bits the constant index. Only
  /// the first non-zero index seen for each half is kept.
  ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) {
    for (unsigned Idx = 0; Idx != 3; ++Idx) {
      const unsigned Sel = ReadConst[Idx];
      const unsigned Base = Sel & ~3u;
      unsigned &Slot = ((Sel & 3) < 2) ? XYPair : ZWPair;
      if (Slot == 0)
        Slot = Base;
    }
  }

  /// Two summaries are compatible when, for each half, at least one side has
  /// nothing recorded or both sides recorded the same index.
  bool isCompatibleWith(const ConstPairs& CP) const {
    if (XYPair && CP.XYPair && CP.XYPair != XYPair)
      return false;
    if (ZWPair && CP.ZWPair && CP.ZWPair != ZWPair)
      return false;
    return true;
  }
};
|
||||
|
||||
static
|
||||
const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) {
|
||||
unsigned ReadConsts[3] = {0, 0, 0};
|
||||
R600Operands::Ops OpTable[3][2] = {
|
||||
{R600Operands::SRC0, R600Operands::SRC0_SEL},
|
||||
{R600Operands::SRC1, R600Operands::SRC1_SEL},
|
||||
{R600Operands::SRC2, R600Operands::SRC2_SEL},
|
||||
};
|
||||
|
||||
if (!TII->isALUInstr(MI.getOpcode()))
|
||||
return ConstPairs(ReadConsts);
|
||||
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]);
|
||||
if (SrcIdx < 0)
|
||||
break;
|
||||
if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST)
|
||||
ReadConsts[i] =MI.getOperand(
|
||||
TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm();
|
||||
}
|
||||
return ConstPairs(ReadConsts);
|
||||
}
|
||||
|
||||
bool
|
||||
R600SchedStrategy::isBundleable(const MachineInstr& MI) {
|
||||
const ConstPairs &MIPair = getPairs(TII, MI);
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
if (!InstructionsGroupCandidate[i])
|
||||
continue;
|
||||
const ConstPairs &IGPair = getPairs(TII,
|
||||
*InstructionsGroupCandidate[i]->getInstr());
|
||||
if (!IGPair.isCompatibleWith(MIPair))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
|
||||
if (Q.empty())
|
||||
return NULL;
|
||||
for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end();
|
||||
It != E; ++It) {
|
||||
SUnit *SU = *It;
|
||||
if (isBundleable(*SU->getInstr())) {
|
||||
InstructionsGroupCandidate.push_back(SU->getInstr());
|
||||
if (TII->canBundle(InstructionsGroupCandidate)) {
|
||||
InstructionsGroupCandidate.pop_back();
|
||||
Q.erase(It);
|
||||
return SU;
|
||||
} else {
|
||||
InstructionsGroupCandidate.pop_back();
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
|
@ -381,7 +320,7 @@ void R600SchedStrategy::PrepareNextSlot() {
|
|||
DEBUG(dbgs() << "New Slot\n");
|
||||
assert (OccupedSlotsMask && "Slot wasn't filled");
|
||||
OccupedSlotsMask = 0;
|
||||
memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
|
||||
InstructionsGroupCandidate.clear();
|
||||
LoadAlu();
|
||||
}
|
||||
|
||||
|
@ -462,7 +401,7 @@ SUnit* R600SchedStrategy::pickAlu() {
|
|||
SUnit *SU = AttemptFillSlot(Chan);
|
||||
if (SU) {
|
||||
OccupedSlotsMask |= (1 << Chan);
|
||||
InstructionsGroupCandidate[Chan] = SU;
|
||||
InstructionsGroupCandidate.push_back(SU->getInstr());
|
||||
return SU;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -98,7 +98,7 @@ public:
|
|||
virtual void releaseBottomNode(SUnit *SU);
|
||||
|
||||
private:
|
||||
SUnit *InstructionsGroupCandidate[4];
|
||||
std::vector<MachineInstr *> InstructionsGroupCandidate;
|
||||
|
||||
int getInstKind(SUnit *SU);
|
||||
bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
|
||||
|
@ -112,7 +112,6 @@ private:
|
|||
void AssignSlot(MachineInstr *MI, unsigned Slot);
|
||||
SUnit* pickAlu();
|
||||
SUnit* pickOther(int QID);
|
||||
bool isBundleable(const MachineInstr& MI);
|
||||
void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
|
||||
};
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
|
||||
; CHECK: @main1
|
||||
; CHECK: MOV T{{[0-9]+\.[XYZW], CBuf0\[[0-9]+\]\.[XYZW]}}
|
||||
|
||||
define void @main() {
|
||||
define void @main1() {
|
||||
main_body:
|
||||
%0 = load <4 x float> addrspace(8)* null
|
||||
%1 = extractelement <4 x float> %0, i32 0
|
||||
|
@ -48,5 +48,53 @@ main_body:
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @main2
|
||||
; CHECK-NOT: MOV
|
||||
; Builds a <4 x float> whose four lanes are each a clamped select between two
; elements loaded from addrspace(8), then passes it to
; @llvm.R600.store.swizzle. The FileCheck directives preceding this function
; expect no MOV in the output generated for it.
; NOTE(review): addrspace(8) is presumably the constant buffer (the @main1
; CHECK line matches CBuf0 operands) - confirm against the target definition.
define void @main2() {
|
||||
main_body:
|
||||
; Lane 0: condition is element 0 of entry 0; picks element 0 or 1 of entry 1.
%0 = load <4 x float> addrspace(8)* null
|
||||
%1 = extractelement <4 x float> %0, i32 0
|
||||
%2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
|
||||
%3 = extractelement <4 x float> %2, i32 0
|
||||
%4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
|
||||
%5 = extractelement <4 x float> %4, i32 1
|
||||
%6 = fcmp ult float %1, 0.000000e+00
|
||||
%7 = select i1 %6, float %3, float %5
|
||||
; Lane 1: condition is element 1 of entry 0; picks element 0 or 1 of entry 2.
%8 = load <4 x float> addrspace(8)* null
|
||||
%9 = extractelement <4 x float> %8, i32 1
|
||||
%10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
|
||||
%11 = extractelement <4 x float> %10, i32 0
|
||||
%12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
|
||||
%13 = extractelement <4 x float> %12, i32 1
|
||||
%14 = fcmp ult float %9, 0.000000e+00
|
||||
%15 = select i1 %14, float %11, float %13
|
||||
; Lane 2: condition is element 2 of entry 0; picks element 3 or 2 of entry 1.
%16 = load <4 x float> addrspace(8)* null
|
||||
%17 = extractelement <4 x float> %16, i32 2
|
||||
%18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
|
||||
%19 = extractelement <4 x float> %18, i32 3
|
||||
%20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
|
||||
%21 = extractelement <4 x float> %20, i32 2
|
||||
%22 = fcmp ult float %17, 0.000000e+00
|
||||
%23 = select i1 %22, float %19, float %21
|
||||
; Lane 3: condition is element 3 of entry 0; picks element 3 or 2 of entry 2.
%24 = load <4 x float> addrspace(8)* null
|
||||
%25 = extractelement <4 x float> %24, i32 3
|
||||
%26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
|
||||
%27 = extractelement <4 x float> %26, i32 3
|
||||
%28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
|
||||
%29 = extractelement <4 x float> %28, i32 2
|
||||
%30 = fcmp ult float %25, 0.000000e+00
|
||||
%31 = select i1 %30, float %27, float %29
|
||||
; Clamp every selected value with bounds 0.0 and 1.0.
%32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
|
||||
%33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
|
||||
|
||||
%34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00)
|
||||
%35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00)
|
||||
; Pack the four clamped lanes into one vector and hand it to the store intrinsic.
%36 = insertelement <4 x float> undef, float %32, i32 0
|
||||
%37 = insertelement <4 x float> %36, float %33, i32 1
|
||||
%38 = insertelement <4 x float> %37, float %34, i32 2
|
||||
%39 = insertelement <4 x float> %38, float %35, i32 3
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.AMDIL.clamp.(float, float, float) readnone
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
|
Loading…
Reference in New Issue