R600: Const/Neg/Abs can be folded to dot4

llvm-svn: 183278
This commit is contained in:
Vincent Lejeune 2013-06-04 23:17:15 +00:00
parent 31caf980c6
commit c689679173
6 changed files with 213 additions and 47 deletions

View File

@ -49,7 +49,10 @@ public:
private:
inline SDValue getSmallIPtrImm(unsigned Imm);
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
const R600InstrInfo *TII, std::vector<unsigned> Cst);
bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
// Complex pattern selectors
bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
@ -318,6 +321,20 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
const R600InstrInfo *TII =
static_cast<const R600InstrInfo*>(TM.getInstrInfo());
if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
bool IsModified = false;
do {
std::vector<SDValue> Ops;
for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
I != E; ++I)
Ops.push_back(*I);
IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
if (IsModified) {
Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
}
} while (IsModified);
}
if (Result && Result->isMachineOpcode() &&
!(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
&& TII->isALUInstr(Result->getMachineOpcode())) {
@ -360,6 +377,43 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return Result;
}
/// Try to fold the node that produces one source operand into that operand's
/// own fields (source register, constant selector, neg and abs modifiers).
///
/// What happens depends on the opcode of \p Src:
///  - AMDGPUISD::CONST_ADDRESS: \p Src becomes the ALU_CONST register and
///    \p Sel receives the constant offset.
///  - ISD::FNEG: \p Src becomes the negated value and \p Neg is set to 1.
///  - ISD::FABS: \p Src becomes the inner value and \p Abs is set to 1, but
///    only when the caller passed an \p Abs that already holds a node.
///  - ISD::BITCAST: \p Src becomes the value being cast.
///
/// \param Consts constant values already used by the instruction. It is taken
///        by value, so the entry appended here never reaches the caller's
///        vector.
/// \returns true when at least one of the referenced operands was rewritten,
///          false when nothing could be folded.
bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
                                     SDValue &Abs, const R600InstrInfo *TII,
                                     std::vector<unsigned> Consts) {
  switch (Src.getOpcode()) {
  case AMDGPUISD::CONST_ADDRESS: {
    SDValue CstOffset;
    if (Src.getValueType().isVector() ||
        !SelectGlobalValueConstantOffset(Src.getOperand(0), CstOffset))
      return false;

    // dyn_cast<> returns null when CstOffset is not a ConstantSDNode; refuse
    // to fold in that case instead of dereferencing a null pointer.
    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
    if (!Cst)
      return false;

    // Check the limitation with the candidate constant included; Consts is a
    // local copy, so a rejected candidate leaves no trace.
    Consts.push_back(Cst->getZExtValue());
    if (!TII->fitsConstReadLimitations(Consts))
      return false;

    Src = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
    Sel = CstOffset;
    return true;
  }
  case ISD::FNEG:
    Src = Src.getOperand(0);
    Neg = CurDAG->getTargetConstant(1, MVT::i32);
    return true;
  case ISD::FABS:
    // An Abs without a node means the caller has no abs operand to write to.
    if (!Abs.getNode())
      return false;
    Src = Src.getOperand(0);
    Abs = CurDAG->getTargetConstant(1, MVT::i32);
    return true;
  case ISD::BITCAST:
    Src = Src.getOperand(0);
    return true;
  default:
    return false;
  }
}
bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
int OperandIdx[] = {
@ -383,59 +437,101 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
-1
};
// Gather constants values
std::vector<unsigned> Consts;
for (unsigned j = 0; j < 3; j++) {
int SrcIdx = OperandIdx[j];
if (SrcIdx < 0)
break;
if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
if (Reg->getReg() == AMDGPU::ALU_CONST) {
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
Consts.push_back(Cst->getZExtValue());
}
}
}
for (unsigned i = 0; i < 3; i++) {
if (OperandIdx[i] < 0)
return false;
SDValue Operand = Ops[OperandIdx[i] - 1];
switch (Operand.getOpcode()) {
case AMDGPUISD::CONST_ADDRESS: {
SDValue CstOffset;
if (Operand.getValueType().isVector() ||
!SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
break;
SDValue &Src = Ops[OperandIdx[i] - 1];
SDValue &Sel = Ops[SelIdx[i] - 1];
SDValue &Neg = Ops[NegIdx[i] - 1];
SDValue FakeAbs;
SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
return true;
}
return false;
}
// Gather others constants values
std::vector<unsigned> Consts;
for (unsigned j = 0; j < 3; j++) {
int SrcIdx = OperandIdx[j];
if (SrcIdx < 0)
break;
if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
if (Reg->getReg() == AMDGPU::ALU_CONST) {
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
Consts.push_back(Cst->getZExtValue());
}
}
}
bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
int OperandIdx[] = {
TII->getOperandIdx(Opcode, R600Operands::SRC0_X),
TII->getOperandIdx(Opcode, R600Operands::SRC0_Y),
TII->getOperandIdx(Opcode, R600Operands::SRC0_Z),
TII->getOperandIdx(Opcode, R600Operands::SRC0_W),
TII->getOperandIdx(Opcode, R600Operands::SRC1_X),
TII->getOperandIdx(Opcode, R600Operands::SRC1_Y),
TII->getOperandIdx(Opcode, R600Operands::SRC1_Z),
TII->getOperandIdx(Opcode, R600Operands::SRC1_W)
};
int SelIdx[] = {
TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_X),
TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_Y),
TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_Z),
TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_W),
TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_X),
TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_Y),
TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_Z),
TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_W)
};
int NegIdx[] = {
TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_X),
TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_Y),
TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_Z),
TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_W),
TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_X),
TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_Y),
TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_Z),
TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_W)
};
int AbsIdx[] = {
TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_X),
TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_Y),
TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_Z),
TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_W),
TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_X),
TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_Y),
TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_Z),
TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_W)
};
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
Consts.push_back(Cst->getZExtValue());
if (!TII->fitsConstReadLimitations(Consts))
break;
Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
Ops[SelIdx[i] - 1] = CstOffset;
return true;
}
case ISD::FNEG:
if (NegIdx[i] < 0)
break;
Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
Ops[NegIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
return true;
case ISD::FABS:
if (AbsIdx[i] < 0)
break;
Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
Ops[AbsIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
return true;
case ISD::BITCAST:
Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
return true;
default:
// Gather constants values
std::vector<unsigned> Consts;
for (unsigned j = 0; j < 8; j++) {
int SrcIdx = OperandIdx[j];
if (SrcIdx < 0)
break;
if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
if (Reg->getReg() == AMDGPU::ALU_CONST) {
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
Consts.push_back(Cst->getZExtValue());
}
}
}
for (unsigned i = 0; i < 8; i++) {
if (OperandIdx[i] < 0)
return false;
SDValue &Src = Ops[OperandIdx[i] - 1];
SDValue &Sel = Ops[SelIdx[i] - 1];
SDValue &Neg = Ops[NegIdx[i] - 1];
SDValue &Abs = Ops[AbsIdx[i] - 1];
if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
return true;
}
return false;
}

View File

@ -108,7 +108,8 @@ private:
std::vector<std::pair<unsigned, unsigned> > UsedKCache;
const SmallVector<std::pair<MachineOperand *, int64_t>, 3> &Consts =
TII->getSrcs(MI);
assert(TII->isALUInstr(MI->getOpcode()) && "Can't assign Const");
assert((TII->isALUInstr(MI->getOpcode()) ||
MI->getOpcode() == AMDGPU::DOT_4) && "Can't assign Const");
for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
continue;
@ -183,6 +184,9 @@ private:
if (TII->isALUInstr(I->getOpcode()) &&
!SubstituteKCacheBank(I, KCacheBanks))
break;
if (I->getOpcode() == AMDGPU::DOT_4 &&
!SubstituteKCacheBank(I, KCacheBanks))
break;
AluInstCount += OccupiedDwords(I);
}
unsigned Opcode = PushBeforeModifier ?

View File

@ -214,7 +214,9 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
.getReg();
(void) Src0;
(void) Src1;
assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1));
if ((TRI.getEncodingValue(Src0) & 0xff) < 127 &&
(TRI.getEncodingValue(Src1) & 0xff) < 127)
assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1));
}
MI.eraseFromParent();
continue;

View File

@ -169,6 +169,31 @@ SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
if (MI->getOpcode() == AMDGPU::DOT_4) {
static const R600Operands::VecOps OpTable[8][2] = {
{R600Operands::SRC0_X, R600Operands::SRC0_SEL_X},
{R600Operands::SRC0_Y, R600Operands::SRC0_SEL_Y},
{R600Operands::SRC0_Z, R600Operands::SRC0_SEL_Z},
{R600Operands::SRC0_W, R600Operands::SRC0_SEL_W},
{R600Operands::SRC1_X, R600Operands::SRC1_SEL_X},
{R600Operands::SRC1_Y, R600Operands::SRC1_SEL_Y},
{R600Operands::SRC1_Z, R600Operands::SRC1_SEL_Z},
{R600Operands::SRC1_W, R600Operands::SRC1_SEL_W},
};
for (unsigned j = 0; j < 8; j++) {
MachineOperand &MO = MI->getOperand(OpTable[j][0] + 1);
unsigned Reg = MO.getReg();
if (Reg == AMDGPU::ALU_CONST) {
unsigned Sel = MI->getOperand(OpTable[j][1] + 1).getImm();
Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
continue;
}
}
return Result;
}
static const R600Operands::Ops OpTable[3][2] = {
{R600Operands::SRC0, R600Operands::SRC0_SEL},
{R600Operands::SRC1, R600Operands::SRC1_SEL},
@ -967,6 +992,11 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
return getOperandIdx(MI.getOpcode(), Op);
}
/// Convenience overload for vector operands: extracts the opcode from \p MI
/// and defers to the (Opcode, VecOps) overload.
int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
                                 R600Operands::VecOps Op) const {
  const unsigned Opcode = MI.getOpcode();
  return getOperandIdx(Opcode, Op);
}
int R600InstrInfo::getOperandIdx(unsigned Opcode,
R600Operands::Ops Op) const {
unsigned TargetFlags = get(Opcode).TSFlags;
@ -997,6 +1027,11 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode,
return R600Operands::ALUOpTable[OpTableIdx][Op];
}
/// Index of the vector operand \p Op.
///
/// The result is simply the enumerator value plus one; \p Opcode is accepted
/// for symmetry with the R600Operands::Ops overload but is not consulted.
/// NOTE(review): the +1 presumably skips a leading operand of the
/// instruction - confirm against the VecOps enum layout.
int R600InstrInfo::getOperandIdx(unsigned Opcode,
                                 R600Operands::VecOps Op) const {
  (void)Opcode;
  const int Position = static_cast<int>(Op);
  return Position + 1;
}
void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
int64_t Imm) const {
int Idx = getOperandIdx(*MI, Op);

View File

@ -212,11 +212,13 @@ namespace llvm {
///
/// \returns -1 if the Instruction does not contain the specified \p Op.
int getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const;
/// \brief Same lookup for a R600Operands::VecOps operand; forwards to the
/// (Opcode, VecOps) overload using the opcode of \p MI.
int getOperandIdx(const MachineInstr &MI, R600Operands::VecOps Op) const;
/// \brief Get the index of \p Op for the given Opcode.
///
/// \returns -1 if the Instruction does not contain the specified \p Op.
int getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const;
/// \brief Get the index of the vector operand \p Op.
///
/// The definition returns \p Op + 1 without looking at \p Opcode, so this
/// overload never yields -1.
int getOperandIdx(unsigned Opcode, R600Operands::VecOps Op) const;
/// \brief Helper function for setting instruction flag values.
void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const;

View File

@ -0,0 +1,27 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
; Exactly one constant vector can be folded into dot4, which means exactly
; 4 MOV instructions
; CHECK: @main
; CHECK: MOV
; CHECK: MOV
; CHECK: MOV
; CHECK: MOV
; CHECK-NOT: MOV
; CHECK-NOT: MOV
; CHECK-NOT: MOV
; CHECK-NOT: MOV
; Loads two <4 x float> vectors from address space 8, feeds them to the dp4
; intrinsic, and passes the scalar result (placed in lane 0 of an otherwise
; undef vector) to @llvm.R600.store.swizzle. The %out argument is not used.
define void @main(float addrspace(1)* %out) {
main_body:
; First operand: the vector at the null base address.
%0 = load <4 x float> addrspace(8)* null
; Second operand: element 1 of the [1024 x <4 x float>] array at the same base.
%1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
; The dot product under test.
%2 = call float @llvm.AMDGPU.dp4(<4 x float> %0,<4 x float> %1)
%3 = insertelement <4 x float> undef, float %2, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %3, i32 0, i32 0)
ret void
}
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #1 = { readnone }