forked from OSchip/llvm-project
R600: Const/Neg/Abs can be folded to dot4
llvm-svn: 183278
This commit is contained in:
parent
31caf980c6
commit
c689679173
|
@ -49,7 +49,10 @@ public:
|
|||
|
||||
private:
|
||||
inline SDValue getSmallIPtrImm(unsigned Imm);
|
||||
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
|
||||
const R600InstrInfo *TII, std::vector<unsigned> Cst);
|
||||
bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
|
||||
bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
|
||||
|
||||
// Complex pattern selectors
|
||||
bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
|
||||
|
@ -318,6 +321,20 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
|||
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
||||
const R600InstrInfo *TII =
|
||||
static_cast<const R600InstrInfo*>(TM.getInstrInfo());
|
||||
if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
|
||||
bool IsModified = false;
|
||||
do {
|
||||
std::vector<SDValue> Ops;
|
||||
for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
|
||||
I != E; ++I)
|
||||
Ops.push_back(*I);
|
||||
IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
|
||||
if (IsModified) {
|
||||
Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
|
||||
}
|
||||
} while (IsModified);
|
||||
|
||||
}
|
||||
if (Result && Result->isMachineOpcode() &&
|
||||
!(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
|
||||
&& TII->isALUInstr(Result->getMachineOpcode())) {
|
||||
|
@ -360,6 +377,43 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
|||
return Result;
|
||||
}
|
||||
|
||||
/// Try to fold one level of the node that produces \p Src into the operand
/// slots of the instruction being selected.
///
/// Depending on the opcode of \p Src:
///  - AMDGPUISD::CONST_ADDRESS: \p Src becomes the AMDGPU::ALU_CONST register
///    and \p Sel receives the constant offset, provided the enlarged constant
///    set still passes TII->fitsConstReadLimitations().
///  - ISD::FNEG: \p Src becomes the negated value and \p Neg is set to 1.
///  - ISD::FABS: \p Src becomes the inner value and \p Abs is set to 1.
///  - ISD::BITCAST: \p Src becomes the value being cast.
///
/// \param Src    operand to inspect; rewritten in place on success.
/// \param Sel    constant-select slot paired with \p Src.
/// \param Neg    negate-modifier slot; an empty SDValue means "no such slot".
/// \param Abs    abs-modifier slot; an empty SDValue means "no such slot".
/// \param TII    instruction info used to validate constant reads.
/// \param Consts constants already read by the instruction. Taken by value so
///               the tentative push_back below never leaks to the caller.
/// \returns true if anything was folded, false otherwise.
bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
                                     SDValue &Abs, const R600InstrInfo *TII,
                                     std::vector<unsigned> Consts) {
  switch (Src.getOpcode()) {
  case AMDGPUISD::CONST_ADDRESS: {
    SDValue CstOffset;
    if (Src.getValueType().isVector() ||
        !SelectGlobalValueConstantOffset(Src.getOperand(0), CstOffset))
      return false;

    // dyn_cast<> yields null when the node is not a ConstantSDNode; refuse to
    // fold in that case instead of dereferencing a null pointer.
    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
    if (!Cst)
      return false;

    Consts.push_back(Cst->getZExtValue());
    if (!TII->fitsConstReadLimitations(Consts))
      return false;

    Src = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
    Sel = CstOffset;
    return true;
  }
  case ISD::FNEG:
    // Same guard as FABS below: with no Neg slot to record the modifier in,
    // folding would strip the FNEG and silently lose the sign change.
    if (!Neg.getNode())
      return false;
    Src = Src.getOperand(0);
    Neg = CurDAG->getTargetConstant(1, MVT::i32);
    return true;
  case ISD::FABS:
    // Callers pass an empty SDValue when the instruction has no abs modifier.
    if (!Abs.getNode())
      return false;
    Src = Src.getOperand(0);
    Abs = CurDAG->getTargetConstant(1, MVT::i32);
    return true;
  case ISD::BITCAST:
    Src = Src.getOperand(0);
    return true;
  default:
    return false;
  }
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
|
||||
const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
|
||||
int OperandIdx[] = {
|
||||
|
@ -383,59 +437,101 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
|
|||
-1
|
||||
};
|
||||
|
||||
// Gather constants values
|
||||
std::vector<unsigned> Consts;
|
||||
for (unsigned j = 0; j < 3; j++) {
|
||||
int SrcIdx = OperandIdx[j];
|
||||
if (SrcIdx < 0)
|
||||
break;
|
||||
if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
|
||||
if (Reg->getReg() == AMDGPU::ALU_CONST) {
|
||||
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
|
||||
Consts.push_back(Cst->getZExtValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
if (OperandIdx[i] < 0)
|
||||
return false;
|
||||
SDValue Operand = Ops[OperandIdx[i] - 1];
|
||||
switch (Operand.getOpcode()) {
|
||||
case AMDGPUISD::CONST_ADDRESS: {
|
||||
SDValue CstOffset;
|
||||
if (Operand.getValueType().isVector() ||
|
||||
!SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
|
||||
break;
|
||||
SDValue &Src = Ops[OperandIdx[i] - 1];
|
||||
SDValue &Sel = Ops[SelIdx[i] - 1];
|
||||
SDValue &Neg = Ops[NegIdx[i] - 1];
|
||||
SDValue FakeAbs;
|
||||
SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
|
||||
if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Gather others constants values
|
||||
std::vector<unsigned> Consts;
|
||||
for (unsigned j = 0; j < 3; j++) {
|
||||
int SrcIdx = OperandIdx[j];
|
||||
if (SrcIdx < 0)
|
||||
break;
|
||||
if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
|
||||
if (Reg->getReg() == AMDGPU::ALU_CONST) {
|
||||
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
|
||||
Consts.push_back(Cst->getZExtValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
|
||||
const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
|
||||
int OperandIdx[] = {
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_X),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_Y),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_Z),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_W),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_X),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_Y),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_Z),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_W)
|
||||
};
|
||||
int SelIdx[] = {
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_X),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_Y),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_Z),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL_W),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_X),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_Y),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_Z),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL_W)
|
||||
};
|
||||
int NegIdx[] = {
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_X),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_Y),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_Z),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_NEG_W),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_X),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_Y),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_Z),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_NEG_W)
|
||||
};
|
||||
int AbsIdx[] = {
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_X),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_Y),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_Z),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC0_ABS_W),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_X),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_Y),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_Z),
|
||||
TII->getOperandIdx(Opcode, R600Operands::SRC1_ABS_W)
|
||||
};
|
||||
|
||||
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
|
||||
Consts.push_back(Cst->getZExtValue());
|
||||
if (!TII->fitsConstReadLimitations(Consts))
|
||||
break;
|
||||
|
||||
Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
|
||||
Ops[SelIdx[i] - 1] = CstOffset;
|
||||
return true;
|
||||
}
|
||||
case ISD::FNEG:
|
||||
if (NegIdx[i] < 0)
|
||||
break;
|
||||
Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
|
||||
Ops[NegIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
|
||||
return true;
|
||||
case ISD::FABS:
|
||||
if (AbsIdx[i] < 0)
|
||||
break;
|
||||
Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
|
||||
Ops[AbsIdx[i] - 1] = CurDAG->getTargetConstant(1, MVT::i32);
|
||||
return true;
|
||||
case ISD::BITCAST:
|
||||
Ops[OperandIdx[i] - 1] = Operand.getOperand(0);
|
||||
return true;
|
||||
default:
|
||||
// Gather constants values
|
||||
std::vector<unsigned> Consts;
|
||||
for (unsigned j = 0; j < 8; j++) {
|
||||
int SrcIdx = OperandIdx[j];
|
||||
if (SrcIdx < 0)
|
||||
break;
|
||||
if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
|
||||
if (Reg->getReg() == AMDGPU::ALU_CONST) {
|
||||
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
|
||||
Consts.push_back(Cst->getZExtValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < 8; i++) {
|
||||
if (OperandIdx[i] < 0)
|
||||
return false;
|
||||
SDValue &Src = Ops[OperandIdx[i] - 1];
|
||||
SDValue &Sel = Ops[SelIdx[i] - 1];
|
||||
SDValue &Neg = Ops[NegIdx[i] - 1];
|
||||
SDValue &Abs = Ops[AbsIdx[i] - 1];
|
||||
if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -108,7 +108,8 @@ private:
|
|||
std::vector<std::pair<unsigned, unsigned> > UsedKCache;
|
||||
const SmallVector<std::pair<MachineOperand *, int64_t>, 3> &Consts =
|
||||
TII->getSrcs(MI);
|
||||
assert(TII->isALUInstr(MI->getOpcode()) && "Can't assign Const");
|
||||
assert((TII->isALUInstr(MI->getOpcode()) ||
|
||||
MI->getOpcode() == AMDGPU::DOT_4) && "Can't assign Const");
|
||||
for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
|
||||
if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
|
||||
continue;
|
||||
|
@ -183,6 +184,9 @@ private:
|
|||
if (TII->isALUInstr(I->getOpcode()) &&
|
||||
!SubstituteKCacheBank(I, KCacheBanks))
|
||||
break;
|
||||
if (I->getOpcode() == AMDGPU::DOT_4 &&
|
||||
!SubstituteKCacheBank(I, KCacheBanks))
|
||||
break;
|
||||
AluInstCount += OccupiedDwords(I);
|
||||
}
|
||||
unsigned Opcode = PushBeforeModifier ?
|
||||
|
|
|
@ -214,7 +214,9 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
|
|||
.getReg();
|
||||
(void) Src0;
|
||||
(void) Src1;
|
||||
assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1));
|
||||
if ((TRI.getEncodingValue(Src0) & 0xff) < 127 &&
|
||||
(TRI.getEncodingValue(Src1) & 0xff) < 127)
|
||||
assert(TRI.getHWRegChan(Src0) == TRI.getHWRegChan(Src1));
|
||||
}
|
||||
MI.eraseFromParent();
|
||||
continue;
|
||||
|
|
|
@ -169,6 +169,31 @@ SmallVector<std::pair<MachineOperand *, int64_t>, 3>
|
|||
R600InstrInfo::getSrcs(MachineInstr *MI) const {
|
||||
SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
|
||||
|
||||
if (MI->getOpcode() == AMDGPU::DOT_4) {
|
||||
static const R600Operands::VecOps OpTable[8][2] = {
|
||||
{R600Operands::SRC0_X, R600Operands::SRC0_SEL_X},
|
||||
{R600Operands::SRC0_Y, R600Operands::SRC0_SEL_Y},
|
||||
{R600Operands::SRC0_Z, R600Operands::SRC0_SEL_Z},
|
||||
{R600Operands::SRC0_W, R600Operands::SRC0_SEL_W},
|
||||
{R600Operands::SRC1_X, R600Operands::SRC1_SEL_X},
|
||||
{R600Operands::SRC1_Y, R600Operands::SRC1_SEL_Y},
|
||||
{R600Operands::SRC1_Z, R600Operands::SRC1_SEL_Z},
|
||||
{R600Operands::SRC1_W, R600Operands::SRC1_SEL_W},
|
||||
};
|
||||
|
||||
for (unsigned j = 0; j < 8; j++) {
|
||||
MachineOperand &MO = MI->getOperand(OpTable[j][0] + 1);
|
||||
unsigned Reg = MO.getReg();
|
||||
if (Reg == AMDGPU::ALU_CONST) {
|
||||
unsigned Sel = MI->getOperand(OpTable[j][1] + 1).getImm();
|
||||
Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
|
||||
continue;
|
||||
}
|
||||
|
||||
}
|
||||
return Result;
|
||||
}
|
||||
|
||||
static const R600Operands::Ops OpTable[3][2] = {
|
||||
{R600Operands::SRC0, R600Operands::SRC0_SEL},
|
||||
{R600Operands::SRC1, R600Operands::SRC1_SEL},
|
||||
|
@ -967,6 +992,11 @@ int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
|
|||
return getOperandIdx(MI.getOpcode(), Op);
|
||||
}
|
||||
|
||||
int R600InstrInfo::getOperandIdx(const MachineInstr &MI,
|
||||
R600Operands::VecOps Op) const {
|
||||
return getOperandIdx(MI.getOpcode(), Op);
|
||||
}
|
||||
|
||||
int R600InstrInfo::getOperandIdx(unsigned Opcode,
|
||||
R600Operands::Ops Op) const {
|
||||
unsigned TargetFlags = get(Opcode).TSFlags;
|
||||
|
@ -997,6 +1027,11 @@ int R600InstrInfo::getOperandIdx(unsigned Opcode,
|
|||
return R600Operands::ALUOpTable[OpTableIdx][Op];
|
||||
}
|
||||
|
||||
int R600InstrInfo::getOperandIdx(unsigned Opcode,
|
||||
R600Operands::VecOps Op) const {
|
||||
return Op + 1;
|
||||
}
|
||||
|
||||
void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
|
||||
int64_t Imm) const {
|
||||
int Idx = getOperandIdx(*MI, Op);
|
||||
|
|
|
@ -212,11 +212,13 @@ namespace llvm {
|
|||
///
|
||||
/// \returns -1 if the Instruction does not contain the specified \p Op.
|
||||
int getOperandIdx(const MachineInstr &MI, R600Operands::Ops Op) const;
|
||||
/// Overload for per-channel vector operands (R600Operands::VecOps); uses the
/// opcode of \p MI and forwards to the opcode-based VecOps overload.
int getOperandIdx(const MachineInstr &MI, R600Operands::VecOps Op) const;
|
||||
|
||||
/// \brief Get the index of \p Op for the given Opcode.
|
||||
///
|
||||
/// \returns -1 if the Instruction does not contain the specified \p Op.
|
||||
int getOperandIdx(unsigned Opcode, R600Operands::Ops Op) const;
|
||||
/// Overload for per-channel vector operands (R600Operands::VecOps).
/// NOTE(review): the implementation returns Op + 1 for every \p Opcode, so
/// unlike the Ops overload it never returns -1 - confirm this is intended.
int getOperandIdx(unsigned Opcode, R600Operands::VecOps Op) const;
|
||||
|
||||
/// \brief Helper function for setting instruction flag values.
|
||||
void setImmOperand(MachineInstr *MI, R600Operands::Ops Op, int64_t Imm) const;
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
|
||||
; Exactly one constant vector can be folded into dot4, which means exactly
|
||||
; 4 MOV instructions
|
||||
; CHECK: @main
|
||||
; CHECK: MOV
|
||||
; CHECK: MOV
|
||||
; CHECK: MOV
|
||||
; CHECK: MOV
|
||||
; CHECK-NOT: MOV
|
||||
; CHECK-NOT: MOV
|
||||
; CHECK-NOT: MOV
|
||||
; CHECK-NOT: MOV
|
||||
|
||||
; Test kernel: loads two <4 x float> vectors from addrspace(8), takes their
; dot product via llvm.AMDGPU.dp4, and stores the scalar result in lane 0 of
; an otherwise-undef vector through llvm.R600.store.swizzle.
define void @main(float addrspace(1)* %out) {
|
||||
main_body:
|
||||
; First vector: element 0 of the addrspace(8) array (null base).
%0 = load <4 x float> addrspace(8)* null
|
||||
; Second vector: element 1 of the same [1024 x <4 x float>] array.
%1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
|
||||
%2 = call float @llvm.AMDGPU.dp4(<4 x float> %0,<4 x float> %1)
|
||||
%3 = insertelement <4 x float> undef, float %2, i32 0
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %3, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
attributes #1 = { readnone }
|
Loading…
Reference in New Issue