R600: Move code handling literal folding into R600ISelLowering.

llvm-svn: 190644
This commit is contained in:
Vincent Lejeune 2013-09-12 23:44:53 +00:00
parent ab3baf80a8
commit 9a248e5c2d
3 changed files with 75 additions and 109 deletions

View File

@ -193,8 +193,6 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
}
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
const R600InstrInfo *TII =
static_cast<const R600InstrInfo*>(TM.getInstrInfo());
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
return NULL; // Already selected.
@ -310,109 +308,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
SDLoc(N), N->getValueType(0), Ops);
}
case ISD::ConstantFP:
case ISD::Constant: {
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
// XXX: Custom immediate lowering not implemented yet. Instead we use
// pseudo instructions defined in SIInstructions.td
if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
break;
}
uint64_t ImmValue = 0;
unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
if (N->getOpcode() == ISD::ConstantFP) {
// XXX: 64-bit Immediates not supported yet
assert(N->getValueType(0) != MVT::f64);
ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
APFloat Value = C->getValueAPF();
float FloatValue = Value.convertToFloat();
if (FloatValue == 0.0) {
ImmReg = AMDGPU::ZERO;
} else if (FloatValue == 0.5) {
ImmReg = AMDGPU::HALF;
} else if (FloatValue == 1.0) {
ImmReg = AMDGPU::ONE;
} else {
ImmValue = Value.bitcastToAPInt().getZExtValue();
}
} else {
// XXX: 64-bit Immediates not supported yet
assert(N->getValueType(0) != MVT::i64);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
if (C->getZExtValue() == 0) {
ImmReg = AMDGPU::ZERO;
} else if (C->getZExtValue() == 1) {
ImmReg = AMDGPU::ONE_INT;
} else {
ImmValue = C->getZExtValue();
}
}
for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
Use != SDNode::use_end(); Use = Next) {
Next = llvm::next(Use);
std::vector<SDValue> Ops;
for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
Ops.push_back(Use->getOperand(i));
}
if (!Use->isMachineOpcode()) {
if (ImmReg == AMDGPU::ALU_LITERAL_X) {
// We can only use literal constants (e.g. AMDGPU::ZERO,
// AMDGPU::ONE, etc) in machine opcodes.
continue;
}
} else {
switch(Use->getMachineOpcode()) {
case AMDGPU::REG_SEQUENCE: break;
default:
if (!TII->isALUInstr(Use->getMachineOpcode()) ||
(TII->get(Use->getMachineOpcode()).TSFlags &
R600_InstFlag::VECTOR)) {
continue;
}
}
// Check that we aren't already using an immediate.
// XXX: It's possible for an instruction to have more than one
// immediate operand, but this is not supported yet.
if (ImmReg == AMDGPU::ALU_LITERAL_X) {
int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
AMDGPU::OpName::literal);
if (ImmIdx == -1) {
continue;
}
if (TII->getOperandIdx(Use->getMachineOpcode(),
AMDGPU::OpName::dst) != -1) {
// subtract one from ImmIdx, because the DST operand is usually index
// 0 for MachineInstrs, but we have no DST in the Ops vector.
ImmIdx--;
}
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
assert(C);
if (C->getZExtValue() != 0) {
// This instruction is already using an immediate.
continue;
}
// Set the immediate value
Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
}
}
// Set the immediate register
Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
}
break;
}
}
SDNode *Result = SelectCode(N);

View File

@ -1632,7 +1632,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
static bool
FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
SDValue &Abs, SDValue &Sel, SelectionDAG &DAG) {
SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
const R600InstrInfo *TII =
static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
if (!Src.isMachineOpcode())
@ -1705,6 +1705,51 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
return true;
}
case AMDGPU::MOV_IMM_I32:
case AMDGPU::MOV_IMM_F32: {
  // Fold a MOV_IMM_* source into the parent instruction.  Values that have a
  // dedicated constant register (ZERO, HALF, ONE, ONE_INT) are replaced by
  // that register; every other value is routed through ALU_LITERAL_X and its
  // bits are written into the parent's literal operand (Imm).
  unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
  uint64_t ImmValue = 0;

  if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
    // cast<> rather than dyn_cast<>: the result is dereferenced
    // unconditionally, so an unexpected operand kind must trip the cast
    // assertion instead of silently producing a null pointer.
    ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
    float FloatValue = FPC->getValueAPF().convertToFloat();
    // NOTE(review): `FloatValue == 0.0` is also true for -0.0, so a negative
    // zero is folded to ZERO as well -- confirm this is intended.
    if (FloatValue == 0.0) {
      ImmReg = AMDGPU::ZERO;
    } else if (FloatValue == 0.5) {
      ImmReg = AMDGPU::HALF;
    } else if (FloatValue == 1.0) {
      ImmReg = AMDGPU::ONE;
    } else {
      // No inline register for this value: keep the raw bit pattern.
      ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
    }
  } else {
    ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
    uint64_t Value = C->getZExtValue();
    if (Value == 0) {
      ImmReg = AMDGPU::ZERO;
    } else if (Value == 1) {
      ImmReg = AMDGPU::ONE_INT;
    } else {
      ImmValue = Value;
    }
  }

  // Check that we aren't already using an immediate.
  // XXX: It's possible for an instruction to have more than one
  // immediate operand, but this is not supported yet.
  if (ImmReg == AMDGPU::ALU_LITERAL_X) {
    // An empty Imm means there is no literal operand to write into, so a
    // value that needs ALU_LITERAL_X cannot be folded.
    if (!Imm.getNode())
      return false;
    ConstantSDNode *C = cast<ConstantSDNode>(Imm);
    // A non-zero literal operand is treated as already in use.
    if (C->getZExtValue())
      return false;
    Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
  }
  Src = DAG.getRegister(ImmReg, MVT::i32);
  return true;
}
default:
return false;
}
@ -1768,7 +1813,13 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
if (HasDst)
SelIdx--;
SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}
} else if (Opcode == AMDGPU::REG_SEQUENCE) {
for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
SDValue &Src = Ops[i];
if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}
} else {
@ -1798,10 +1849,14 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
if (HasDst)
int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
if (HasDst) {
SelIdx--;
ImmIdx--;
}
SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
SDValue &Imm = Ops[ImmIdx];
if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}
}

View File

@ -46,3 +46,19 @@ entry:
store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> addrspace(1)* %out
ret void
}
; Test for inline constants feeding a DOT4: both dp4 operands are constant
; vectors whose lanes are all 1.0. The directives below expect four
; consecutive DOT4 lines that share one destination GPR (captured as GPR),
; each printing 1.0 as its first source, with the Y/Z/W lanes marked MASKED.
; CHECK: @inline_literal_dot4
; CHECK: DOT4 T[[GPR:[0-9]]].X, 1.0
; CHECK-NEXT: DOT4 T[[GPR]].Y (MASKED), 1.0
; CHECK-NEXT: DOT4 T[[GPR]].Z (MASKED), 1.0
; CHECK-NEXT: DOT4 * T[[GPR]].W (MASKED), 1.0
define void @inline_literal_dot4(float addrspace(1)* %out) {
entry:
; Dot product of two all-1.0 <4 x float> constants via the dp4 intrinsic.
%0 = call float @llvm.AMDGPU.dp4(<4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
; Store the scalar result through %out so the call is not dead.
store float %0, float addrspace(1)* %out
ret void
}
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
attributes #1 = { readnone }