forked from OSchip/llvm-project
R600: Add support for GROUP_BARRIER instruction
Reviewed-by: Vincent Lejeune <vljn at ovi.com>
llvm-svn: 185161
This commit is contained in:
parent
5eb903d9c5
commit
ce540330df
|
@ -50,6 +50,8 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
|
|||
def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
|
||||
}
|
||||
|
||||
let TargetPrefix = "TGSI", isTarget = 1 in {
|
||||
|
|
|
@ -177,7 +177,14 @@ private:
|
|||
AluInstCount ++;
|
||||
continue;
|
||||
}
|
||||
if (I->getOpcode() == AMDGPU::KILLGT) {
|
||||
// XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
|
||||
//
|
||||
// * KILL or INTERP instructions
|
||||
// * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
|
||||
// * Uses waterfalling (i.e. INDEX_MODE = AR.X)
|
||||
//
|
||||
// XXX: These checks have not been implemented yet.
|
||||
if (TII->mustBeLastInClause(I->getOpcode())) {
|
||||
I++;
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -163,6 +163,16 @@ bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
|
|||
usesTextureCache(MI->getOpcode());
|
||||
}
|
||||
|
||||
/// Reports whether \p Opcode names an instruction that has to be the final
/// instruction of its clause.
///
/// \returns true for AMDGPU::KILLGT and AMDGPU::GROUP_BARRIER, false for
/// every other opcode.
bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
|
||||
switch (Opcode) {
|
||||
case AMDGPU::KILLGT:
|
||||
case AMDGPU::GROUP_BARRIER:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
|
||||
R600InstrInfo::getSrcs(MachineInstr *MI) const {
|
||||
SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
|
||||
|
|
|
@ -72,6 +72,8 @@ namespace llvm {
|
|||
bool usesTextureCache(unsigned Opcode) const;
|
||||
bool usesTextureCache(const MachineInstr *MI) const;
|
||||
|
||||
bool mustBeLastInClause(unsigned Opcode) const;
|
||||
|
||||
/// \returns a pair for each src of an ALU instructions.
|
||||
/// The first member of a pair is the register id.
|
||||
/// If register is ALU_CONST, second member is SEL.
|
||||
|
|
|
@ -1499,6 +1499,36 @@ let hasSideEffects = 1 in {
|
|||
|
||||
def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
|
||||
|
||||
// GROUP_BARRIER instruction record.
//
// The instruction has no outputs and no inputs and is selected from the
// int_AMDGPU_barrier_local intrinsic pattern. Its encoding is assembled from
// R600ALU_Word0 and R600ALU_Word1_OP2 with the value 0x54; every encoding
// field below is fixed to 0 except `last`, which is fixed to 1.
def GROUP_BARRIER : InstR600 <
|
||||
(outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local)], AnyALU>,
|
||||
R600ALU_Word0,
|
||||
R600ALU_Word1_OP2 <0x54> {
|
||||
|
||||
let dst = 0;
|
||||
let dst_rel = 0;
|
||||
let src0 = 0;
|
||||
let src0_rel = 0;
|
||||
let src0_neg = 0;
|
||||
let src0_abs = 0;
|
||||
let src1 = 0;
|
||||
let src1_rel = 0;
|
||||
let src1_neg = 0;
|
||||
let src1_abs = 0;
|
||||
let write = 0;
|
||||
let omod = 0;
|
||||
let clamp = 0;
|
||||
// The only encoding field set to a non-zero value.
let last = 1;
|
||||
let bank_swizzle = 0;
|
||||
let pred_sel = 0;
|
||||
let update_exec_mask = 0;
|
||||
let update_pred = 0;
|
||||
|
||||
// Word0 fills the low 32 bits of the encoding, Word1 the high 32 bits.
let Inst{31-0} = Word0;
|
||||
let Inst{63-32} = Word1;
|
||||
|
||||
let ALUInst = 1;
|
||||
}
|
||||
|
||||
// TRUNC is used for the FLT_TO_INT instructions to work around a
|
||||
// perceived problem where the rounding modes are applied differently
|
||||
// depending on the instruction and the slot they are in.
|
||||
|
|
|
@ -269,10 +269,14 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
|
|||
}
|
||||
|
||||
// Does the instruction take a whole IG ?
|
||||
// XXX: Is it possible to add a helper function in R600InstrInfo that can
|
||||
// be used here and in R600PacketizerList::isSoloInstruction() ?
|
||||
if(TII->isVector(*MI) ||
|
||||
TII->isCubeOp(MI->getOpcode()) ||
|
||||
TII->isReductionOp(MI->getOpcode()))
|
||||
TII->isReductionOp(MI->getOpcode()) ||
|
||||
MI->getOpcode() == AMDGPU::GROUP_BARRIER) {
|
||||
return AluT_XYZW;
|
||||
}
|
||||
|
||||
// Is the result already assigned to a channel ?
|
||||
unsigned DestSubReg = MI->getOperand(0).getSubReg();
|
||||
|
|
|
@ -82,7 +82,11 @@ private:
|
|||
int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
|
||||
if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
|
||||
continue;
|
||||
unsigned Dst = BI->getOperand(0).getReg();
|
||||
int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
|
||||
if (DstIdx == -1) {
|
||||
continue;
|
||||
}
|
||||
unsigned Dst = BI->getOperand(DstIdx).getReg();
|
||||
if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
|
||||
BI->getOpcode() == AMDGPU::DOT4_eg) {
|
||||
Result[Dst] = AMDGPU::PV_X;
|
||||
|
@ -154,6 +158,8 @@ public:
|
|||
return true;
|
||||
if (TII->isTransOnly(MI))
|
||||
return true;
|
||||
if (MI->getOpcode() == AMDGPU::GROUP_BARRIER)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
|
||||
; CHECK: GROUP_BARRIER
|
||||
|
||||
; Stores the value returned by llvm.r600.read.tidig.x (%0) to %out[%0], calls
; llvm.AMDGPU.barrier.local, then loads %out[local.size.x - 1 - %0] and stores
; that value back to %out[%0].
define void @test(i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%0 = call i32 @llvm.r600.read.tidig.x()
|
||||
%1 = getelementptr i32 addrspace(1)* %out, i32 %0
|
||||
store i32 %0, i32 addrspace(1)* %1
|
||||
; The barrier call sits between the first store and the later load.
call void @llvm.AMDGPU.barrier.local()
|
||||
%2 = call i32 @llvm.r600.read.local.size.x()
|
||||
%3 = sub i32 %2, 1
|
||||
%4 = sub i32 %3, %0
|
||||
%5 = getelementptr i32 addrspace(1)* %out, i32 %4
|
||||
%6 = load i32 addrspace(1)* %5
|
||||
store i32 %6, i32 addrspace(1)* %1
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #0
|
||||
declare void @llvm.AMDGPU.barrier.local()
|
||||
declare i32 @llvm.r600.read.local.size.x() #0
|
||||
|
||||
attributes #0 = { readnone }
|
Loading…
Reference in New Issue