forked from OSchip/llvm-project
[AMDGPU] Fix mfma scheduling crash
An SUnit can be neither an instruction nor an SDNode; both are null if it represents a nop. Fixed a crash on using SU->getInstr() without a null check. Differential Revision: https://reviews.llvm.org/D69395
This commit is contained in:
parent
5da6d4ec16
commit
3c8e055187
|
@ -773,6 +773,11 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
|
|||
return MI && TII->isSALU(*MI) && !MI->isTerminator();
|
||||
}
|
||||
|
||||
bool isVALU(const SUnit *SU) const {
|
||||
const MachineInstr *MI = SU->getInstr();
|
||||
return MI && TII->isVALU(*MI);
|
||||
}
|
||||
|
||||
bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const {
|
||||
if (Pred->NodeNum < Succ->NodeNum)
|
||||
return true;
|
||||
|
@ -821,7 +826,7 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
|
|||
|
||||
for (SDep &SI : From->Succs) {
|
||||
SUnit *SUv = SI.getSUnit();
|
||||
if (SUv != From && TII->isVALU(*SUv->getInstr()) && canAddEdge(SUv, SU))
|
||||
if (SUv != From && isVALU(SUv) && canAddEdge(SUv, SU))
|
||||
SUv->addPred(SDep(SU, SDep::Artificial), false);
|
||||
}
|
||||
|
||||
|
|
|
@ -76,5 +76,39 @@ exit:
|
|||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_mfma_loop_mfma_forward_init:
|
||||
|
||||
; GCN-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, 0
|
||||
; GCN: v_mfma_f32_32x32x1f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
|
||||
; GCN: [[LOOP:BB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GCN: v_mfma_f32_32x32x1f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GCN: s_cbranch_scc1 [[LOOP]]
|
||||
|
||||
; GCN-COUNT-32: v_accvgpr_read_b32
|
||||
define amdgpu_kernel void @test_mfma_loop_mfma_forward_init(<32 x float> addrspace(1)* %arg) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%init = bitcast i32 %tid to float
|
||||
%mai.0 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> zeroinitializer, i32 0, i32 0, i32 0)
|
||||
|
||||
br label %for.cond.preheader
|
||||
|
||||
for.cond.preheader:
|
||||
%phi = phi <32 x float> [ %mai.0, %entry ], [ %mai.1, %for.cond.preheader ]
|
||||
%c = phi i32 [ 0, %entry ], [ %inc, %for.cond.preheader ]
|
||||
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %phi, i32 0, i32 0, i32 0)
|
||||
%inc = add nuw nsw i32 %c, 1
|
||||
%cc = icmp eq i32 %inc, 16
|
||||
br i1 %cc, label %exit, label %for.cond.preheader
|
||||
|
||||
exit:
|
||||
store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
|
||||
declare i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
|
Loading…
Reference in New Issue