AMDGPU: Fix assert when rewriting saddr d16 loads

moveOperands does not handle moving tied operands since it would
generally have to fix up the tied operand references. Avoid the assert
by untying and retying after the modification. These in-place
modifications really aren't manageable.
This commit is contained in:
Matt Arsenault 2021-05-11 18:10:47 -04:00
parent 990e806b36
commit c7cff08f79
2 changed files with 41 additions and 1 deletions

View File

@ -5034,8 +5034,24 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
} else {
assert(OldSAddrIdx == NewVAddrIdx);
if (OldVAddrIdx >= 0)
if (OldVAddrIdx >= 0) {
int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
AMDGPU::OpName::vdst_in);
// RemoveOperand doesn't try to fix up tied operand indexes as it goes, so
// it asserts. Untie the operands for now and retie them afterwards.
if (NewVDstIn != -1) {
int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
Inst.untieRegOperand(OldVDstIn);
}
Inst.RemoveOperand(OldVAddrIdx);
if (NewVDstIn != -1) {
int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
Inst.tieOperands(NewVDst, NewVDstIn);
}
}
}
if (VAddrDef && MRI.use_nodbg_empty(VAddrDef->getOperand(0).getReg()))

View File

@ -31,3 +31,27 @@ bb3: ; preds = %bb3, %bb
%i9 = icmp eq i32 %i8, 256
br i1 %i9, label %bb2, label %bb3
}
; GCN-LABEL: {{^}}test_move_load_address_to_vgpr_d16_hi:
; GCN-NOT: v_readfirstlane_b32
; GCN: global_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}], off glc
; Kernel with a loop that performs a volatile 16-bit global load and inserts
; the loaded value into the high element (index 1) of a <2 x i16>. The checks
; above expect a global_load_short_d16_hi with a VGPR address and no
; v_readfirstlane_b32.
; NOTE(review): presumably this exercises the d16 load form that carries a
; tied input operand while its address is moved to a VGPR -- confirm against
; SIInstrInfo::moveFlatAddrToVGPR.
define amdgpu_kernel void @test_move_load_address_to_vgpr_d16_hi(i16 addrspace(1)* nocapture %arg) {
bb:
; Load the first element of %arg; its zero-extended value seeds the loop index.
%i1 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 0
%load.pre = load volatile i16, i16 addrspace(1)* %i1, align 4
%i2 = zext i16 %load.pre to i32
br label %bb3
bb2: ; preds = %bb3
ret void
bb3: ; preds = %bb3, %bb
; The index is loop-carried: it is either the seed from %bb or the value
; computed from the previous iteration's load.
%i = phi i32 [ %i2, %bb ], [ %i8, %bb3 ]
%i4 = zext i32 %i to i64
%i5 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %i4
%i6 = load volatile i16, i16 addrspace(1)* %i5, align 4
; Place the loaded i16 in element 1 of an otherwise undef vector, then
; reinterpret the pair as an i32 that feeds both the exit test and the phi.
%insertelt = insertelement <2 x i16> undef, i16 %i6, i32 1
%i8 = bitcast <2 x i16> %insertelt to i32
; Leave the loop once the packed value equals 256.
%i9 = icmp eq i32 %i8, 256
br i1 %i9, label %bb2, label %bb3
}