forked from OSchip/llvm-project
AMDGPU: Fix assert when rewriting saddr d16 loads
moveOperands does not handle moving tied operands, since it would generally have to fix up the tied operand references. Avoid the assert by untying the operands before the modification and retying them afterwards. These in-place modifications really aren't manageable.
This commit is contained in:
parent
990e806b36
commit
c7cff08f79
|
@ -5034,8 +5034,24 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
|
|||
} else {
|
||||
assert(OldSAddrIdx == NewVAddrIdx);
|
||||
|
||||
if (OldVAddrIdx >= 0)
|
||||
if (OldVAddrIdx >= 0) {
|
||||
int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
|
||||
AMDGPU::OpName::vdst_in);
|
||||
|
||||
// RemoveOperand doesn't try to fix up tied operand indexes as it goes, so
|
||||
// it asserts. Untie the operands for now and retie them afterwards.
|
||||
if (NewVDstIn != -1) {
|
||||
int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
|
||||
Inst.untieRegOperand(OldVDstIn);
|
||||
}
|
||||
|
||||
Inst.RemoveOperand(OldVAddrIdx);
|
||||
|
||||
if (NewVDstIn != -1) {
|
||||
int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
|
||||
Inst.tieOperands(NewVDst, NewVDstIn);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (VAddrDef && MRI.use_nodbg_empty(VAddrDef->getOperand(0).getReg()))
|
||||
|
|
|
@ -31,3 +31,27 @@ bb3: ; preds = %bb3, %bb
|
|||
%i9 = icmp eq i32 %i8, 256
|
||||
br i1 %i9, label %bb2, label %bb3
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_move_load_address_to_vgpr_d16_hi:
|
||||
; GCN-NOT: v_readfirstlane_b32
|
||||
; GCN: global_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}], off glc
|
||||
define amdgpu_kernel void @test_move_load_address_to_vgpr_d16_hi(i16 addrspace(1)* nocapture %arg) {
|
||||
bb:
|
||||
%i1 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 0
|
||||
%load.pre = load volatile i16, i16 addrspace(1)* %i1, align 4
|
||||
%i2 = zext i16 %load.pre to i32
|
||||
br label %bb3
|
||||
|
||||
bb2: ; preds = %bb3
|
||||
ret void
|
||||
|
||||
bb3: ; preds = %bb3, %bb
|
||||
%i = phi i32 [ %i2, %bb ], [ %i8, %bb3 ]
|
||||
%i4 = zext i32 %i to i64
|
||||
%i5 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 %i4
|
||||
%i6 = load volatile i16, i16 addrspace(1)* %i5, align 4
|
||||
%insertelt = insertelement <2 x i16> undef, i16 %i6, i32 1
|
||||
%i8 = bitcast <2 x i16> %insertelt to i32
|
||||
%i9 = icmp eq i32 %i8, 256
|
||||
br i1 %i9, label %bb2, label %bb3
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue