forked from OSchip/llvm-project
AMDGPU: Fix handling of target flags when commuting instruction
If the original register operand had a subregister, it wasn't getting cleared. This resulted in the subreg index being reinterpreted as unrecognized target flags, which produced unparseable MIR.
This commit is contained in:
parent
16ea23ff78
commit
a21544ad11
|
@ -1860,6 +1860,9 @@ static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
|
|||
else
|
||||
return nullptr;
|
||||
|
||||
// Make sure we don't reinterpret a subreg index in the target flags.
|
||||
RegOp.setTargetFlags(NonRegOp.getTargetFlags());
|
||||
|
||||
NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug);
|
||||
NonRegOp.setSubReg(SubReg);
|
||||
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=machine-cse -o - %s | FileCheck %s
|
||||
|
||||
--- |
|
||||
define void @commute_instruction_subreg_target_flag() { ret void }
|
||||
define void @commute_target_flag_frame_index() { ret void }
|
||||
define void @commute_target_flag_global() { ret void }
|
||||
declare void @func()
|
||||
|
||||
...
|
||||
|
||||
# Make sure the subreg index is cleared when commuting a register and immediate.
|
||||
|
||||
---
|
||||
name: commute_instruction_subreg_target_flag
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; CHECK-LABEL: name: commute_instruction_subreg_target_flag
|
||||
; CHECK: liveins: $vgpr0_vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]].sub1, 64, 0, implicit $exec
|
||||
; CHECK: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_]]
|
||||
%0:vreg_64 = COPY $vgpr0_vgpr1
|
||||
%1:vgpr_32 = V_ADD_U32_e64 %0.sub1, 64, 0, implicit $exec
|
||||
%2:vgpr_32 = V_ADD_U32_e64 64, %0.sub1, 0, implicit $exec
|
||||
S_ENDPGM 0, implicit %1, implicit %2
|
||||
|
||||
...
|
||||
|
||||
# FIXME: Why doesn't this CSE?
|
||||
---
|
||||
name: commute_target_flag_frame_index
|
||||
tracksRegLiveness: true
|
||||
stack:
|
||||
- { id: 0, type: default, offset: 0, size: 4, alignment: 4 }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
|
||||
; CHECK-LABEL: name: commute_target_flag_frame_index
|
||||
; CHECK: liveins: $vgpr0_vgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, [[COPY]].sub0, 0, implicit $exec
|
||||
; CHECK: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_]]
|
||||
%0:vreg_64 = COPY $vgpr0_vgpr1
|
||||
%1:vgpr_32 = V_ADD_U32_e64 %0.sub0, %stack.0, 0, implicit $exec
|
||||
%2:vgpr_32 = V_ADD_U32_e64 %stack.0, %0.sub0, 0, implicit $exec
|
||||
S_ENDPGM 0, implicit %1, implicit %2
|
||||
|
||||
...
|
||||
|
||||
# FIXME: Handle commuting global variables
|
||||
---
|
||||
name: commute_target_flag_global
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1
|
||||
|
||||
; CHECK-LABEL: name: commute_target_flag_global
|
||||
; CHECK: liveins: $sgpr0_sgpr1
|
||||
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
|
||||
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @func, implicit-def dead $scc
|
||||
; CHECK: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @func, [[COPY]].sub0, implicit-def dead $scc
|
||||
; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_1]]
|
||||
%0:sreg_64 = COPY $sgpr0_sgpr1
|
||||
%1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @func, implicit-def dead $scc
|
||||
%2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @func, %0.sub0, implicit-def dead $scc
|
||||
S_ENDPGM 0, implicit %1, implicit %2
|
||||
|
||||
...
|
Loading…
Reference in New Issue