forked from OSchip/llvm-project
LiveRegMatrix: Fix some subreg interference checks
Surprisingly, one of the three interference checks in LiveRegMatrix was using the main live range instead of the appropriate subregister range, resulting in unnecessarily conservative results. llvm-svn: 296722
This commit is contained in:
parent
3095856d80
commit
dbcf9e2ee4
|
@ -141,7 +141,7 @@ public:
|
|||
/// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg.
|
||||
/// This returns a reference to an internal Query data structure that is only
|
||||
/// valid until the next query() call.
|
||||
LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned RegUnit);
|
||||
LiveIntervalUnion::Query &query(const LiveRange &LR, unsigned RegUnit);
|
||||
|
||||
/// Directly access the live interval unions per regunit.
|
||||
/// This returns an array indexed by the regunit number.
|
||||
|
|
|
@ -175,10 +175,10 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
|
|||
return Result;
|
||||
}
|
||||
|
||||
LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg,
|
||||
LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR,
|
||||
unsigned RegUnit) {
|
||||
LiveIntervalUnion::Query &Q = Queries[RegUnit];
|
||||
Q.init(UserTag, VirtReg, Matrix[RegUnit]);
|
||||
Q.init(UserTag, LR, Matrix[RegUnit]);
|
||||
return Q;
|
||||
}
|
||||
|
||||
|
@ -196,9 +196,12 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
|
|||
return IK_RegUnit;
|
||||
|
||||
// Check the matrix for virtual register interference.
|
||||
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
|
||||
if (query(VirtReg, *Units).checkInterference())
|
||||
return IK_VirtReg;
|
||||
bool Interference = foreachUnit(TRI, VirtReg, PhysReg,
|
||||
[&](unsigned Unit, const LiveRange &LR) {
|
||||
return query(LR, Unit).checkInterference();
|
||||
});
|
||||
if (Interference)
|
||||
return IK_VirtReg;
|
||||
|
||||
return IK_Free;
|
||||
}
|
||||
|
|
|
@ -191,10 +191,10 @@ define void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i
|
|||
; GFX9: flat_load_dword [[A:v[0-9]+]]
|
||||
; GFX9: flat_load_dword [[B:v[0-9]+]]
|
||||
|
||||
; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
|
||||
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
|
||||
; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
|
||||
; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
|
||||
; GFX9-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
|
||||
; GFX9: buffer_store_dwordx4
|
||||
|
||||
; VI: flat_load_ushort v[[A_LO:[0-9]+]]
|
||||
|
@ -203,9 +203,9 @@ define void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i
|
|||
; VI: flat_load_ushort v[[B_HI:[0-9]+]]
|
||||
|
||||
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
|
||||
; VI: v_add_u16_e32
|
||||
; VI: v_add_u16_e32
|
||||
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
|
||||
; VI: v_add_u16_e32
|
||||
; VI: v_add_u16_e32
|
||||
|
||||
; VI: buffer_store_dwordx4
|
||||
define void @v_test_add_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
|
||||
|
|
|
@ -5,20 +5,19 @@
|
|||
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
|
||||
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
|
||||
|
||||
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
|
||||
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
|
||||
; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
|
||||
; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
|
||||
; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
|
||||
; SI: v_cmp_nlt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
|
||||
; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
|
||||
; GCN: s_cbranch_vccnz
|
||||
|
||||
; GCN: one{{$}}
|
||||
; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
|
||||
; SI: s_branch
|
||||
; VI: buffer_store_short
|
||||
; VI: s_endpgm
|
||||
; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[B_F32]]
|
||||
; GCN: buffer_store_short
|
||||
; GCN: s_endpgm
|
||||
|
||||
; GCN: two{{$}}
|
||||
; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
|
||||
; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[A_F32]]
|
||||
; GCN: buffer_store_short v[[B_F16]]
|
||||
; GCN: s_endpgm
|
||||
define void @br_cc_f16(
|
||||
|
|
|
@ -1195,9 +1195,8 @@ define void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double ad
|
|||
; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
|
||||
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
|
||||
; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
|
||||
; GCN-DAG: v_mov_b32_e32 v[[NEG_A_LO:[0-9]+]], v[[A_LO]]
|
||||
; GCN: buffer_store_dword [[RESULT]]
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[NEG_A_LO]]:[[NEG_A_HI]]{{\]}}
|
||||
; GCN: buffer_store_dwordx2 v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
|
||||
define void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
|
|
|
@ -417,10 +417,10 @@ define void @v_insertelement_v2i16_dynamic_sgpr(<2 x i16> addrspace(1)* %out, <2
|
|||
|
||||
; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
|
||||
; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
|
||||
; GFX89: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
|
||||
; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
|
||||
|
||||
; CI: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
|
||||
; CI: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]
|
||||
; CI-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
|
||||
; CI-DAG: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]
|
||||
|
||||
; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]]
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
|
@ -444,10 +444,10 @@ define void @v_insertelement_v2i16_dynamic_vgpr(<2 x i16> addrspace(1)* %out, <2
|
|||
|
||||
; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
|
||||
; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
|
||||
; GFX89: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
|
||||
; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
|
||||
|
||||
; CI: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
|
||||
; CI: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]
|
||||
; CI-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
|
||||
; CI-DAG: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]
|
||||
|
||||
; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]]
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
|
|
|
@ -188,10 +188,10 @@ define void @v_test_sub_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i
|
|||
; GFX9: flat_load_dword [[A:v[0-9]+]]
|
||||
; GFX9: flat_load_dword [[B:v[0-9]+]]
|
||||
|
||||
; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
|
||||
; GFX9: v_pk_sub_i16 [[ADD:v[0-9]+]], [[A]], [[B]]
|
||||
; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
|
||||
; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
|
||||
; GFX9-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
|
||||
; GFX9: buffer_store_dwordx4
|
||||
|
||||
; VI: flat_load_ushort v[[A_LO:[0-9]+]]
|
||||
|
@ -199,10 +199,10 @@ define void @v_test_sub_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i
|
|||
; VI: flat_load_ushort v[[B_LO:[0-9]+]]
|
||||
; VI: flat_load_ushort v[[B_HI:[0-9]+]]
|
||||
|
||||
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
|
||||
; VI: v_subrev_u16_e32
|
||||
; VI: v_subrev_u16_e32
|
||||
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
|
||||
; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
|
||||
; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
|
||||
; VI-DAG: v_subrev_u16_e32
|
||||
; VI-DAG: v_subrev_u16_e32
|
||||
|
||||
; VI: buffer_store_dwordx4
|
||||
define void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
# RUN: llc -o - %s -mtriple=amdgcn--amdhsa -verify-machineinstrs -run-pass=greedy,virtregrewriter | FileCheck %s
|
||||
---
|
||||
# We should not detect any interference between v0/v1 here and only allocate
|
||||
# sgpr0-sgpr3.
|
||||
#
|
||||
# CHECK-LABEL: func0
|
||||
# CHECK: S_NOP 0, implicit-def %sgpr0
|
||||
# CHECK: S_NOP 0, implicit-def %sgpr3
|
||||
# CHECK: S_NOP 0, implicit-def %sgpr1
|
||||
# CHECK: S_NOP 0, implicit-def %sgpr2
|
||||
# CHECK: S_NOP 0, implicit %sgpr0, implicit %sgpr3
|
||||
# CHECK: S_NOP 0, implicit %sgpr1, implicit %sgpr2
|
||||
name: func0
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0, implicit-def undef %0.sub0 : sreg_128
|
||||
S_NOP 0, implicit-def %0.sub3
|
||||
S_NOP 0, implicit-def undef %1.sub1 : sreg_128
|
||||
S_NOP 0, implicit-def %1.sub2
|
||||
|
||||
|
||||
S_NOP 0, implicit %0.sub0, implicit %0.sub3
|
||||
S_NOP 0, implicit %1.sub1, implicit %1.sub2
|
||||
...
|
Loading…
Reference in New Issue