LiveRegMatrix: Fix some subreg interference checks

Surprisingly, one of the three interference checks in LiveRegMatrix was
using the main live range instead of the apropriate subregister range
resulting in unnecessarily conservative results.

llvm-svn: 296722
This commit is contained in:
Matthias Braun 2017-03-02 00:35:08 +00:00
parent 3095856d80
commit dbcf9e2ee4
8 changed files with 55 additions and 30 deletions

View File

@ -141,7 +141,7 @@ public:
/// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg.
/// This returns a reference to an internal Query data structure that is only
/// valid until the next query() call.
LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned RegUnit);
LiveIntervalUnion::Query &query(const LiveRange &LR, unsigned RegUnit);
/// Directly access the live interval unions per regunit.
/// This returns an array indexed by the regunit number.

View File

@ -175,10 +175,10 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
return Result;
}
LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg,
LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR,
unsigned RegUnit) {
LiveIntervalUnion::Query &Q = Queries[RegUnit];
Q.init(UserTag, VirtReg, Matrix[RegUnit]);
Q.init(UserTag, LR, Matrix[RegUnit]);
return Q;
}
@ -196,9 +196,12 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
return IK_RegUnit;
// Check the matrix for virtual register interference.
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
if (query(VirtReg, *Units).checkInterference())
return IK_VirtReg;
bool Interference = foreachUnit(TRI, VirtReg, PhysReg,
[&](unsigned Unit, const LiveRange &LR) {
return query(LR, Unit).checkInterference();
});
if (Interference)
return IK_VirtReg;
return IK_Free;
}

View File

@ -191,10 +191,10 @@ define void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i
; GFX9: flat_load_dword [[A:v[0-9]+]]
; GFX9: flat_load_dword [[B:v[0-9]+]]
; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
; GFX9-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GFX9: buffer_store_dwordx4
; VI: flat_load_ushort v[[A_LO:[0-9]+]]
@ -203,9 +203,9 @@ define void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i
; VI: flat_load_ushort v[[B_HI:[0-9]+]]
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; VI: v_add_u16_e32
; VI: v_add_u16_e32
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; VI: v_add_u16_e32
; VI: v_add_u16_e32
; VI: buffer_store_dwordx4
define void @v_test_add_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {

View File

@ -5,20 +5,19 @@
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI-DAG: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_nlt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
; GCN: s_cbranch_vccnz
; GCN: one{{$}}
; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
; SI: s_branch
; VI: buffer_store_short
; VI: s_endpgm
; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[B_F32]]
; GCN: buffer_store_short
; GCN: s_endpgm
; GCN: two{{$}}
; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_short v[[B_F16]]
; GCN: s_endpgm
define void @br_cc_f16(

View File

@ -1195,9 +1195,8 @@ define void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double ad
; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}}
; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]]
; GCN-DAG: v_mov_b32_e32 v[[NEG_A_LO:[0-9]+]], v[[A_LO]]
; GCN: buffer_store_dword [[RESULT]]
; GCN: buffer_store_dwordx2 v{{\[}}[[NEG_A_LO]]:[[NEG_A_HI]]{{\]}}
; GCN: buffer_store_dwordx2 v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}}
define void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64

View File

@ -417,10 +417,10 @@ define void @v_insertelement_v2i16_dynamic_sgpr(<2 x i16> addrspace(1)* %out, <2
; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
; GFX89: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
; CI: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
; CI: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]
; CI-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
; CI-DAG: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]
; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@ -444,10 +444,10 @@ define void @v_insertelement_v2i16_dynamic_vgpr(<2 x i16> addrspace(1)* %out, <2
; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
; GFX89: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
; CI: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
; CI: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]
; CI-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
; CI-DAG: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]]
; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]

View File

@ -188,10 +188,10 @@ define void @v_test_sub_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i
; GFX9: flat_load_dword [[A:v[0-9]+]]
; GFX9: flat_load_dword [[B:v[0-9]+]]
; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GFX9: v_pk_sub_i16 [[ADD:v[0-9]+]], [[A]], [[B]]
; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
; GFX9-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GFX9: buffer_store_dwordx4
; VI: flat_load_ushort v[[A_LO:[0-9]+]]
@ -199,10 +199,10 @@ define void @v_test_sub_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i
; VI: flat_load_ushort v[[B_LO:[0-9]+]]
; VI: flat_load_ushort v[[B_HI:[0-9]+]]
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; VI: v_subrev_u16_e32
; VI: v_subrev_u16_e32
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; VI-DAG: v_subrev_u16_e32
; VI-DAG: v_subrev_u16_e32
; VI: buffer_store_dwordx4
define void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 {

View File

@ -0,0 +1,24 @@
# RUN: llc -o - %s -mtriple=amdgcn--amdhsa -verify-machineinstrs -run-pass=greedy,virtregrewriter | FileCheck %s
---
# We should not detect any interference between v0/v1 here and only allocate
# sgpr0-sgpr3.
#
# CHECK-LABEL: func0
# CHECK: S_NOP 0, implicit-def %sgpr0
# CHECK: S_NOP 0, implicit-def %sgpr3
# CHECK: S_NOP 0, implicit-def %sgpr1
# CHECK: S_NOP 0, implicit-def %sgpr2
# CHECK: S_NOP 0, implicit %sgpr0, implicit %sgpr3
# CHECK: S_NOP 0, implicit %sgpr1, implicit %sgpr2
name: func0
body: |
bb.0:
S_NOP 0, implicit-def undef %0.sub0 : sreg_128
S_NOP 0, implicit-def %0.sub3
S_NOP 0, implicit-def undef %1.sub1 : sreg_128
S_NOP 0, implicit-def %1.sub2
S_NOP 0, implicit %0.sub0, implicit %0.sub3
S_NOP 0, implicit %1.sub1, implicit %1.sub2
...