[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s
|
|
|
|
|
|
|
|
# GFX9-LABEL: name: diffoporder_add
|
|
|
|
# GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0, 0
|
|
|
|
# GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0, 0
|
|
|
|
|
|
|
|
name: diffoporder_add
|
|
|
|
body: |
|
|
|
|
bb.0.entry:
|
|
|
|
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
|
|
|
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
|
|
|
|
%3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
|
|
|
%4:sreg_32_xm0 = COPY $sgpr101
|
|
|
|
%5:sreg_32_xm0 = S_MOV_B32 0
|
|
|
|
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
|
|
|
|
$sgpr4 = COPY %4
|
|
|
|
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
|
|
|
%6:vreg_64 = COPY $vgpr0_vgpr1
|
|
|
|
%7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
|
|
|
|
%8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
|
|
%9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
|
|
|
|
%10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
|
|
|
|
%11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
|
|
|
|
%12:sgpr_32 = COPY %1.sub1
|
|
|
|
%13:vgpr_32 = COPY %5
|
2019-03-19 03:35:44 +08:00
|
|
|
%14:vgpr_32, %15:sreg_64_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%16:vgpr_32 = COPY %12
|
2019-03-19 03:35:44 +08:00
|
|
|
%17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
|
|
|
|
%20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec
|
2019-03-19 03:35:44 +08:00
|
|
|
%21:vgpr_32, %22:sreg_64_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec
|
|
|
|
%23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%25:sgpr_32 = S_MOV_B32 4096
|
2019-03-19 03:35:44 +08:00
|
|
|
%26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %25, %21, 0, implicit $exec
|
|
|
|
%28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
|
|
|
|
%31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec
|
|
|
|
%32:sgpr_32 = S_MOV_B32 6144
|
2019-03-19 03:35:44 +08:00
|
|
|
%33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec
|
|
|
|
%35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
|
|
|
|
%38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, implicit $exec
|
|
|
|
...
|
|
|
|
---
|
|
|
|
|
|
|
|
# GFX9-LABEL: name: LowestInMiddle
|
|
|
|
# GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11200
|
|
|
|
# GFX9: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
|
|
|
|
# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_5]]
|
|
|
|
# GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
|
|
|
|
# GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -3200, 0, 0
|
|
|
|
#
|
|
|
|
# GFX9: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 6400
|
|
|
|
# GFX9: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_7:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
|
|
|
|
# GFX9: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_7]]
|
|
|
|
# GFX9: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
|
|
|
|
# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0, 0,
|
|
|
|
# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0, 0,
|
|
|
|
|
|
|
|
name: LowestInMiddle
|
|
|
|
body: |
|
|
|
|
bb.0.entry:
|
|
|
|
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
|
|
|
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
|
|
|
|
%3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
|
|
|
%4:sreg_32_xm0 = COPY $sgpr101
|
|
|
|
%5:sreg_32_xm0 = S_MOV_B32 0
|
|
|
|
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
|
|
|
|
$sgpr4 = COPY %4
|
|
|
|
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
|
|
|
%6:vreg_64 = COPY $vgpr0_vgpr1
|
|
|
|
%7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
|
|
|
|
%8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
|
|
%9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
|
|
|
|
%10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
|
|
|
|
%11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
|
|
|
|
%12:sgpr_32 = COPY %1.sub1
|
|
|
|
%13:vgpr_32 = COPY %5
|
2019-03-19 03:35:44 +08:00
|
|
|
%14:vgpr_32, %15:sreg_64_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%16:vgpr_32 = COPY %12
|
2019-03-19 03:35:44 +08:00
|
|
|
%17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
|
|
|
|
%20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec
|
2019-03-19 03:35:44 +08:00
|
|
|
%21:vgpr_32, %22:sreg_64_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec
|
|
|
|
%23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%25:sgpr_32 = S_MOV_B32 8000
|
2019-03-19 03:35:44 +08:00
|
|
|
%26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec
|
|
|
|
%28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
|
|
|
|
%31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec
|
|
|
|
%32:sgpr_32 = S_MOV_B32 6400
|
2019-03-19 03:35:44 +08:00
|
|
|
%33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec
|
|
|
|
%35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
|
|
|
|
%38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, implicit $exec
|
|
|
|
%39:sgpr_32 = S_MOV_B32 11200
|
2019-03-19 03:35:44 +08:00
|
|
|
%40:vgpr_32, %41:sreg_64_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec
|
|
|
|
%42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
|
|
|
|
%45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, implicit $exec
|
|
|
|
...
|
|
|
|
---
|
|
|
|
|
|
|
|
# GFX9-LABEL: name: NegativeDistance
|
|
|
|
# GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10240
|
|
|
|
# GFX9: [[V_ADD_I32_e64_4:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
|
|
|
|
# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_I32_e64_5]]
|
|
|
|
# GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_4]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
|
|
|
|
# GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -4096, 0, 0
|
|
|
|
# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0, 0
|
|
|
|
# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0, 0
|
|
|
|
|
|
|
|
name: NegativeDistance
|
|
|
|
body: |
|
|
|
|
bb.0.entry:
|
|
|
|
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
|
|
|
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
|
|
|
|
%3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
|
|
|
%4:sreg_32_xm0 = COPY $sgpr101
|
|
|
|
%5:sreg_32_xm0 = S_MOV_B32 0
|
|
|
|
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
|
|
|
|
$sgpr4 = COPY %4
|
|
|
|
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
|
|
|
%6:vreg_64 = COPY $vgpr0_vgpr1
|
|
|
|
%7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
|
|
|
|
%8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
|
|
%9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
|
|
|
|
%10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
|
|
|
|
%11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
|
|
|
|
%12:sgpr_32 = COPY %1.sub1
|
|
|
|
%13:vgpr_32 = COPY %5
|
2019-03-19 03:35:44 +08:00
|
|
|
%14:vgpr_32, %15:sreg_64_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%16:vgpr_32 = COPY %12
|
2019-03-19 03:35:44 +08:00
|
|
|
%17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
|
|
|
|
%20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec
|
2019-03-19 03:35:44 +08:00
|
|
|
%21:vgpr_32, %22:sreg_64_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec
|
|
|
|
%23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%25:sgpr_32 = S_MOV_B32 6144
|
2019-03-19 03:35:44 +08:00
|
|
|
%26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec
|
|
|
|
%28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
|
|
|
|
%31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec
|
|
|
|
%32:sgpr_32 = S_MOV_B32 8192
|
2019-03-19 03:35:44 +08:00
|
|
|
%33:vgpr_32, %34:sreg_64_xexec = V_ADD_I32_e64 %21, %32, 0, implicit $exec
|
|
|
|
%35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
|
|
|
|
%38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, 0, implicit $exec
|
|
|
|
%39:sgpr_32 = S_MOV_B32 10240
|
2019-03-19 03:35:44 +08:00
|
|
|
%40:vgpr_32, %41:sreg_64_xexec = V_ADD_I32_e64 %21, %39, 0, implicit $exec
|
|
|
|
%42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
|
[AMDGPU] Promote constant offset to the immediate by finding a new base with 13bit constant offset from the nearby instructions.
Summary: Promote constant offset to immediate by recomputing the relative 13bit offset from nearby instructions.
E.g.
s_movk_i32 s0, 0x1800
v_add_co_u32_e32 v0, vcc, s0, v2
v_addc_co_u32_e32 v1, vcc, 0, v6, vcc
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[0:1], off
=>
s_movk_i32 s0, 0x1000
v_add_co_u32_e32 v5, vcc, s0, v2
v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
global_load_dwordx2 v[5:6], v[5:6], off
global_load_dwordx2 v[0:1], v[5:6], off offset:2048
Author: FarhanaAleen
Reviewed By: arsenm, rampitec
Subscribers: llvm-commits, AMDGPU
Differential Revision: https://reviews.llvm.org/D55539
llvm-svn: 349196
2018-12-15 05:13:14 +08:00
|
|
|
%44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
|
|
|
|
%45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, 0, implicit $exec
|
|
|
|
...
|
2018-12-19 03:58:39 +08:00
|
|
|
---
|
|
|
|
|
|
|
|
# Tests for a successful compilation.
|
|
|
|
name: assert_hit
|
|
|
|
body: |
|
|
|
|
bb.0.entry:
|
|
|
|
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
|
|
|
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
|
|
|
|
%3:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
|
|
|
|
%4:sreg_32_xm0 = COPY $sgpr101
|
|
|
|
%5:sreg_32_xm0 = S_MOV_B32 0
|
|
|
|
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
|
|
|
|
$sgpr4 = COPY %4
|
|
|
|
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
|
|
|
%6:vreg_64 = COPY $vgpr0_vgpr1
|
|
|
|
%7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
|
|
|
|
%8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
|
|
%9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
|
|
|
|
%10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
|
|
|
|
%11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
|
|
|
|
%12:sgpr_32 = COPY %1.sub1
|
|
|
|
%13:vgpr_32 = COPY %5
|
2019-03-19 03:35:44 +08:00
|
|
|
%14:vgpr_32, %15:sreg_64_xexec = V_ADD_I32_e64 %1.sub0, %11, 0, implicit $exec
|
2018-12-19 03:58:39 +08:00
|
|
|
%16:vgpr_32 = COPY %12
|
2019-03-19 03:35:44 +08:00
|
|
|
%17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
|
2018-12-19 03:58:39 +08:00
|
|
|
%19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
|
|
|
|
%20:vreg_64 = V_LSHLREV_B64 3, %9, implicit $exec
|
2019-03-19 03:35:44 +08:00
|
|
|
%21:vgpr_32, %22:sreg_64_xexec = V_ADD_I32_e64 %14, %20.sub0, 0, implicit $exec
|
|
|
|
%23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
|
2018-12-19 03:58:39 +08:00
|
|
|
|
|
|
|
%25:sgpr_32 = S_MOV_B32 6144
|
2019-03-19 03:35:44 +08:00
|
|
|
%26:vgpr_32, %27:sreg_64_xexec = V_ADD_I32_e64 %21, %25, 0, implicit $exec
|
|
|
|
%28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec
|
2018-12-19 03:58:39 +08:00
|
|
|
%30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
|
|
|
|
%31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, 0, implicit $exec
|
|
|
|
...
|